1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Wrappers for generic Unix system calls                       ---*/
5 /*---                                            syswrap-generic.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2015 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
34 
35 #include "pub_core_basics.h"
36 #include "pub_core_vki.h"
37 #include "pub_core_vkiscnums.h"
38 #include "pub_core_threadstate.h"
39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
40 #include "pub_core_aspacemgr.h"
41 #include "pub_core_transtab.h"      // VG_(discard_translations)
42 #include "pub_core_xarray.h"
43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
44 #include "pub_core_debuglog.h"
45 #include "pub_core_errormgr.h"
46 #include "pub_core_gdbserver.h"     // VG_(gdbserver)
47 #include "pub_core_libcbase.h"
48 #include "pub_core_libcassert.h"
49 #include "pub_core_libcfile.h"
50 #include "pub_core_libcprint.h"
51 #include "pub_core_libcproc.h"
52 #include "pub_core_libcsignal.h"
53 #include "pub_core_machine.h"       // VG_(get_SP)
54 #include "pub_core_mallocfree.h"
55 #include "pub_core_options.h"
56 #include "pub_core_scheduler.h"
57 #include "pub_core_signals.h"
58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
59 #include "pub_core_syscall.h"
60 #include "pub_core_syswrap.h"
61 #include "pub_core_tooliface.h"
62 #include "pub_core_ume.h"
63 #include "pub_core_stacks.h"
64 
65 #include "priv_types_n_macros.h"
66 #include "priv_syswrap-generic.h"
67 
68 #include "config.h"
69 
70 
71 void ML_(guess_and_register_stack) (Addr sp, ThreadState* tst)
72 {
73    Bool debug = False;
74    NSegment const* seg;
75 
76    /* We don't really know where the client stack is, because it's
77       allocated by the client.  The best we can do is look at the
78       memory mappings and try to derive some useful information.  We
79       assume that sp starts near its highest possible value, and can
80       only go down to the start of the mmap'd segment. */
81    seg = VG_(am_find_nsegment)(sp);
82    if (seg &&
83        VG_(am_is_valid_for_client)(sp, 1, VKI_PROT_READ | VKI_PROT_WRITE)) {
84       tst->client_stack_highest_byte = (Addr)VG_PGROUNDUP(sp)-1;
85       tst->client_stack_szB = tst->client_stack_highest_byte - seg->start + 1;
86 
87       VG_(register_stack)(seg->start, tst->client_stack_highest_byte);
88 
89       if (debug)
90 	 VG_(printf)("tid %u: guessed client stack range [%#lx-%#lx]\n",
91 		     tst->tid, seg->start, tst->client_stack_highest_byte);
92    } else {
93       VG_(message)(Vg_UserMsg,
94                    "!? New thread %u starts with SP(%#lx) unmapped\n",
95 		   tst->tid, sp);
96       tst->client_stack_highest_byte = 0;
97       tst->client_stack_szB  = 0;
98    }
99 }
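
/* Worked example (illustrative only; addresses invented): if a new
   thread starts with sp == 0xbeef1234 and the containing segment runs
   from 0xbee00000, then with 4 KiB pages

      client_stack_highest_byte == VG_PGROUNDUP(0xbeef1234) - 1
                                == 0xbeef1fff
      client_stack_szB          == 0xbeef1fff - 0xbee00000 + 1

   and the range [0xbee00000, 0xbeef1fff] is registered as the stack. */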
100 
101 /* Returns True iff the address range is something the client can
102    plausibly mess with: all of it either already belongs to the
103    client, or is free, or is a reservation. */
104 
105 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
106                                    const HChar *syscallname)
107 {
108    Bool ret;
109 
110    if (size == 0)
111       return True;
112 
113    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
114             (start,size,VKI_PROT_NONE);
115 
116    if (0)
117       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
118 		  syscallname, start, start+size-1, (Int)ret);
119 
120    if (!ret && syscallname != NULL) {
121       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
122                                "to modify addresses %#lx-%#lx\n",
123                                syscallname, start, start+size-1);
124       if (VG_(clo_verbosity) > 1) {
125          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
126       }
127    }
128 
129    return ret;
130 }
131 
132 
133 Bool ML_(client_signal_OK)(Int sigNo)
134 {
135    /* signal 0 is OK for kill */
136    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
137 
138    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
139 
140    return ret;
141 }
142 
143 
144 /* Handy small function to help stop wrappers from segfaulting when
145    presented with bogus client addresses.  It is not used for generating
146    user-visible errors. */
147 
148 Bool ML_(safe_to_deref) ( const void *start, SizeT size )
149 {
150    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
151 }
152 
153 
154 /* ---------------------------------------------------------------------
155    Doing mmap, mremap
156    ------------------------------------------------------------------ */
157 
158 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
159    munmap, mprotect (and mremap??) work at the page level.  So addresses
160    and lengths must be adjusted for this. */
161 
162 /* Mash around start and length so that the area exactly covers
163    an integral number of pages.  If we don't do that, memcheck's
164    idea of addressable memory diverges from the kernel's, which
165    causes the leak detector to crash. */
166 static
167 void page_align_addr_and_len( Addr* a, SizeT* len)
168 {
169    Addr ra;
170 
171    ra = VG_PGROUNDDN(*a);
172    *len = VG_PGROUNDUP(*a + *len) - ra;
173    *a = ra;
174 }
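
/* Worked example (a sketch, assuming 4 KiB pages; not used by the
   build): a request that straddles two pages is widened to cover both
   of them exactly:

      Addr  a   = 0x4001f80;
      SizeT len = 0x100;
      page_align_addr_and_len( &a, &len );
      // now a == 0x4001000 and len == 0x2000 (two whole pages)

   i.e. *a is rounded down to a page boundary and *len is rounded up so
   that *a + *len is page aligned too. */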
175 
176 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
177                                 UInt flags, Int fd, Off64T offset)
178 {
179    Bool d;
180 
181    /* 'a' is the return value from a real kernel mmap, hence: */
182    vg_assert(VG_IS_PAGE_ALIGNED(a));
183    /* whereas len is whatever the syscall supplied.  So: */
184    len = VG_PGROUNDUP(len);
185 
186    d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
187 
188    if (d)
189       VG_(discard_translations)( a, (ULong)len,
190                                  "notify_core_of_mmap" );
191 }
192 
193 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
194 {
195    Bool rr, ww, xx;
196 
197    /* 'a' is the return value from a real kernel mmap, hence: */
198    vg_assert(VG_IS_PAGE_ALIGNED(a));
199    /* whereas len is whatever the syscall supplied.  So: */
200    len = VG_PGROUNDUP(len);
201 
202    rr = toBool(prot & VKI_PROT_READ);
203    ww = toBool(prot & VKI_PROT_WRITE);
204    xx = toBool(prot & VKI_PROT_EXEC);
205 
206    VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
207 }
208 
209 
210 /* When a client mmap has been successfully done, this function must
211    be called.  It notifies both aspacem and the tool of the new
212    mapping.
213 
214    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
215    it is called from is POST(sys_io_setup).  In particular,
216    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
217    client mmap.  But it doesn't call this function; instead it does the
218    relevant notifications itself.  Here, we just pass di_handle=0 to
219    notify_tool_of_mmap as we have no better information.  But really this
220    function should be done away with; problem is I don't understand what
221    POST(sys_io_setup) does or how it works.
222 
223    [However, this function is used lots for Darwin, because
224     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
225  */
226 void
227 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
228                                     UInt flags, Int fd, Off64T offset )
229 {
230    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
231    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
232    // Should it?  --njn
233    notify_core_of_mmap(a, len, prot, flags, fd, offset);
234    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
235 }
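
/* Illustrative caller (hypothetical sketch, not part of this file): a
   POST wrapper that has just observed a successful client mmap could
   simply forward the syscall's arguments, e.g.

      if (!sr_isError(res))
         ML_(notify_core_and_tool_of_mmap)( sr_Res(res), (SizeT)ARG2,
                                            (UInt)ARG3, (UInt)ARG4,
                                            (Int)ARG5, (Off64T)ARG6 );

   where ARG2..ARG6 are the length/prot/flags/fd/offset the client
   passed.  As noted above, the Darwin wrappers rely on this function
   in essentially that way, since ML_(generic_PRE_sys_mmap) cannot be
   used there. */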
236 
237 void
238 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
239 {
240    Bool d;
241 
242    page_align_addr_and_len(&a, &len);
243    d = VG_(am_notify_munmap)(a, len);
244    VG_TRACK( die_mem_munmap, a, len );
245    VG_(di_notify_munmap)( a, len );
246    if (d)
247       VG_(discard_translations)( a, (ULong)len,
248                                  "ML_(notify_core_and_tool_of_munmap)" );
249 }
250 
251 void
252 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
253 {
254    Bool rr = toBool(prot & VKI_PROT_READ);
255    Bool ww = toBool(prot & VKI_PROT_WRITE);
256    Bool xx = toBool(prot & VKI_PROT_EXEC);
257    Bool d;
258 
259    page_align_addr_and_len(&a, &len);
260    d = VG_(am_notify_mprotect)(a, len, prot);
261    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
262    VG_(di_notify_mprotect)( a, len, prot );
263    if (d)
264       VG_(discard_translations)( a, (ULong)len,
265                                  "ML_(notify_core_and_tool_of_mprotect)" );
266 }
267 
268 
269 
270 #if HAVE_MREMAP
271 /* Expand (or shrink) an existing mapping, potentially moving it at
272    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
273 */
274 static
275 SysRes do_mremap( Addr old_addr, SizeT old_len,
276                   Addr new_addr, SizeT new_len,
277                   UWord flags, ThreadId tid )
278 {
279 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
280 
281    Bool      ok, d;
282    NSegment const* old_seg;
283    Addr      advised;
284    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
285    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
286 
287    if (0)
288       VG_(printf)("do_remap (old %#lx %lu) (new %#lx %lu) %s %s\n",
289                   old_addr,old_len,new_addr,new_len,
290                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
291                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
292    if (0)
293       VG_(am_show_nsegments)(0, "do_remap: before");
294 
295    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
296       goto eINVAL;
297 
298    if (!VG_IS_PAGE_ALIGNED(old_addr))
299       goto eINVAL;
300 
301    old_len = VG_PGROUNDUP(old_len);
302    new_len = VG_PGROUNDUP(new_len);
303 
304    if (new_len == 0)
305       goto eINVAL;
306 
307    /* kernel doesn't reject this, but we do. */
308    if (old_len == 0)
309       goto eINVAL;
310 
311    /* reject wraparounds */
312    if (old_addr + old_len < old_addr)
313       goto eINVAL;
314    if (f_fixed == True && new_addr + new_len < new_len)
315       goto eINVAL;
316 
317    /* kernel rejects all fixed, no-move requests (which are
318       meaningless). */
319    if (f_fixed == True && f_maymove == False)
320       goto eINVAL;
321 
322    /* Stay away from non-client areas. */
323    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
324       goto eINVAL;
325 
326    /* In all remaining cases, if the old range does not fall within a
327       single segment, fail. */
328    old_seg = VG_(am_find_nsegment)( old_addr );
329    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
330       goto eINVAL;
331    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC &&
332        old_seg->kind != SkShmC)
333       goto eINVAL;
334 
335    vg_assert(old_len > 0);
336    vg_assert(new_len > 0);
337    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
338    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
339    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
340 
341    /* There are 3 remaining cases:
342 
343       * maymove == False
344 
345         new space has to be at old address, so:
346             - shrink    -> unmap end
347             - same size -> do nothing
348             - grow      -> if can grow in-place, do so, else fail
349 
350       * maymove == True, fixed == False
351 
352         new space can be anywhere, so:
353             - shrink    -> unmap end
354             - same size -> do nothing
355             - grow      -> if can grow in-place, do so, else
356                            move to anywhere large enough, else fail
357 
358       * maymove == True, fixed == True
359 
360         new space must be at new address, so:
361 
362             - if new address is not page aligned, fail
363             - if new address range overlaps old one, fail
364             - if new address range cannot be allocated, fail
365             - else move to new address range with new size
366             - else fail
367    */
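
/* Worked example of the case analysis above (illustrative only): a
   client call mremap(old_addr, 2*PAGE, 1*PAGE, 0) has neither
   MREMAP_MAYMOVE nor MREMAP_FIXED set, so it takes the
   "maymove == False" path, lands at shrink_in_place, munmaps the
   trailing page and returns old_addr. */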
368 
369    if (f_maymove == False) {
370       /* new space has to be at old address */
371       if (new_len < old_len)
372          goto shrink_in_place;
373       if (new_len > old_len)
374          goto grow_in_place_or_fail;
375       goto same_in_place;
376    }
377 
378    if (f_maymove == True && f_fixed == False) {
379       /* new space can be anywhere */
380       if (new_len < old_len)
381          goto shrink_in_place;
382       if (new_len > old_len)
383          goto grow_in_place_or_move_anywhere_or_fail;
384       goto same_in_place;
385    }
386 
387    if (f_maymove == True && f_fixed == True) {
388       /* new space can only be at the new address */
389       if (!VG_IS_PAGE_ALIGNED(new_addr))
390          goto eINVAL;
391       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
392          /* no overlap */
393       } else {
394          goto eINVAL;
395       }
396       if (new_addr == 0)
397          goto eINVAL;
398          /* VG_(am_get_advisory_client_simple) interprets zero to mean
399             non-fixed, which is not what we want */
400       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
401       if (!ok || advised != new_addr)
402          goto eNOMEM;
403       ok = VG_(am_relocate_nooverlap_client)
404               ( &d, old_addr, old_len, new_addr, new_len );
405       if (ok) {
406          VG_TRACK( copy_mem_remap, old_addr, new_addr,
407                                    MIN_SIZET(old_len,new_len) );
408          if (new_len > old_len)
409             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
410                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
411                       0/*di_handle*/ );
412          VG_TRACK(die_mem_munmap, old_addr, old_len);
413          if (d) {
414             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
415             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
416          }
417          return VG_(mk_SysRes_Success)( new_addr );
418       }
419       goto eNOMEM;
420    }
421 
422    /* end of the 3 cases */
423    /*NOTREACHED*/ vg_assert(0);
424 
425   grow_in_place_or_move_anywhere_or_fail:
426    {
427    /* try growing it in-place */
428    Addr   needA = old_addr + old_len;
429    SSizeT needL = new_len - old_len;
430 
431    vg_assert(needL > 0);
432    vg_assert(needA > 0);
433 
434    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
435    if (ok) {
436       /* Fixes bug #129866. */
437       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
438    }
439    if (ok && advised == needA) {
440       const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
441       if (new_seg) {
442          VG_TRACK( new_mem_mmap, needA, needL,
443                                  new_seg->hasR,
444                                  new_seg->hasW, new_seg->hasX,
445                                  0/*di_handle*/ );
446          return VG_(mk_SysRes_Success)( old_addr );
447       }
448    }
449 
450    /* that failed.  Look elsewhere. */
451    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
452    if (ok) {
453       Bool oldR = old_seg->hasR;
454       Bool oldW = old_seg->hasW;
455       Bool oldX = old_seg->hasX;
456       /* assert new area does not overlap old */
457       vg_assert(advised+new_len-1 < old_addr
458                 || advised > old_addr+old_len-1);
459       ok = VG_(am_relocate_nooverlap_client)
460               ( &d, old_addr, old_len, advised, new_len );
461       if (ok) {
462          VG_TRACK( copy_mem_remap, old_addr, advised,
463                                    MIN_SIZET(old_len,new_len) );
464          if (new_len > old_len)
465             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
466                       oldR, oldW, oldX, 0/*di_handle*/ );
467          VG_TRACK(die_mem_munmap, old_addr, old_len);
468          if (d) {
469             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
470             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
471          }
472          return VG_(mk_SysRes_Success)( advised );
473       }
474    }
475    goto eNOMEM;
476    }
477    /*NOTREACHED*/ vg_assert(0);
478 
479   grow_in_place_or_fail:
480    {
481    Addr  needA = old_addr + old_len;
482    SizeT needL = new_len - old_len;
483 
484    vg_assert(needA > 0);
485 
486    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
487    if (ok) {
488       /* Fixes bug #129866. */
489       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
490    }
491    if (!ok || advised != needA)
492       goto eNOMEM;
493    const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
494    if (!new_seg)
495       goto eNOMEM;
496    VG_TRACK( new_mem_mmap, needA, needL,
497                            new_seg->hasR, new_seg->hasW, new_seg->hasX,
498                            0/*di_handle*/ );
499 
500    return VG_(mk_SysRes_Success)( old_addr );
501    }
502    /*NOTREACHED*/ vg_assert(0);
503 
504   shrink_in_place:
505    {
506    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
507    if (sr_isError(sres))
508       return sres;
509    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
510    if (d)
511       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
512                                  "do_remap(7)" );
513    return VG_(mk_SysRes_Success)( old_addr );
514    }
515    /*NOTREACHED*/ vg_assert(0);
516 
517   same_in_place:
518    return VG_(mk_SysRes_Success)( old_addr );
519    /*NOTREACHED*/ vg_assert(0);
520 
521   eINVAL:
522    return VG_(mk_SysRes_Error)( VKI_EINVAL );
523   eNOMEM:
524    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
525 
526 #  undef MIN_SIZET
527 }
528 #endif /* HAVE_MREMAP */
529 
530 
531 /* ---------------------------------------------------------------------
532    File-descriptor tracking
533    ------------------------------------------------------------------ */
534 
535 /* One of these is allocated for each open file descriptor.  */
536 typedef struct OpenFd
537 {
538    Int fd;                        /* The file descriptor */
539    HChar *pathname;               /* NULL if not a regular file or unknown */
540    ExeContext *where;             /* NULL if inherited from parent */
541    struct OpenFd *next, *prev;
542 } OpenFd;
543 
544 /* List of allocated file descriptors. */
545 static OpenFd *allocated_fds = NULL;
546 
547 /* Count of open file descriptors. */
548 static Int fd_count = 0;
549 
550 
551 /* Note the fact that a file descriptor was just closed. */
552 void ML_(record_fd_close)(Int fd)
553 {
554    OpenFd *i = allocated_fds;
555 
556    if (fd >= VG_(fd_hard_limit))
557       return;			/* Valgrind internal */
558 
559    while(i) {
560       if(i->fd == fd) {
561          if(i->prev)
562             i->prev->next = i->next;
563          else
564             allocated_fds = i->next;
565          if(i->next)
566             i->next->prev = i->prev;
567          if(i->pathname)
568             VG_(free) (i->pathname);
569          VG_(free) (i);
570          fd_count--;
571          break;
572       }
573       i = i->next;
574    }
575 }
576 
577 /* Note the fact that a file descriptor was just opened.  If the
578    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
579    this either indicates a non-standard file (i.e. a pipe or socket or
580    some such thing) or that we don't know the filename.  If the fd is
581    already open, then we're probably doing a dup2() to an existing fd,
582    so just overwrite the existing one. */
583 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd,
584                                          const HChar *pathname)
585 {
586    OpenFd *i;
587 
588    if (fd >= VG_(fd_hard_limit))
589       return;			/* Valgrind internal */
590 
591    /* Check to see if this fd is already open. */
592    i = allocated_fds;
593    while (i) {
594       if (i->fd == fd) {
595          if (i->pathname) VG_(free)(i->pathname);
596          break;
597       }
598       i = i->next;
599    }
600 
601    /* Not already open: allocate a new OpenFd */
602    if (i == NULL) {
603       i = VG_(malloc)("syswrap.rfdowgn.1", sizeof(OpenFd));
604 
605       i->prev = NULL;
606       i->next = allocated_fds;
607       if(allocated_fds) allocated_fds->prev = i;
608       allocated_fds = i;
609       fd_count++;
610    }
611 
612    i->fd = fd;
613    i->pathname = VG_(strdup)("syswrap.rfdowgn.2", pathname);
614    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
615 }
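
/* Illustrative caller (hypothetical sketch, not part of this file): a
   POST handler for an open()-style syscall that succeeded might record
   the new fd together with the pathname argument, e.g.

      if (!sr_isError(res) && VG_(clo_track_fds))
         ML_(record_fd_open_with_given_name)( tid, sr_Res(res),
                                              (HChar*)(Addr)ARG1 );

   while wrappers for pipe(), socket() etc., which have no pathname,
   would use ML_(record_fd_open_nameless) instead. */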
616 
617 // Record opening of an fd, and find its name.
618 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
619 {
620    const HChar* buf;
621    const HChar* name;
622    if (VG_(resolve_filename)(fd, &buf))
623       name = buf;
624    else
625       name = NULL;
626 
627    ML_(record_fd_open_with_given_name)(tid, fd, name);
628 }
629 
630 // Record opening of a nameless fd.
631 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
632 {
633    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
634 }
635 
636 // Return True if a given file descriptor is already recorded.
637 Bool ML_(fd_recorded)(Int fd)
638 {
639    OpenFd *i = allocated_fds;
640    while (i) {
641       if (i->fd == fd)
642          return True;
643       i = i->next;
644    }
645    return False;
646 }
647 
648 /* Returned string must not be modified nor free'd. */
649 const HChar *ML_(find_fd_recorded_by_fd)(Int fd)
650 {
651    OpenFd *i = allocated_fds;
652 
653    while (i) {
654       if (i->fd == fd)
655          return i->pathname;
656       i = i->next;
657    }
658 
659    return NULL;
660 }
661 
662 static
663 HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
664 {
665    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
666       VG_(sprintf)(name, "<unknown>");
667    } else {
668       VG_(sprintf)(name, "%s", sa->sun_path);
669    }
670 
671    return name;
672 }
673 
674 static
675 HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
676 {
677    if (sa == NULL || len == 0) {
678       VG_(sprintf)(name, "<unknown>");
679    } else if (sa->sin_port == 0) {
680       VG_(sprintf)(name, "<unbound>");
681    } else {
682       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
683       VG_(sprintf)(name, "%u.%u.%u.%u:%u",
684                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
685                    (addr>>8) & 0xFF, addr & 0xFF,
686                    VG_(ntohs)(sa->sin_port));
687    }
688 
689    return name;
690 }
691 
692 static
693 void inet6_format(HChar *s, const UChar ip[16])
694 {
695    static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};
696 
697    if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
698       const struct vki_in_addr *sin_addr =
699           (const struct vki_in_addr *)(ip + 12);
700       UInt addr = VG_(ntohl)(sin_addr->s_addr);
701 
702       VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
703                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
704                    (addr>>8) & 0xFF, addr & 0xFF);
705    } else {
706       Bool compressing = False;
707       Bool compressed = False;
708       Int len = 0;
709       Int i;
710 
711       for (i = 0; i < 16; i += 2) {
712          UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
713          if (word == 0 && !compressed) {
714             compressing = True;
715          } else {
716             if (compressing) {
717                compressing = False;
718                compressed = True;
719                s[len++] = ':';
720             }
721             if (i > 0) {
722                s[len++] = ':';
723             }
724             len += VG_(sprintf)(s + len, "%x", word);
725          }
726       }
727 
728       if (compressing) {
729          s[len++] = ':';
730          s[len++] = ':';
731       }
732 
733       s[len++] = 0;
734    }
735 
736    return;
737 }
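
/* Examples of inet6_format's output (worked through by hand from the
   code above):

      all-zero words except a trailing 1   ->  "::1"
      a V4-mapped address                  ->  "::ffff:10.0.0.1"
      2001:0db8:0:0:0:0:0:5                ->  "2001:db8::5"

   Only the first run of zero words is compressed to "::"; any later
   zero words are printed as "0". */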
738 
739 static
740 HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
741 {
742    if (sa == NULL || len == 0) {
743       VG_(sprintf)(name, "<unknown>");
744    } else if (sa->sin6_port == 0) {
745       VG_(sprintf)(name, "<unbound>");
746    } else {
747       HChar addr[100];    // large enough
748       inet6_format(addr, (void *)&(sa->sin6_addr));
749       VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
750    }
751 
752    return name;
753 }
754 
755 /*
756  * Try to get some details about a socket.
757  */
758 static void
759 getsockdetails(Int fd)
760 {
761    union u {
762       struct vki_sockaddr a;
763       struct vki_sockaddr_in in;
764       struct vki_sockaddr_in6 in6;
765       struct vki_sockaddr_un un;
766    } laddr;
767    Int llen;
768 
769    llen = sizeof(laddr);
770    VG_(memset)(&laddr, 0, llen);
771 
772    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
773       switch(laddr.a.sa_family) {
774       case VKI_AF_INET: {
775          HChar lname[32];   // large enough
776          HChar pname[32];   // large enough
777          struct vki_sockaddr_in paddr;
778          Int plen = sizeof(struct vki_sockaddr_in);
779 
780          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
781             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
782                          inet_to_name(&(laddr.in), llen, lname),
783                          inet_to_name(&paddr, plen, pname));
784          } else {
785             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
786                          fd, inet_to_name(&(laddr.in), llen, lname));
787          }
788          return;
789          }
790       case VKI_AF_INET6: {
791          HChar lname[128];  // large enough
792          HChar pname[128];  // large enough
793          struct vki_sockaddr_in6 paddr;
794          Int plen = sizeof(struct vki_sockaddr_in6);
795 
796          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
797             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
798                          inet6_to_name(&(laddr.in6), llen, lname),
799                          inet6_to_name(&paddr, plen, pname));
800          } else {
801             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
802                          fd, inet6_to_name(&(laddr.in6), llen, lname));
803          }
804          return;
805          }
806       case VKI_AF_UNIX: {
807          static char lname[256];
808          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
809                       unix_to_name(&(laddr.un), llen, lname));
810          return;
811          }
812       default:
813          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
814                       laddr.a.sa_family, fd);
815          return;
816       }
817    }
818 
819    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
820 }
821 
822 
823 /* Dump out a summary, and a more detailed list, of open file descriptors. */
824 void VG_(show_open_fds) (const HChar* when)
825 {
826    OpenFd *i = allocated_fds;
827 
828    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
829 
830    while (i) {
831       if (i->pathname) {
832          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
833                       i->pathname);
834       } else {
835          Int val;
836          Int len = sizeof(val);
837 
838          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
839              == -1) {
840             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
841          } else {
842             getsockdetails(i->fd);
843          }
844       }
845 
846       if(i->where) {
847          VG_(pp_ExeContext)(i->where);
848          VG_(message)(Vg_UserMsg, "\n");
849       } else {
850          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
851          VG_(message)(Vg_UserMsg, "\n");
852       }
853 
854       i = i->next;
855    }
856 
857    VG_(message)(Vg_UserMsg, "\n");
858 }
859 
860 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
861    have /proc support compiled in, or a non-Linux kernel), then we need to
862    find out what file descriptors we inherited from our parent process the
863    hard way - by checking each fd in turn. */
864 static
865 void init_preopened_fds_without_proc_self_fd(void)
866 {
867    struct vki_rlimit lim;
868    UInt count;
869    Int i;
870 
871    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
872       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
873          an arbitrarily high number.  1024 happens to be the limit in
874          the 2.4 Linux kernels. */
875       count = 1024;
876    } else {
877       count = lim.rlim_cur;
878    }
879 
880    for (i = 0; i < count; i++)
881       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
882          ML_(record_fd_open_named)(-1, i);
883 }
884 
885 /* Initialize the list of open file descriptors with the file descriptors
886    we inherited from our parent process. */
887 
888 void VG_(init_preopened_fds)(void)
889 {
890 // DDD: should probably use HAVE_PROC here or similar, instead.
891 #if defined(VGO_linux)
892    Int ret;
893    struct vki_dirent64 d;
894    SysRes f;
895 
896    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
897    if (sr_isError(f)) {
898       init_preopened_fds_without_proc_self_fd();
899       return;
900    }
901 
902    while ((ret = VG_(getdents64)(sr_Res(f), &d, sizeof(d))) != 0) {
903       if (ret == -1)
904          goto out;
905 
906       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
907          HChar* s;
908          Int fno = VG_(strtoll10)(d.d_name, &s);
909          if (*s == '\0') {
910             if (fno != sr_Res(f))
911                if (VG_(clo_track_fds))
912                   ML_(record_fd_open_named)(-1, fno);
913          } else {
914             VG_(message)(Vg_DebugMsg,
915                "Warning: invalid file name in /proc/self/fd: %s\n",
916                d.d_name);
917          }
918       }
919 
920       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
921    }
922 
923   out:
924    VG_(close)(sr_Res(f));
925 
926 #elif defined(VGO_darwin)
927    init_preopened_fds_without_proc_self_fd();
928 
929 #elif defined(VGO_solaris)
930    Int ret;
931    Char buf[VKI_MAXGETDENTS_SIZE];
932    SysRes f;
933 
934    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
935    if (sr_isError(f)) {
936       init_preopened_fds_without_proc_self_fd();
937       return;
938    }
939 
940    while ((ret = VG_(getdents64)(sr_Res(f), (struct vki_dirent64 *) buf,
941                                  sizeof(buf))) > 0) {
942       Int i = 0;
943       while (i < ret) {
944          /* Process one entry. */
945          struct vki_dirent64 *d = (struct vki_dirent64 *) (buf + i);
946          if (VG_(strcmp)(d->d_name, ".") && VG_(strcmp)(d->d_name, "..")) {
947             HChar *s;
948             Int fno = VG_(strtoll10)(d->d_name, &s);
949             if (*s == '\0') {
950                if (fno != sr_Res(f))
951                   if (VG_(clo_track_fds))
952                      ML_(record_fd_open_named)(-1, fno);
953             } else {
954                VG_(message)(Vg_DebugMsg,
955                      "Warning: invalid file name in /proc/self/fd: %s\n",
956                      d->d_name);
957             }
958          }
959 
960          /* Move on to the next entry. */
961          i += d->d_reclen;
962       }
963    }
964 
965    VG_(close)(sr_Res(f));
966 
967 #else
968 #  error Unknown OS
969 #endif
970 }
971 
972 static
973 HChar *strdupcat ( const HChar* cc, const HChar *s1, const HChar *s2,
974                    ArenaId aid )
975 {
976    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
977    HChar *result = VG_(arena_malloc) ( aid, cc, len );
978    VG_(strcpy) ( result, s1 );
979    VG_(strcat) ( result, s2 );
980    return result;
981 }
982 
983 static
984 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
985                             const HChar *msg, Addr base, SizeT size )
986 {
987    HChar *outmsg = strdupcat ( "di.syswrap.pmrs.1",
988                                "sendmsg", msg, VG_AR_CORE );
989    PRE_MEM_READ( outmsg, base, size );
990    VG_(free) ( outmsg );
991 }
992 
993 static
994 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
995                              const HChar *msg, Addr base, SizeT size )
996 {
997    HChar *outmsg = strdupcat ( "di.syswrap.pmwr.1",
998                                "recvmsg", msg, VG_AR_CORE );
999    if ( read )
1000       PRE_MEM_READ( outmsg, base, size );
1001    else
1002       PRE_MEM_WRITE( outmsg, base, size );
1003    VG_(free) ( outmsg );
1004 }
1005 
1006 static
1007 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
1008                               const HChar *fieldName, Addr base, SizeT size )
1009 {
1010    if ( !read )
1011       POST_MEM_WRITE( base, size );
1012 }
1013 
1014 static
1015 void msghdr_foreachfield (
1016         ThreadId tid,
1017         const HChar *name,
1018         struct vki_msghdr *msg,
1019         UInt length,
1020         void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
1021         Bool rekv /* "recv" apparently shadows some header decl on OSX108 */
1022      )
1023 {
1024    HChar *fieldName;
1025 
1026    if ( !msg )
1027       return;
1028 
1029    fieldName = VG_(malloc) ( "di.syswrap.mfef", VG_(strlen)(name) + 32 );
1030 
1031    VG_(sprintf) ( fieldName, "(%s)", name );
1032 
1033    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
1034    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
1035    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
1036    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
1037    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
1038    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
1039 
1040    /* msg_flags is completely ignored for sendmsg; recvmsg doesn't read
1041       the field, but does write to it. */
1042    if ( rekv )
1043       foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
1044 
1045    if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
1046         && msg->msg_name ) {
1047       VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
1048       foreach_func ( tid, False, fieldName,
1049                      (Addr)msg->msg_name, msg->msg_namelen );
1050    }
1051 
1052    if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
1053         && msg->msg_iov ) {
1054       struct vki_iovec *iov = msg->msg_iov;
1055       UInt i;
1056 
1057       VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
1058 
1059       foreach_func ( tid, True, fieldName,
1060                      (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
1061 
1062       for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
1063          UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
1064          VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
1065          foreach_func ( tid, False, fieldName,
1066                         (Addr)iov->iov_base, iov_len );
1067          length = length - iov_len;
1068       }
1069    }
1070 
1071    if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
1072         && msg->msg_control )
1073    {
1074       VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
1075       foreach_func ( tid, False, fieldName,
1076                      (Addr)msg->msg_control, msg->msg_controllen );
1077    }
1078 
1079    VG_(free) ( fieldName );
1080 }
1081 
1082 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
1083 {
1084    struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
1085 
1086    while (cm) {
1087       if (cm->cmsg_level == VKI_SOL_SOCKET &&
1088           cm->cmsg_type == VKI_SCM_RIGHTS ) {
1089          Int *fds = (Int *) VKI_CMSG_DATA(cm);
1090          Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
1091                          / sizeof(int);
1092          Int i;
1093 
1094          for (i = 0; i < fdc; i++)
1095             if(VG_(clo_track_fds))
1096                // XXX: must we check the range on these fds with
1097                //      ML_(fd_allowed)()?
1098                ML_(record_fd_open_named)(tid, fds[i]);
1099       }
1100 
1101       cm = VKI_CMSG_NXTHDR(msg, cm);
1102    }
1103 }
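
/* For reference (client-side sketch using the standard socket API, not
   Valgrind code): the descriptors picked up above arrive when a client
   does something like

      int  fd_to_pass = some_fd;          /* descriptor to hand over */
      char dummy = '*';
      struct iovec   iov = { &dummy, 1 }; /* at least one data byte   */
      struct msghdr  m   = { 0 };
      char cbuf[CMSG_SPACE(sizeof(int))];
      struct cmsghdr *c;
      m.msg_iov        = &iov;
      m.msg_iovlen     = 1;
      m.msg_control    = cbuf;
      m.msg_controllen = sizeof(cbuf);
      c = CMSG_FIRSTHDR(&m);
      c->cmsg_level = SOL_SOCKET;
      c->cmsg_type  = SCM_RIGHTS;
      c->cmsg_len   = CMSG_LEN(sizeof(int));
      *(int *)CMSG_DATA(c) = fd_to_pass;
      sendmsg(sock, &m, 0);

   so each SCM_RIGHTS payload is an array of ints naming the passed
   fds, which is why the loop above divides cmsg_len by sizeof(int). */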
1104 
1105 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
1106 static
1107 void pre_mem_read_sockaddr ( ThreadId tid,
1108                              const HChar *description,
1109                              struct vki_sockaddr *sa, UInt salen )
1110 {
1111    HChar *outmsg;
1112    struct vki_sockaddr_un*  saun = (struct vki_sockaddr_un *)sa;
1113    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
1114    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
1115 #  ifdef VKI_AF_BLUETOOTH
1116    struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
1117 #  endif
1118 #  ifdef VKI_AF_NETLINK
1119    struct vki_sockaddr_nl*  nl   = (struct vki_sockaddr_nl *)sa;
1120 #  endif
1121 
1122    /* NULL/zero-length sockaddrs are legal */
1123    if ( sa == NULL || salen == 0 ) return;
1124 
1125    outmsg = VG_(malloc) ( "di.syswrap.pmr_sockaddr.1",
1126                           VG_(strlen)( description ) + 30 );
1127 
1128    VG_(sprintf) ( outmsg, description, "sa_family" );
1129    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
1130 
1131    switch (sa->sa_family) {
1132 
1133       case VKI_AF_UNIX:
1134          VG_(sprintf) ( outmsg, description, "sun_path" );
1135          PRE_MEM_RASCIIZ( outmsg, (Addr) saun->sun_path );
1136          // GrP fixme max of sun_len-2? what about nul char?
1137          break;
1138 
1139       case VKI_AF_INET:
1140          VG_(sprintf) ( outmsg, description, "sin_port" );
1141          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
1142          VG_(sprintf) ( outmsg, description, "sin_addr" );
1143          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
1144          break;
1145 
1146       case VKI_AF_INET6:
1147          VG_(sprintf) ( outmsg, description, "sin6_port" );
1148          PRE_MEM_READ( outmsg,
1149             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
1150          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
1151          PRE_MEM_READ( outmsg,
1152             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
1153          VG_(sprintf) ( outmsg, description, "sin6_addr" );
1154          PRE_MEM_READ( outmsg,
1155             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
1156          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
1157          PRE_MEM_READ( outmsg,
1158             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
1159          break;
1160 
1161 #     ifdef VKI_AF_BLUETOOTH
1162       case VKI_AF_BLUETOOTH:
1163          VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
1164          PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
1165          VG_(sprintf) ( outmsg, description, "rc_channel" );
1166          PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
1167          break;
1168 #     endif
1169 
1170 #     ifdef VKI_AF_NETLINK
1171       case VKI_AF_NETLINK:
1172          VG_(sprintf)(outmsg, description, "nl_pid");
1173          PRE_MEM_READ(outmsg, (Addr)&nl->nl_pid, sizeof(nl->nl_pid));
1174          VG_(sprintf)(outmsg, description, "nl_groups");
1175          PRE_MEM_READ(outmsg, (Addr)&nl->nl_groups, sizeof(nl->nl_groups));
1176          break;
1177 #     endif
1178 
1179 #     ifdef VKI_AF_UNSPEC
1180       case VKI_AF_UNSPEC:
1181          break;
1182 #     endif
1183 
1184       default:
1185          /* No specific information about this address family.
1186             Let's just check the full data following the family.
1187             Note that this can give a false positive if this (unknown)
1188             struct sockaddr_???? has padding bytes between its elements. */
1189          VG_(sprintf) ( outmsg, description, "sa_data" );
1190          PRE_MEM_READ( outmsg, (Addr)&sa->sa_family + sizeof(sa->sa_family),
1191                        salen -  sizeof(sa->sa_family));
1192          break;
1193    }
1194 
1195    VG_(free) ( outmsg );
1196 }
1197 
1198 /* Dereference a pointer to a UInt. */
1199 static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
1200 {
1201    UInt* a_p = (UInt*)a;
1202    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
1203    if (a_p == NULL)
1204       return 0;
1205    else
1206       return *a_p;
1207 }
1208 
1209 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1210                                   const HChar* buf_s, const HChar* buflen_s )
1211 {
1212    if (VG_(tdict).track_pre_mem_write) {
1213       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1214       if (buflen_in > 0) {
1215          VG_(tdict).track_pre_mem_write(
1216             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1217       }
1218    }
1219 }
1220 
1221 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1222                                    Addr buf_p, Addr buflen_p, const HChar* s )
1223 {
1224    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1225       UInt buflen_out = deref_UInt( tid, buflen_p, s);
1226       if (buflen_out > 0 && buf_p != (Addr)NULL) {
1227          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1228       }
1229    }
1230 }
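
/* A note on the pair above (summary only, no new behaviour): they
   handle the usual value-result length convention, e.g. accept(s,
   addr, &addrlen) or getsockname(s, addr, &addrlen).  The PRE check
   reads the incoming *buflen and tells the tool that a buffer of that
   size may be written; the POST check re-reads the (possibly shrunk)
   *buflen after the kernel has filled it in and marks only that many
   bytes of the buffer as written. */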
1231 
1232 /* ---------------------------------------------------------------------
1233    Data seg end, for brk()
1234    ------------------------------------------------------------------ */
1235 
1236 /*   +--------+------------+
1237      | anon   |    resvn   |
1238      +--------+------------+
1239 
1240      ^     ^  ^
1241      |     |  boundary is page aligned
1242      |     VG_(brk_limit) -- no alignment constraint
1243      VG_(brk_base) -- page aligned -- does not move
1244 
1245      Both the anon part and the reservation part are always at least
1246      one page.
1247 */
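
/* Worked example (illustrative numbers): suppose VG_(brk_base) is
   0x0804f000, VG_(brk_limit) is 0x08051234 and the client asks for
   brk(0x08053000).  If 0x08053000 lies beyond the end of the anon
   segment, do_brk below must extend that segment by
   VG_PGROUNDUP(0x08053000) - (aseg->end + 1) bytes, taken from the
   adjacent reservation; if the reservation cannot supply that much,
   the request fails and the old VG_(brk_limit) is returned instead. */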
1248 
1249 /* Set the new data segment end to NEWBRK.  If this succeeds, return
1250    NEWBRK, else return the current data segment end. */
1251 
1252 static Addr do_brk ( Addr newbrk, ThreadId tid )
1253 {
1254    NSegment const* aseg;
1255    Addr newbrkP;
1256    SizeT delta;
1257    Bool debug = False;
1258 
1259    if (debug)
1260       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
1261 		  VG_(brk_base), VG_(brk_limit), newbrk);
1262 
1263    if (0) VG_(am_show_nsegments)(0, "in_brk");
1264 
1265    if (newbrk < VG_(brk_base))
1266       /* Clearly impossible. */
1267       goto bad;
1268 
1269    if (newbrk < VG_(brk_limit)) {
1270       /* shrinking the data segment.  Be lazy and don't munmap the
1271          excess area. */
1272       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
1273       vg_assert(seg);
1274 
1275       if (seg->hasT)
1276          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
1277                                     "do_brk(shrink)" );
1278       /* Since we're being lazy and not unmapping pages, we have to
1279          zero out the area, so that if the area later comes back into
1280          circulation, it will be filled with zeroes, as if it really
1281          had been unmapped and later remapped.  Be a bit paranoid and
1282          try hard to ensure we're not going to segfault by doing the
1283          write - check both ends of the range are in the same segment
1284          and that segment is writable. */
1285       NSegment const * seg2;
1286 
1287       seg2 = VG_(am_find_nsegment)( VG_(brk_limit) - 1 );
1288       vg_assert(seg2);
1289 
1290       if (seg == seg2 && seg->hasW)
1291          VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
1292 
1293       VG_(brk_limit) = newbrk;
1294       return newbrk;
1295    }
1296 
1297    /* otherwise we're expanding the brk segment. */
1298    if (VG_(brk_limit) > VG_(brk_base))
1299       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1300    else
1301       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
1302 
1303    /* These should be assured by setup_client_dataseg in m_main. */
1304    vg_assert(aseg);
1305    vg_assert(aseg->kind == SkAnonC);
1306 
1307    if (newbrk <= aseg->end + 1) {
1308       /* still fits within the anon segment. */
1309       VG_(brk_limit) = newbrk;
1310       return newbrk;
1311    }
1312 
1313    newbrkP = VG_PGROUNDUP(newbrk);
1314    delta = newbrkP - (aseg->end + 1);
1315    vg_assert(delta > 0);
1316    vg_assert(VG_IS_PAGE_ALIGNED(delta));
1317 
1318    Bool overflow;
1319    if (! VG_(am_extend_into_adjacent_reservation_client)( aseg->start, delta,
1320                                                           &overflow)) {
1321       if (overflow)
1322          VG_(umsg)("brk segment overflow in thread #%u: can't grow to %#lx\n",
1323                    tid, newbrkP);
1324       else
1325          VG_(umsg)("Cannot map memory to grow brk segment in thread #%u "
1326                    "to %#lx\n", tid, newbrkP);
1327       goto bad;
1328    }
1329 
1330    VG_(brk_limit) = newbrk;
1331    return newbrk;
1332 
1333   bad:
1334    return VG_(brk_limit);
1335 }
1336 
1337 
1338 /* ---------------------------------------------------------------------
1339    Vet file descriptors for sanity
1340    ------------------------------------------------------------------ */
1341 /*
1342 > - what does the "Bool soft" parameter mean?
1343 
1344 (Tom Hughes, 3 Oct 05):
1345 
1346 Whether or not to consider a file descriptor invalid if it is above
1347 the current soft limit.
1348 
1349 Basically if we are testing whether a newly created file descriptor is
1350 valid (in a post handler) then we set soft to true, and if we are
1351 testing whether a file descriptor that is about to be used (in a pre
1352 handler) is valid [viz, an already-existing fd] then we set it to false.
1353 
1354 The point is that if the (virtual) soft limit is lowered then any
1355 existing descriptors can still be read/written/closed etc (so long as
1356 they are below the valgrind reserved descriptors) but no new
1357 descriptors can be created above the new soft limit.
1358 
1359 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1360 */
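
/* Example of the rule above (hypothetical numbers): with a soft limit
   of 64 and a hard limit of 1024, read()ing from an already-open fd 70
   is fine (isNewFd == False, so only the hard limit applies), but an
   open() that returns fd 70 is rejected (isNewFd == True, so the soft
   limit applies as well). */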
1361 
1362 /* Return true if we're allowed to use or create this fd */
1363 Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
1364                      Bool isNewFd)
1365 {
1366    Bool allowed = True;
1367 
1368    /* hard limits always apply */
1369    if (fd < 0 || fd >= VG_(fd_hard_limit))
1370       allowed = False;
1371 
1372    /* hijacking the output fds is never allowed */
1373    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1374       allowed = False;
1375 
1376    /* if creating a new fd (rather than using an existing one), the
1377       soft limit must also be observed */
1378    if (isNewFd && fd >= VG_(fd_soft_limit))
1379       allowed = False;
1380 
1381    /* this looks like it ought to be included, but causes problems: */
1382    /*
1383    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1384       allowed = False;
1385    */
1386    /* The difficulty is as follows: consider a program P which expects
1387       to be able to mess with (redirect) its own stderr (fd 2).
1388       Usually to deal with P we would issue command line flags to send
1389       logging somewhere other than stderr, so as not to disrupt P.
1390       The problem is that -d unilaterally hijacks stderr with no
1391       consultation with P.  And so, if this check is enabled, P will
1392       work OK normally but fail if -d is issued.
1393 
1394       Basically -d is a hack and you take your chances when using it.
1395       It's very useful for low level debugging -- particularly at
1396       startup -- and having its presence change the behaviour of the
1397       client is exactly what we don't want.  */
1398 
1399    /* croak? */
1400    if ((!allowed) && VG_(showing_core_errors)() ) {
1401       VG_(message)(Vg_UserMsg,
1402          "Warning: invalid file descriptor %d in syscall %s()\n",
1403          fd, syscallname);
1404       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1405 	 VG_(message)(Vg_UserMsg,
1406             "   Use --log-fd=<number> to select an alternative log fd.\n");
1407       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1408 	 VG_(message)(Vg_UserMsg,
1409             "   Use --xml-fd=<number> to select an alternative XML "
1410             "output fd.\n");
1411       // DDD: consider always printing this stack trace, it's useful.
1412       // Also consider making this a proper core error, i.e.
1413       // suppressible and all that.
1414       if (VG_(clo_verbosity) > 1) {
1415          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1416       }
1417    }
1418 
1419    return allowed;
1420 }
1421 
1422 
1423 /* ---------------------------------------------------------------------
1424    Deal with a bunch of socket-related syscalls
1425    ------------------------------------------------------------------ */
1426 
1427 /* ------ */
1428 
1429 void
1430 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1431                                   UWord arg0, UWord arg1,
1432                                   UWord arg2, UWord arg3 )
1433 {
1434    /* int socketpair(int d, int type, int protocol, int sv[2]); */
1435    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1436                   arg3, 2*sizeof(int) );
1437 }
1438 
1439 SysRes
1440 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1441                                    SysRes res,
1442                                    UWord arg0, UWord arg1,
1443                                    UWord arg2, UWord arg3 )
1444 {
1445    SysRes r = res;
1446    Int fd1 = ((Int*)arg3)[0];
1447    Int fd2 = ((Int*)arg3)[1];
1448    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1449    POST_MEM_WRITE( arg3, 2*sizeof(int) );
1450    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1451        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1452       VG_(close)(fd1);
1453       VG_(close)(fd2);
1454       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1455    } else {
1456       POST_MEM_WRITE( arg3, 2*sizeof(int) );
1457       if (VG_(clo_track_fds)) {
1458          ML_(record_fd_open_nameless)(tid, fd1);
1459          ML_(record_fd_open_nameless)(tid, fd2);
1460       }
1461    }
1462    return r;
1463 }
1464 
1465 /* ------ */
1466 
1467 SysRes
1468 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1469 {
1470    SysRes r = res;
1471    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1472    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1473       VG_(close)(sr_Res(res));
1474       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1475    } else {
1476       if (VG_(clo_track_fds))
1477          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1478    }
1479    return r;
1480 }
1481 
1482 /* ------ */
1483 
1484 void
1485 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1486                             UWord arg0, UWord arg1, UWord arg2 )
1487 {
1488    /* int bind(int sockfd, struct sockaddr *my_addr,
1489                int addrlen); */
1490    pre_mem_read_sockaddr(
1491       tid, "socketcall.bind(my_addr.%s)",
1492       (struct vki_sockaddr *) arg1, arg2
1493    );
1494 }
1495 
1496 /* ------ */
1497 
1498 void
1499 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1500                               UWord arg0, UWord arg1, UWord arg2 )
1501 {
1502    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1503    Addr addr_p     = arg1;
1504    Addr addrlen_p  = arg2;
1505    if (addr_p != (Addr)NULL)
1506       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1507                                    "socketcall.accept(addr)",
1508                                    "socketcall.accept(addrlen_in)" );
1509 }
1510 
1511 SysRes
1512 ML_(generic_POST_sys_accept) ( ThreadId tid,
1513                                SysRes res,
1514                                UWord arg0, UWord arg1, UWord arg2 )
1515 {
1516    SysRes r = res;
1517    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1518    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1519       VG_(close)(sr_Res(res));
1520       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1521    } else {
1522       Addr addr_p     = arg1;
1523       Addr addrlen_p  = arg2;
1524       if (addr_p != (Addr)NULL)
1525          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1526                                        "socketcall.accept(addrlen_out)" );
1527       if (VG_(clo_track_fds))
1528           ML_(record_fd_open_nameless)(tid, sr_Res(res));
1529    }
1530    return r;
1531 }
1532 
1533 /* ------ */
1534 
1535 void
1536 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1537                               UWord arg0, UWord arg1, UWord arg2,
1538                               UWord arg3, UWord arg4, UWord arg5 )
1539 {
1540    /* int sendto(int s, const void *msg, int len,
1541                  unsigned int flags,
1542                  const struct sockaddr *to, int tolen); */
1543    PRE_MEM_READ( "socketcall.sendto(msg)",
1544                  arg1, /* msg */
1545                  arg2  /* len */ );
1546    pre_mem_read_sockaddr(
1547       tid, "socketcall.sendto(to.%s)",
1548       (struct vki_sockaddr *) arg4, arg5
1549    );
1550 }
1551 
1552 /* ------ */
1553 
1554 void
1555 ML_(generic_PRE_sys_send) ( ThreadId tid,
1556                             UWord arg0, UWord arg1, UWord arg2 )
1557 {
1558    /* int send(int s, const void *msg, size_t len, int flags); */
1559    PRE_MEM_READ( "socketcall.send(msg)",
1560                   arg1, /* msg */
1561                   arg2  /* len */ );
1562 
1563 }
1564 
1565 /* ------ */
1566 
1567 void
1568 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1569                                 UWord arg0, UWord arg1, UWord arg2,
1570                                 UWord arg3, UWord arg4, UWord arg5 )
1571 {
1572    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1573                    struct sockaddr *from, int *fromlen); */
1574    Addr buf_p      = arg1;
1575    Int  len        = arg2;
1576    Addr from_p     = arg4;
1577    Addr fromlen_p  = arg5;
1578    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1579    if (from_p != (Addr)NULL)
1580       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1581                                    "socketcall.recvfrom(from)",
1582                                    "socketcall.recvfrom(fromlen_in)" );
1583 }
1584 
1585 void
1586 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1587                                  SysRes res,
1588                                  UWord arg0, UWord arg1, UWord arg2,
1589                                  UWord arg3, UWord arg4, UWord arg5 )
1590 {
1591    Addr buf_p      = arg1;
1592    Int  len        = arg2;
1593    Addr from_p     = arg4;
1594    Addr fromlen_p  = arg5;
1595 
1596    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1597    if (from_p != (Addr)NULL)
1598       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1599                                     "socketcall.recvfrom(fromlen_out)" );
1600    POST_MEM_WRITE( buf_p, len );
1601 }
1602 
1603 /* ------ */
1604 
1605 void
1606 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1607                             UWord arg0, UWord arg1, UWord arg2 )
1608 {
1609    /* int recv(int s, void *buf, int len, unsigned int flags); */
1610    /* man 2 recv says:
1611       The  recv call is normally used only on a connected socket
1612       (see connect(2)) and is identical to recvfrom with a  NULL
1613       from parameter.
1614    */
1615    PRE_MEM_WRITE( "socketcall.recv(buf)",
1616                   arg1, /* buf */
1617                   arg2  /* len */ );
1618 }
1619 
1620 void
1621 ML_(generic_POST_sys_recv) ( ThreadId tid,
1622                              UWord res,
1623                              UWord arg0, UWord arg1, UWord arg2 )
1624 {
1625    if (res >= 0 && arg1 != 0) {
1626       POST_MEM_WRITE( arg1, /* buf */
1627                       arg2  /* len */ );
1628    }
1629 }
1630 
1631 /* ------ */
1632 
1633 void
1634 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1635                                UWord arg0, UWord arg1, UWord arg2 )
1636 {
1637    /* int connect(int sockfd,
1638                   struct sockaddr *serv_addr, int addrlen ); */
1639    pre_mem_read_sockaddr( tid,
1640                           "socketcall.connect(serv_addr.%s)",
1641                           (struct vki_sockaddr *) arg1, arg2);
1642 }
1643 
1644 /* ------ */
1645 
1646 void
1647 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1648                                   UWord arg0, UWord arg1, UWord arg2,
1649                                   UWord arg3, UWord arg4 )
1650 {
1651    /* int setsockopt(int s, int level, int optname,
1652                      const void *optval, int optlen); */
1653    PRE_MEM_READ( "socketcall.setsockopt(optval)",
1654                  arg3, /* optval */
1655                  arg4  /* optlen */ );
1656 }
1657 
1658 /* ------ */
1659 
1660 void
1661 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1662                                    UWord arg0, UWord arg1, UWord arg2 )
1663 {
1664    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1665    Addr name_p     = arg1;
1666    Addr namelen_p  = arg2;
1667    /* Nb: name_p cannot be NULL */
1668    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1669                                 "socketcall.getsockname(name)",
1670                                 "socketcall.getsockname(namelen_in)" );
1671 }
1672 
1673 void
1674 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1675                                     SysRes res,
1676                                     UWord arg0, UWord arg1, UWord arg2 )
1677 {
1678    Addr name_p     = arg1;
1679    Addr namelen_p  = arg2;
1680    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1681    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1682                                  "socketcall.getsockname(namelen_out)" );
1683 }
1684 
1685 /* ------ */
1686 
1687 void
1688 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1689                                    UWord arg0, UWord arg1, UWord arg2 )
1690 {
1691    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1692    Addr name_p     = arg1;
1693    Addr namelen_p  = arg2;
1694    /* Nb: name_p cannot be NULL */
1695    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1696                                 "socketcall.getpeername(name)",
1697                                 "socketcall.getpeername(namelen_in)" );
1698 }
1699 
1700 void
1701 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1702                                     SysRes res,
1703                                     UWord arg0, UWord arg1, UWord arg2 )
1704 {
1705    Addr name_p     = arg1;
1706    Addr namelen_p  = arg2;
1707    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1708    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1709                                  "socketcall.getpeername(namelen_out)" );
1710 }
1711 
1712 /* ------ */
1713 
1714 void
1715 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
1716                                struct vki_msghdr *msg )
1717 {
1718    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
1719 }
1720 
1721 /* ------ */
1722 
1723 void
1724 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
1725                                struct vki_msghdr *msg )
1726 {
1727    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
1728 }
1729 
1730 void
1731 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
1732                                 struct vki_msghdr *msg, UInt length )
1733 {
1734    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
1735    check_cmsg_for_fds( tid, msg );
1736 }
1737 
1738 
1739 /* ---------------------------------------------------------------------
1740    Deal with a bunch of IPC related syscalls
1741    ------------------------------------------------------------------ */
1742 
1743 /* ------ */
1744 
1745 void
1746 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1747                              UWord arg0, UWord arg1, UWord arg2 )
1748 {
1749    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1750    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1751 }
1752 
1753 /* ------ */
1754 
1755 void
1756 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1757                                   UWord arg0, UWord arg1,
1758                                   UWord arg2, UWord arg3 )
1759 {
1760    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1761                      struct timespec *timeout); */
1762    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1763    if (arg3 != 0)
1764       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1765 }
1766 
1767 /* ------ */
1768 
1769 static
1770 UInt get_sem_count( Int semid )
1771 {
1772    struct vki_semid_ds buf;
1773    union vki_semun arg;
1774    SysRes res;
1775 
1776    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
1777       (experimental) otherwise complains that the use in the return
1778       statement below is uninitialised. */
1779    buf.sem_nsems = 0;
1780 
1781    arg.buf = &buf;
1782 
1783 #  if defined(__NR_semctl)
1784    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1785 #  elif defined(__NR_semsys) /* Solaris */
1786    res = VG_(do_syscall5)(__NR_semsys, VKI_SEMCTL, semid, 0, VKI_IPC_STAT,
1787                           *(UWord *)&arg);
1788 #  else
1789    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1790                           VKI_IPC_STAT, (UWord)&arg);
1791 #  endif
1792    if (sr_isError(res))
1793       return 0;
1794 
1795    return buf.sem_nsems;
1796 }
1797 
1798 void
1799 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1800                               UWord arg0, UWord arg1,
1801                               UWord arg2, UWord arg3 )
1802 {
1803    /* int semctl(int semid, int semnum, int cmd, ...); */
1804    union vki_semun arg = *(union vki_semun *)&arg3;
1805    UInt nsems;
1806    switch (arg2 /* cmd */) {
1807 #if defined(VKI_IPC_INFO)
1808    case VKI_IPC_INFO:
1809    case VKI_SEM_INFO:
1810    case VKI_IPC_INFO|VKI_IPC_64:
1811    case VKI_SEM_INFO|VKI_IPC_64:
1812       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1813                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
1814       break;
1815 #endif
1816 
1817    case VKI_IPC_STAT:
1818 #if defined(VKI_SEM_STAT)
1819    case VKI_SEM_STAT:
1820 #endif
1821       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1822                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1823       break;
1824 
1825 #if defined(VKI_IPC_64)
1826    case VKI_IPC_STAT|VKI_IPC_64:
1827 #if defined(VKI_SEM_STAT)
1828    case VKI_SEM_STAT|VKI_IPC_64:
1829 #endif
1830 #endif
1831 #if defined(VKI_IPC_STAT64)
1832    case VKI_IPC_STAT64:
1833 #endif
1834 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
1835       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1836                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1837       break;
1838 #endif
1839 
1840    case VKI_IPC_SET:
1841       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1842                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1843       break;
1844 
1845 #if defined(VKI_IPC_64)
1846    case VKI_IPC_SET|VKI_IPC_64:
1847 #endif
1848 #if defined(VKI_IPC_SET64)
1849    case VKI_IPC_SET64:
1850 #endif
1851 #if defined(VKI_IPC_64) || defined(VKI_IPC_SET64)
1852       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1853                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1854       break;
1855 #endif
1856 
1857    case VKI_GETALL:
1858 #if defined(VKI_IPC_64)
1859    case VKI_GETALL|VKI_IPC_64:
1860 #endif
1861       nsems = get_sem_count( arg0 );
1862       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1863                      (Addr)arg.array, sizeof(unsigned short) * nsems );
1864       break;
1865 
1866    case VKI_SETALL:
1867 #if defined(VKI_IPC_64)
1868    case VKI_SETALL|VKI_IPC_64:
1869 #endif
1870       nsems = get_sem_count( arg0 );
1871       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1872                     (Addr)arg.array, sizeof(unsigned short) * nsems );
1873       break;
1874    }
1875 }
1876 
1877 void
1878 ML_(generic_POST_sys_semctl) ( ThreadId tid,
1879                                UWord res,
1880                                UWord arg0, UWord arg1,
1881                                UWord arg2, UWord arg3 )
1882 {
1883    union vki_semun arg = *(union vki_semun *)&arg3;
1884    UInt nsems;
1885    switch (arg2 /* cmd */) {
1886 #if defined(VKI_IPC_INFO)
1887    case VKI_IPC_INFO:
1888    case VKI_SEM_INFO:
1889    case VKI_IPC_INFO|VKI_IPC_64:
1890    case VKI_SEM_INFO|VKI_IPC_64:
1891       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1892       break;
1893 #endif
1894 
1895    case VKI_IPC_STAT:
1896 #if defined(VKI_SEM_STAT)
1897    case VKI_SEM_STAT:
1898 #endif
1899       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1900       break;
1901 
1902 #if defined(VKI_IPC_64)
1903    case VKI_IPC_STAT|VKI_IPC_64:
1904    case VKI_SEM_STAT|VKI_IPC_64:
1905 #endif
1906 #if defined(VKI_IPC_STAT64)
1907    case VKI_IPC_STAT64:
1908 #endif
1909 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
1910       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1911       break;
1912 #endif
1913 
1914    case VKI_GETALL:
1915 #if defined(VKI_IPC_64)
1916    case VKI_GETALL|VKI_IPC_64:
1917 #endif
1918       nsems = get_sem_count( arg0 );
1919       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1920       break;
1921    }
1922 }
1923 
1924 /* ------ */
1925 
1926 /* ------ */
1927 
1928 static
1929 SizeT get_shm_size ( Int shmid )
1930 {
1931 #if defined(__NR_shmctl)
1932 #  ifdef VKI_IPC_64
1933    struct vki_shmid64_ds buf;
1934 #    if defined(VGP_amd64_linux) || defined(VGP_arm64_linux)
1935      /* See bug 222545 comment 7 */
1936      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1937                                      VKI_IPC_STAT, (UWord)&buf);
1938 #    else
1939      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1940                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
1941 #    endif
1942 #  else /* !def VKI_IPC_64 */
1943    struct vki_shmid_ds buf;
1944    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
1945 #  endif /* def VKI_IPC_64 */
1946 #elif defined(__NR_shmsys) /* Solaris */
1947    struct vki_shmid_ds buf;
1948    SysRes __res = VG_(do_syscall4)(__NR_shmsys, VKI_SHMCTL, shmid, VKI_IPC_STAT,
1949                          (UWord)&buf);
1950 #else
1951    struct vki_shmid_ds buf;
1952    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
1953                                  VKI_IPC_STAT, 0, (UWord)&buf);
1954 #endif
1955    if (sr_isError(__res))
1956       return 0;
1957 
1958    return (SizeT) buf.shm_segsz;
1959 }
1960 
1961 UWord
1962 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
1963                              UWord arg0, UWord arg1, UWord arg2 )
1964 {
1965    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
1966    SizeT  segmentSize = get_shm_size ( arg0 );
1967    UWord tmp;
1968    Bool  ok;
1969    if (arg1 == 0) {
1970       /* arm-linux only: work around the fact that
1971          VG_(am_get_advisory_client_simple) produces something that is
1972          VKI_PAGE_SIZE aligned, whereas what we want is something
1973          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
1974          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
1975          then round the result up to the next VKI_SHMLBA boundary.
1976          See bug 222545 comment 15.  So far, arm-linux is the only
1977          platform where this is known to be necessary. */
1978       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
1979       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1980          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
1981       }
1982       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
1983       if (ok) {
1984          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1985             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
1986          } else {
1987             arg1 = tmp;
1988          }
1989       }
1990    }
1991    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
1992       arg1 = 0;
1993    return arg1;
1994 }
1995 
1996 void
1997 ML_(generic_POST_sys_shmat) ( ThreadId tid,
1998                               UWord res,
1999                               UWord arg0, UWord arg1, UWord arg2 )
2000 {
2001    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
2002    if ( segmentSize > 0 ) {
2003       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
2004       Bool d;
2005 
2006       if (arg2 & VKI_SHM_RDONLY)
2007          prot &= ~VKI_PROT_WRITE;
2008       /* It isn't exactly correct to pass 0 for the fd and offset
2009          here.  The kernel seems to think the corresponding section
2010          does have dev/ino numbers:
2011 
2012          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
2013 
2014          However there is no obvious way to find them.  In order to
2015          cope with the discrepancy, aspacem's sync checker omits the
2016          dev/ino correspondence check in cases where V does not know
2017          the dev/ino. */
2018       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
2019 
2020       /* we don't distinguish whether it's read-only or
2021        * read-write -- it doesn't matter really. */
2022       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
2023                               0/*di_handle*/ );
2024       if (d)
2025          VG_(discard_translations)( (Addr)res,
2026                                     (ULong)VG_PGROUNDUP(segmentSize),
2027                                     "ML_(generic_POST_sys_shmat)" );
2028    }
2029 }
2030 
2031 /* ------ */
2032 
2033 Bool
2034 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
2035 {
2036    /* int shmdt(const void *shmaddr); */
2037    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
2038 }
2039 
2040 void
2041 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
2042 {
2043    NSegment const* s = VG_(am_find_nsegment)(arg0);
2044 
2045    if (s != NULL) {
2046       Addr  s_start = s->start;
2047       SizeT s_len   = s->end+1 - s->start;
2048       Bool  d;
2049 
2050       vg_assert(s->kind == SkShmC);
2051       vg_assert(s->start == arg0);
2052 
2053       d = VG_(am_notify_munmap)(s_start, s_len);
2054       s = NULL; /* s is now invalid */
2055       VG_TRACK( die_mem_munmap, s_start, s_len );
2056       if (d)
2057          VG_(discard_translations)( s_start,
2058                                     (ULong)s_len,
2059                                     "ML_(generic_POST_sys_shmdt)" );
2060    }
2061 }
2062 /* ------ */
2063 
2064 void
2065 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
2066                               UWord arg0, UWord arg1, UWord arg2 )
2067 {
2068    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
2069    switch (arg1 /* cmd */) {
2070 #if defined(VKI_IPC_INFO)
2071    case VKI_IPC_INFO:
2072       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2073                      arg2, sizeof(struct vki_shminfo) );
2074       break;
2075 #if defined(VKI_IPC_64)
2076    case VKI_IPC_INFO|VKI_IPC_64:
2077       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2078                      arg2, sizeof(struct vki_shminfo64) );
2079       break;
2080 #endif
2081 #endif
2082 
2083 #if defined(VKI_SHM_INFO)
2084    case VKI_SHM_INFO:
2085 #if defined(VKI_IPC_64)
2086    case VKI_SHM_INFO|VKI_IPC_64:
2087 #endif
2088       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
2089                      arg2, sizeof(struct vki_shm_info) );
2090       break;
2091 #endif
2092 
2093    case VKI_IPC_STAT:
2094 #if defined(VKI_SHM_STAT)
2095    case VKI_SHM_STAT:
2096 #endif
2097       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
2098                      arg2, sizeof(struct vki_shmid_ds) );
2099       break;
2100 
2101 #if defined(VKI_IPC_64)
2102    case VKI_IPC_STAT|VKI_IPC_64:
2103    case VKI_SHM_STAT|VKI_IPC_64:
2104       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
2105                      arg2, sizeof(struct vki_shmid64_ds) );
2106       break;
2107 #endif
2108 
2109    case VKI_IPC_SET:
2110       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2111                     arg2, sizeof(struct vki_shmid_ds) );
2112       break;
2113 
2114 #if defined(VKI_IPC_64)
2115    case VKI_IPC_SET|VKI_IPC_64:
2116       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2117                     arg2, sizeof(struct vki_shmid64_ds) );
2118       break;
2119 #endif
2120    }
2121 }
2122 
2123 void
2124 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
2125                                UWord res,
2126                                UWord arg0, UWord arg1, UWord arg2 )
2127 {
2128    switch (arg1 /* cmd */) {
2129 #if defined(VKI_IPC_INFO)
2130    case VKI_IPC_INFO:
2131       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
2132       break;
2133    case VKI_IPC_INFO|VKI_IPC_64:
2134       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
2135       break;
2136 #endif
2137 
2138 #if defined(VKI_SHM_INFO)
2139    case VKI_SHM_INFO:
2140    case VKI_SHM_INFO|VKI_IPC_64:
2141       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
2142       break;
2143 #endif
2144 
2145    case VKI_IPC_STAT:
2146 #if defined(VKI_SHM_STAT)
2147    case VKI_SHM_STAT:
2148 #endif
2149       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
2150       break;
2151 
2152 #if defined(VKI_IPC_64)
2153    case VKI_IPC_STAT|VKI_IPC_64:
2154    case VKI_SHM_STAT|VKI_IPC_64:
2155       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
2156       break;
2157 #endif
2158 
2159 
2160    }
2161 }
2162 
2163 /* ---------------------------------------------------------------------
2164    Generic handler for mmap
2165    ------------------------------------------------------------------ */
2166 
2167 /*
2168  * Although mmap is specified by POSIX and the arguments are generally
2169  * consistent across platforms, the precise details of the low-level
2170  * argument-passing conventions differ. For example:
2171  *
2172  * - On x86-linux there is mmap (aka old_mmap) which takes the
2173  *   arguments in a memory block and the offset in bytes; and
2174  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2175  *   way and the offset in pages.
2176  *
2177  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
2178  *   arguments in the normal way and the offset in bytes; and
2179  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2180  *   way and the offset in pages.
2181  *
2182  * - On amd64-linux everything is simple and there is just the one
2183  *   call, mmap (aka sys_mmap)  which takes the arguments in the
2184  *   normal way and the offset in bytes.
2185  *
2186  * - On s390x-linux there is mmap (aka old_mmap) which takes the
2187  *   arguments in a memory block and the offset in bytes. mmap2
2188  *   is also available (but not exported via unistd.h) with
2189  *   arguments in a memory block and the offset in pages.
2190  *
2191  * To cope with all this we provide a generic handler function here
2192  * and then each platform implements one or more system call handlers
2193  * which call this generic routine after extracting and normalising
2194  * the arguments.
2195  */
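
/* Illustrative sketch only (not part of the build): a platform wrapper
   for an mmap2-style call, whose sixth argument is an offset in pages
   rather than bytes, might normalise its arguments and defer to the
   generic handler roughly as follows.  The 4096-byte page size is an
   assumption for the example; real wrappers live in the per-platform
   syswrap-<arch>-<os>.c files and use the platform's actual page size.

      SysRes r = ML_(generic_PRE_sys_mmap)(
                    tid,
                    ARG1,                     // addr
                    ARG2,                     // length
                    ARG3,                     // prot
                    ARG4,                     // flags
                    ARG5,                     // fd
                    ((Off64T)ARG6) * 4096 );  // offset: pages -> bytes
      SET_STATUS_from_SysRes(r);
*/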
2196 
2197 SysRes
2198 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
2199                             UWord arg1, UWord arg2, UWord arg3,
2200                             UWord arg4, UWord arg5, Off64T arg6 )
2201 {
2202    Addr       advised;
2203    SysRes     sres;
2204    MapRequest mreq;
2205    Bool       mreq_ok;
2206 
2207 #  if defined(VGO_darwin)
2208    // Nb: we can't use this on Darwin, it has races:
2209    // * needs to RETRY if advisory succeeds but map fails
2210    //   (could have been some other thread in a nonblocking call)
2211    // * needs to not use fixed-position mmap() on Darwin
2212    //   (mmap will cheerfully smash whatever's already there, which might
2213    //   be a new mapping from some other thread in a nonblocking call)
2214    VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
2215 #  endif
2216 
2217    if (arg2 == 0) {
2218       /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
2219          shall be established. */
2220       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2221    }
2222 
2223    if (!VG_IS_PAGE_ALIGNED(arg1)) {
2224       /* zap any misaligned addresses. */
2225       /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
2226          to fail.   Here, we catch them all. */
2227       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2228    }
2229 
2230    if (!VG_IS_PAGE_ALIGNED(arg6)) {
2231       /* zap any misaligned offsets. */
2232       /* SuSV3 says: The off argument is constrained to be aligned and
2233          sized according to the value returned by sysconf() when
2234          passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
2235       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2236    }
2237 
2238    /* Figure out what kind of allocation constraints there are
2239       (fixed/hint/any), and ask aspacem what we should do. */
2240    mreq.start = arg1;
2241    mreq.len   = arg2;
2242    if (arg4 & VKI_MAP_FIXED) {
2243       mreq.rkind = MFixed;
2244    } else
2245 #if defined(VKI_MAP_ALIGN) /* Solaris specific */
2246    if (arg4 & VKI_MAP_ALIGN) {
2247       mreq.rkind = MAlign;
2248       if (mreq.start == 0) {
2249          mreq.start = VKI_PAGE_SIZE;
2250       }
2251       /* VKI_MAP_FIXED and VKI_MAP_ALIGN don't like each other. */
2252       arg4 &= ~VKI_MAP_ALIGN;
2253    } else
2254 #endif
2255    if (arg1 != 0) {
2256       mreq.rkind = MHint;
2257    } else {
2258       mreq.rkind = MAny;
2259    }
2260 
2261    /* Enquire ... */
2262    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2263    if (!mreq_ok) {
2264       /* Our request was bounced, so we'd better fail. */
2265       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2266    }
2267 
2268 #  if defined(VKI_MAP_32BIT)
2269    /* MAP_32BIT is royally unportable, so if the client asks for it, try our
2270       best to make it work (but without complicating aspacemgr).
2271       If the user requested MAP_32BIT, the mmap-ed space must be in the
2272       first 2GB of the address space. So, return ENOMEM if the aspacemgr
2273       advisory is above the first 2GB. If MAP_FIXED is also requested,
2274       MAP_32BIT has to be ignored.
2275       Assumption about aspacemgr behaviour: aspacemgr scans the address space
2276       from low addresses to find a free segment. No special effort is made
2277       to keep the first 2GB 'free' for this MAP_32BIT. So, this will often
2278       fail once the program has already allocated significant memory. */
2279    if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)) {
2280       if (advised + arg2 >= 0x80000000)
2281          return VG_(mk_SysRes_Error)( VKI_ENOMEM );
2282    }
2283 #  endif
2284 
2285    /* Otherwise we're OK (so far).  Install aspacem's choice of
2286       address, and let the mmap go through.  */
2287    sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2288                                     arg4 | VKI_MAP_FIXED,
2289                                     arg5, arg6);
2290 
2291 #  if defined(VKI_MAP_32BIT)
2292    /* No recovery trial if the advisory was not accepted. */
2293    if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)
2294        && sr_isError(sres)) {
2295       return VG_(mk_SysRes_Error)( VKI_ENOMEM );
2296    }
2297 #  endif
2298 
2299    /* A refinement: it may be that the kernel refused aspacem's choice
2300       of address.  If we were originally asked for a hinted mapping,
2301       there is still a last chance: try again at any address.
2302       Hence: */
2303    if (mreq.rkind == MHint && sr_isError(sres)) {
2304       mreq.start = 0;
2305       mreq.len   = arg2;
2306       mreq.rkind = MAny;
2307       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2308       if (!mreq_ok) {
2309          /* Our request was bounced, so we'd better fail. */
2310          return VG_(mk_SysRes_Error)( VKI_EINVAL );
2311       }
2312       /* and try again with the kernel */
2313       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2314                                        arg4 | VKI_MAP_FIXED,
2315                                        arg5, arg6);
2316    }
2317 
2318    /* Yet another refinement: sometimes valgrind chooses an address
2319       which is not acceptable to the kernel. This at least happens
2320       when mmap-ing huge pages, using the flag MAP_HUGETLB.
2321       valgrind's aspacem does not know about huge pages, and modifying
2322       it to handle huge pages is not straightforward (e.g. it would need
2323       to understand special file system mount options).
2324       So, let's just redo the mmap without giving any constraint to
2325       the kernel. If that succeeds, check with aspacem that the returned
2326       address is acceptable.
2327       This has much the same effect as if the user had
2328       hinted that address.
2329       The aspacem state will be correctly updated afterwards.
2330       We cannot, however, apply this last refinement when the user asked
2331       for a fixed mapping, since the user asked for a specific address. */
2332    if (sr_isError(sres) && !(arg4 & VKI_MAP_FIXED)) {
2333       advised = 0;
2334       /* try mmap with NULL address and without VKI_MAP_FIXED
2335          to let the kernel decide. */
2336       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2337                                        arg4,
2338                                        arg5, arg6);
2339       if (!sr_isError(sres)) {
2340          /* The kernel is supposed to know what it is doing, but let's
2341             do a last sanity check anyway, as if the chosen address had
2342             been initially hinted by the client. The whole point of this
2343             last try was to allow mmap of huge pages to succeed without
2344             making aspacem understand them, on the other hand the kernel
2345             does not know about valgrind reservations, so this mapping
2346             can end up in free space and reservations. */
2347          mreq.start = (Addr)sr_Res(sres);
2348          mreq.len   = arg2;
2349          mreq.rkind = MHint;
2350          advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2351          vg_assert(mreq_ok && advised == mreq.start);
2352       }
2353    }
2354 
2355    if (!sr_isError(sres)) {
2356       ULong di_handle;
2357       /* Notify aspacem. */
2358       notify_core_of_mmap(
2359          (Addr)sr_Res(sres), /* addr kernel actually assigned */
2360          arg2, /* length */
2361          arg3, /* prot */
2362          arg4, /* the original flags value */
2363          arg5, /* fd */
2364          arg6  /* offset */
2365       );
2366       /* Load symbols? */
2367       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
2368                                        False/*allow_SkFileV*/, (Int)arg5 );
2369       /* Notify the tool. */
2370       notify_tool_of_mmap(
2371          (Addr)sr_Res(sres), /* addr kernel actually assigned */
2372          arg2, /* length */
2373          arg3, /* prot */
2374          di_handle /* so the tool can refer to the read debuginfo later,
2375                       if it wants. */
2376       );
2377    }
2378 
2379    /* Stay sane */
2380    if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
2381       vg_assert(sr_Res(sres) == arg1);
2382 
2383    return sres;
2384 }
2385 
2386 
2387 /* ---------------------------------------------------------------------
2388    The Main Entertainment ... syscall wrappers
2389    ------------------------------------------------------------------ */
2390 
2391 /* Note: the PRE() and POST() wrappers are for the actual functions
2392    implementing the system calls in the OS kernel.  These mostly have
2393    names like sys_write();  a few have names like old_mmap().  See the
2394    comment for ML_(syscall_table)[] for important info about the __NR_foo
2395    constants and their relationship to the sys_foo() functions.
2396 
2397    Some notes about names used for syscalls and args:
2398    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2399      ambiguity.
2400 
2401    - For error messages, we generally use a somewhat generic name
2402      for the syscall (eg. "write" rather than "sys_write").  This should be
2403      good enough for the average user to understand what is happening,
2404      without confusing them with names like "sys_write".
2405 
2406    - Also, for error messages the arg names are mostly taken from the man
2407      pages (even though many of those man pages are really for glibc
2408      functions of the same name), rather than from the OS kernel source,
2409      for the same reason -- a user presented with a "bogus foo(bar)" arg
2410      will most likely look at the "foo" man page to see which is the "bar"
2411      arg.
2412 
2413    Note that we use our own vki_* types.  The one exception is in
2414    PRE_REG_READn calls, where pointer types haven't been changed, because
2415    they don't need to be -- eg. for "foo*" to be used, the type foo need not
2416    be visible.
2417 
2418    XXX: some of these are arch-specific, and should be factored out.
2419 */
2420 
2421 #define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
2422 #define POST(name)     DEFN_POST_TEMPLATE(generic, name)
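
/* A minimal sketch of how these templates get used, assuming the
   DEFN_*_TEMPLATE macros from priv_types_n_macros.h: writing

      PRE(sys_foo)  { ... }
      POST(sys_foo) { ... }

   defines the pre- and post-syscall handlers for sys_foo in the
   "generic" namespace; a platform's syscall table then refers to the
   pair via its GENX_/GENXY-style table entries.  The exact expansion
   (handler name and parameter list) is fixed by priv_types_n_macros.h,
   not here. */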
2423 
2424 // Macros to support 64-bit syscall args split into two 32 bit values
2425 #if defined(VG_LITTLEENDIAN)
2426 #define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2427 #define MERGE64_FIRST(name) name##_low
2428 #define MERGE64_SECOND(name) name##_high
2429 #elif defined(VG_BIGENDIAN)
2430 #define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2431 #define MERGE64_FIRST(name) name##_high
2432 #define MERGE64_SECOND(name) name##_low
2433 #else
2434 #error Unknown endianness
2435 #endif
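
/* Example of intended use (see e.g. PRE(sys_pwrite64) below): on a
   32-bit little-endian platform a 64-bit file offset arrives split
   across two word-sized args, ARG4 (low half) and ARG5 (high half),
   and is reassembled with

      Long offset = (Long)MERGE64(ARG4, ARG5);

   while the matching PRE_REG_READ annotation names the two halves as
   MERGE64_FIRST(offset) and MERGE64_SECOND(offset) so the reported
   register reads follow the platform's ordering.  On a big-endian
   platform the same source works because the macros swap which
   argument is treated as the high and low half. */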
2436 
2437 PRE(sys_exit)
2438 {
2439    ThreadState* tst;
2440    /* simple; just make this thread exit */
2441    PRINT("exit( %ld )", SARG1);
2442    PRE_REG_READ1(void, "exit", int, status);
2443    tst = VG_(get_ThreadState)(tid);
2444    /* Set the thread's status to be exiting, then claim that the
2445       syscall succeeded. */
2446    tst->exitreason = VgSrc_ExitThread;
2447    tst->os_state.exitcode = ARG1;
2448    SET_STATUS_Success(0);
2449 }
2450 
2451 PRE(sys_ni_syscall)
2452 {
2453    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
2454       VG_SYSNUM_STRING(SYSNO));
2455    PRE_REG_READ0(long, "ni_syscall");
2456    SET_STATUS_Failure( VKI_ENOSYS );
2457 }
2458 
2459 PRE(sys_iopl)
2460 {
2461    PRINT("sys_iopl ( %lu )", ARG1);
2462    PRE_REG_READ1(long, "iopl", unsigned long, level);
2463 }
2464 
2465 PRE(sys_fsync)
2466 {
2467    *flags |= SfMayBlock;
2468    PRINT("sys_fsync ( %lu )", ARG1);
2469    PRE_REG_READ1(long, "fsync", unsigned int, fd);
2470 }
2471 
2472 PRE(sys_fdatasync)
2473 {
2474    *flags |= SfMayBlock;
2475    PRINT("sys_fdatasync ( %lu )", ARG1);
2476    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
2477 }
2478 
2479 PRE(sys_msync)
2480 {
2481    *flags |= SfMayBlock;
2482    PRINT("sys_msync ( %#lx, %lu, %#lx )", ARG1, ARG2, ARG3);
2483    PRE_REG_READ3(long, "msync",
2484                  unsigned long, start, vki_size_t, length, int, flags);
2485    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
2486 }
2487 
2488 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2489 // versions of LiS (Linux Streams).  They are not part of the kernel.
2490 // Therefore, we have to provide this type ourselves, rather than getting it
2491 // from the kernel sources.
2492 struct vki_pmsg_strbuf {
2493    int     maxlen;         /* no. of bytes in buffer */
2494    int     len;            /* no. of bytes returned */
2495    vki_caddr_t buf;        /* pointer to data */
2496 };
2497 PRE(sys_getpmsg)
2498 {
2499    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2500    struct vki_pmsg_strbuf *ctrl;
2501    struct vki_pmsg_strbuf *data;
2502    *flags |= SfMayBlock;
2503    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", SARG1, ARG2, ARG3,
2504          ARG4, ARG5);
2505    PRE_REG_READ5(int, "getpmsg",
2506                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2507                  int *, bandp, int *, flagsp);
2508    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2509    data = (struct vki_pmsg_strbuf *)ARG3;
2510    if (ctrl && ctrl->maxlen > 0)
2511       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2512    if (data && data->maxlen > 0)
2513       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2514    if (ARG4)
2515       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2516    if (ARG5)
2517       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
2518 }
2519 POST(sys_getpmsg)
2520 {
2521    struct vki_pmsg_strbuf *ctrl;
2522    struct vki_pmsg_strbuf *data;
2523    vg_assert(SUCCESS);
2524    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2525    data = (struct vki_pmsg_strbuf *)ARG3;
2526    if (RES == 0 && ctrl && ctrl->len > 0) {
2527       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2528    }
2529    if (RES == 0 && data && data->len > 0) {
2530       POST_MEM_WRITE( (Addr)data->buf, data->len);
2531    }
2532 }
2533 
2534 PRE(sys_putpmsg)
2535 {
2536    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2537    struct vki_pmsg_strbuf *ctrl;
2538    struct vki_pmsg_strbuf *data;
2539    *flags |= SfMayBlock;
2540    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", SARG1, ARG2, ARG3,
2541          SARG4, SARG5);
2542    PRE_REG_READ5(int, "putpmsg",
2543                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2544                  int, band, int, flags);
2545    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2546    data = (struct vki_pmsg_strbuf *)ARG3;
2547    if (ctrl && ctrl->len > 0)
2548       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2549    if (data && data->len > 0)
2550       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
2551 }
2552 
2553 PRE(sys_getitimer)
2554 {
2555    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2556    PRINT("sys_getitimer ( %ld, %#lx )", SARG1, ARG2);
2557    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2558 
2559    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2560    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
2561 }
2562 
2563 POST(sys_getitimer)
2564 {
2565    if (ARG2 != (Addr)NULL) {
2566       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2567       POST_timeval_WRITE( &(value->it_interval) );
2568       POST_timeval_WRITE( &(value->it_value) );
2569    }
2570 }
2571 
2572 PRE(sys_setitimer)
2573 {
2574    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", SARG1, ARG2, ARG3);
2575    PRE_REG_READ3(long, "setitimer",
2576                  int, which,
2577                  struct itimerval *, value, struct itimerval *, ovalue);
2578    if (ARG2 != (Addr)NULL) {
2579       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2580       PRE_timeval_READ( "setitimer(&value->it_interval)",
2581                          &(value->it_interval));
2582       PRE_timeval_READ( "setitimer(&value->it_value)",
2583                          &(value->it_value));
2584    }
2585    if (ARG3 != (Addr)NULL) {
2586       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2587       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2588                          &(ovalue->it_interval));
2589       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2590                          &(ovalue->it_value));
2591    }
2592 }
2593 
2594 POST(sys_setitimer)
2595 {
2596    if (ARG3 != (Addr)NULL) {
2597       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2598       POST_timeval_WRITE( &(ovalue->it_interval) );
2599       POST_timeval_WRITE( &(ovalue->it_value) );
2600    }
2601 }
2602 
2603 PRE(sys_chroot)
2604 {
2605    PRINT("sys_chroot ( %#lx )", ARG1);
2606    PRE_REG_READ1(long, "chroot", const char *, path);
2607    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
2608 }
2609 
2610 PRE(sys_madvise)
2611 {
2612    *flags |= SfMayBlock;
2613    PRINT("sys_madvise ( %#lx, %lu, %ld )", ARG1, ARG2, SARG3);
2614    PRE_REG_READ3(long, "madvise",
2615                  unsigned long, start, vki_size_t, length, int, advice);
2616 }
2617 
2618 #if HAVE_MREMAP
2619 PRE(sys_mremap)
2620 {
2621    // Nb: this is different to the glibc version described in the man pages,
2622    // which lacks the fifth 'new_address' argument.
2623    if (ARG4 & VKI_MREMAP_FIXED) {
2624       PRINT("sys_mremap ( %#lx, %lu, %lu, %#lx, %#lx )",
2625             ARG1, ARG2, ARG3, ARG4, ARG5);
2626       PRE_REG_READ5(unsigned long, "mremap",
2627                     unsigned long, old_addr, unsigned long, old_size,
2628                     unsigned long, new_size, unsigned long, flags,
2629                     unsigned long, new_addr);
2630    } else {
2631       PRINT("sys_mremap ( %#lx, %lu, %lu, 0x%lx )",
2632             ARG1, ARG2, ARG3, ARG4);
2633       PRE_REG_READ4(unsigned long, "mremap",
2634                     unsigned long, old_addr, unsigned long, old_size,
2635                     unsigned long, new_size, unsigned long, flags);
2636    }
2637    SET_STATUS_from_SysRes(
2638       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
2639    );
2640 }
2641 #endif /* HAVE_MREMAP */
2642 
2643 PRE(sys_nice)
2644 {
2645    PRINT("sys_nice ( %ld )", SARG1);
2646    PRE_REG_READ1(long, "nice", int, inc);
2647 }
2648 
2649 PRE(sys_mlock)
2650 {
2651    *flags |= SfMayBlock;
2652    PRINT("sys_mlock ( %#lx, %lu )", ARG1, ARG2);
2653    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
2654 }
2655 
2656 PRE(sys_munlock)
2657 {
2658    *flags |= SfMayBlock;
2659    PRINT("sys_munlock ( %#lx, %lu )", ARG1, ARG2);
2660    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
2661 }
2662 
2663 PRE(sys_mlockall)
2664 {
2665    *flags |= SfMayBlock;
2666    PRINT("sys_mlockall ( %lx )", ARG1);
2667    PRE_REG_READ1(long, "mlockall", int, flags);
2668 }
2669 
2670 PRE(sys_setpriority)
2671 {
2672    PRINT("sys_setpriority ( %ld, %ld, %ld )", SARG1, SARG2, SARG3);
2673    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
2674 }
2675 
2676 PRE(sys_getpriority)
2677 {
2678    PRINT("sys_getpriority ( %ld, %ld )", SARG1, SARG2);
2679    PRE_REG_READ2(long, "getpriority", int, which, int, who);
2680 }
2681 
2682 PRE(sys_pwrite64)
2683 {
2684    *flags |= SfMayBlock;
2685 #if VG_WORDSIZE == 4
2686    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %lld )",
2687          ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
2688    PRE_REG_READ5(ssize_t, "pwrite64",
2689                  unsigned int, fd, const char *, buf, vki_size_t, count,
2690                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2691 #elif VG_WORDSIZE == 8
2692    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %ld )",
2693          ARG1, ARG2, ARG3, SARG4);
2694    PRE_REG_READ4(ssize_t, "pwrite64",
2695                  unsigned int, fd, const char *, buf, vki_size_t, count,
2696                  Word, offset);
2697 #else
2698 #  error Unexpected word size
2699 #endif
2700    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
2701 }
2702 
2703 PRE(sys_sync)
2704 {
2705    *flags |= SfMayBlock;
2706    PRINT("sys_sync ( )");
2707    PRE_REG_READ0(long, "sync");
2708 }
2709 
2710 PRE(sys_fstatfs)
2711 {
2712    FUSE_COMPATIBLE_MAY_BLOCK();
2713    PRINT("sys_fstatfs ( %lu, %#lx )", ARG1, ARG2);
2714    PRE_REG_READ2(long, "fstatfs",
2715                  unsigned int, fd, struct statfs *, buf);
2716    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
2717 }
2718 
2719 POST(sys_fstatfs)
2720 {
2721    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
2722 }
2723 
2724 PRE(sys_fstatfs64)
2725 {
2726    FUSE_COMPATIBLE_MAY_BLOCK();
2727    PRINT("sys_fstatfs64 ( %lu, %lu, %#lx )", ARG1, ARG2, ARG3);
2728    PRE_REG_READ3(long, "fstatfs64",
2729                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
2730    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
2731 }
2732 POST(sys_fstatfs64)
2733 {
2734    POST_MEM_WRITE( ARG3, ARG2 );
2735 }
2736 
2737 PRE(sys_getsid)
2738 {
2739    PRINT("sys_getsid ( %ld )", SARG1);
2740    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
2741 }
2742 
2743 PRE(sys_pread64)
2744 {
2745    *flags |= SfMayBlock;
2746 #if VG_WORDSIZE == 4
2747    PRINT("sys_pread64 ( %lu, %#lx, %lu, %lld )",
2748          ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
2749    PRE_REG_READ5(ssize_t, "pread64",
2750                  unsigned int, fd, char *, buf, vki_size_t, count,
2751                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2752 #elif VG_WORDSIZE == 8
2753    PRINT("sys_pread64 ( %lu, %#lx, %lu, %ld )",
2754          ARG1, ARG2, ARG3, SARG4);
2755    PRE_REG_READ4(ssize_t, "pread64",
2756                  unsigned int, fd, char *, buf, vki_size_t, count,
2757                  Word, offset);
2758 #else
2759 #  error Unexpected word size
2760 #endif
2761    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
2762 }
2763 POST(sys_pread64)
2764 {
2765    vg_assert(SUCCESS);
2766    if (RES > 0) {
2767       POST_MEM_WRITE( ARG2, RES );
2768    }
2769 }
2770 
2771 PRE(sys_mknod)
2772 {
2773    FUSE_COMPATIBLE_MAY_BLOCK();
2774    PRINT("sys_mknod ( %#lx(%s), %#lx, %#lx )", ARG1, (HChar*)ARG1, ARG2, ARG3 );
2775    PRE_REG_READ3(long, "mknod",
2776                  const char *, pathname, int, mode, unsigned, dev);
2777    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
2778 }
2779 
2780 PRE(sys_flock)
2781 {
2782    *flags |= SfMayBlock;
2783    PRINT("sys_flock ( %lu, %lu )", ARG1, ARG2 );
2784    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
2785 }
2786 
2787 // Pre_read a char** argument.
2788 void ML_(pre_argv_envp)(Addr a, ThreadId tid, const HChar *s1, const HChar *s2)
2789 {
2790    while (True) {
2791       Addr a_deref;
2792       Addr* a_p = (Addr*)a;
2793       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2794       a_deref = *a_p;
2795       if (0 == a_deref)
2796          break;
2797       PRE_MEM_RASCIIZ( s2, a_deref );
2798       a += sizeof(char*);
2799    }
2800 }
2801 
2802 static Bool i_am_the_only_thread ( void )
2803 {
2804    Int c = VG_(count_living_threads)();
2805    vg_assert(c >= 1); /* stay sane */
2806    return c == 1;
2807 }
2808 
2809 /* Wait until all other threads disappear. */
2810 void VG_(reap_threads)(ThreadId self)
2811 {
2812    while (!i_am_the_only_thread()) {
2813       /* Let other thread(s) run */
2814       VG_(vg_yield)();
2815       VG_(poll_signals)(self);
2816    }
2817    vg_assert(i_am_the_only_thread());
2818 }
2819 
2820 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2821 // but it seems to work nonetheless...
2822 PRE(sys_execve)
2823 {
2824    HChar*       path = NULL;       /* path to executable */
2825    HChar**      envp = NULL;
2826    HChar**      argv = NULL;
2827    HChar**      arg2copy;
2828    HChar*       launcher_basename = NULL;
2829    ThreadState* tst;
2830    Int          i, j, tot_args;
2831    SysRes       res;
2832    Bool         setuid_allowed, trace_this_child;
2833 
2834    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
2835    PRE_REG_READ3(vki_off_t, "execve",
2836                  char *, filename, char **, argv, char **, envp);
2837    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
2838    if (ARG2 != 0)
2839       ML_(pre_argv_envp)( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2840    if (ARG3 != 0)
2841       ML_(pre_argv_envp)( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2842 
2843    vg_assert(VG_(is_valid_tid)(tid));
2844    tst = VG_(get_ThreadState)(tid);
2845 
2846    /* Erk.  If the exec fails, then the following will have made a
2847       mess of things which makes it hard for us to continue.  The
2848       right thing to do is piece everything together again in
2849       POST(execve), but that's close to impossible.  Instead, we make
2850       an effort to check that the execve will work before actually
2851       doing it. */
2852 
2853    /* Check that the name at least begins in client-accessible storage. */
2854    if (ARG1 == 0 /* obviously bogus */
2855        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2856       SET_STATUS_Failure( VKI_EFAULT );
2857       return;
2858    }
2859 
2860    // debug-only printing
2861    if (0) {
2862       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
2863       if (ARG2) {
2864          VG_(printf)("ARG2 = ");
2865          Int q;
2866          HChar** vec = (HChar**)ARG2;
2867          for (q = 0; vec[q]; q++)
2868             VG_(printf)("%p(%s) ", vec[q], vec[q]);
2869          VG_(printf)("\n");
2870       } else {
2871          VG_(printf)("ARG2 = null\n");
2872       }
2873    }
2874 
2875    // Decide whether or not we want to follow along
2876    { // Make 'child_argv' be a pointer to the child's arg vector
2877      // (skipping the exe name)
2878      const HChar** child_argv = (const HChar**)ARG2;
2879      if (child_argv && child_argv[0] == NULL)
2880         child_argv = NULL;
2881      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
2882    }
2883 
2884    // Do the important checks:  it is a file, is executable, permissions are
2885    // ok, etc.  We allow setuid executables to run only in the case when
2886    // we are not simulating them, that is, when they are to be run natively.
2887    setuid_allowed = trace_this_child  ? False  : True;
2888    res = VG_(pre_exec_check)((const HChar *)ARG1, NULL, setuid_allowed);
2889    if (sr_isError(res)) {
2890       SET_STATUS_Failure( sr_Err(res) );
2891       return;
2892    }
2893 
2894    /* If we're tracing the child, and the launcher name looks bogus
2895       (possibly because launcher.c couldn't figure it out, see
2896       comments therein) then we have no option but to fail. */
2897    if (trace_this_child
2898        && (VG_(name_of_launcher) == NULL
2899            || VG_(name_of_launcher)[0] != '/')) {
2900       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2901       return;
2902    }
2903 
2904    /* After this point, we can't recover if the execve fails. */
2905    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG1);
2906 
2907 
2908    // Terminate gdbserver if it is active.
2909    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
2910       // If the child will not be traced, we need to terminate gdbserver
2911       // to cleanup the gdbserver resources (e.g. the FIFO files).
2912       // If child will be traced, we also terminate gdbserver: the new
2913       // Valgrind will start a fresh gdbserver after exec.
2914       VG_(gdbserver) (0);
2915    }
2916 
2917    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
2918       this. (Really, nuke them all, since the new process will make
2919       its own new thread.) */
2920    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2921    VG_(reap_threads)(tid);
2922 
2923    // Set up the child's exe path.
2924    //
2925    if (trace_this_child) {
2926 
2927       // We want to exec the launcher.  Get its pre-remembered path.
2928       path = VG_(name_of_launcher);
2929       // VG_(name_of_launcher) should have been acquired by m_main at
2930       // startup.
2931       vg_assert(path);
2932 
2933       launcher_basename = VG_(strrchr)(path, '/');
2934       if (launcher_basename == NULL || launcher_basename[1] == 0) {
2935          launcher_basename = path;  // hmm, tres dubious
2936       } else {
2937          launcher_basename++;
2938       }
2939 
2940    } else {
2941       path = (HChar*)ARG1;
2942    }
2943 
2944    // Set up the child's environment.
2945    //
2946    // Remove the valgrind-specific stuff from the environment so the
2947    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2948    // This is done unconditionally, since if we are tracing the child,
2949    // the child valgrind will set up the appropriate client environment.
2950    // Nb: we make a copy of the environment before trying to mangle it
2951    // as it might be in read-only memory (this was bug #101881).
2952    //
2953    // Then, if tracing the child, set VALGRIND_LIB for it.
2954    //
2955    if (ARG3 == 0) {
2956       envp = NULL;
2957    } else {
2958       envp = VG_(env_clone)( (HChar**)ARG3 );
2959       if (envp == NULL) goto hosed;
2960       VG_(env_remove_valgrind_env_stuff)( envp, True /*ro_strings*/, NULL );
2961    }
2962 
2963    if (trace_this_child) {
2964       // Set VALGRIND_LIB in ARG3 (the environment)
2965       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2966    }
2967 
2968    // Set up the child's args.  If not tracing it, they are
2969    // simply ARG2.  Otherwise, they are
2970    //
2971    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2972    //
2973    // except that the first VG_(args_for_valgrind_noexecpass) args
2974    // are omitted.
2975    //
2976    if (!trace_this_child) {
2977       argv = (HChar**)ARG2;
2978    } else {
2979       vg_assert( VG_(args_for_valgrind) );
2980       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
2981       vg_assert( VG_(args_for_valgrind_noexecpass)
2982                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
2983       /* how many args in total will there be? */
2984       // launcher basename
2985       tot_args = 1;
2986       // V's args
2987       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
2988       tot_args -= VG_(args_for_valgrind_noexecpass);
2989       // name of client exe
2990       tot_args++;
2991       // args for client exe, skipping [0]
2992       arg2copy = (HChar**)ARG2;
2993       if (arg2copy && arg2copy[0]) {
2994          for (i = 1; arg2copy[i]; i++)
2995             tot_args++;
2996       }
2997       // allocate
2998       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
2999                           (tot_args+1) * sizeof(HChar*) );
3000       // copy
3001       j = 0;
3002       argv[j++] = launcher_basename;
3003       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
3004          if (i < VG_(args_for_valgrind_noexecpass))
3005             continue;
3006          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
3007       }
3008       argv[j++] = (HChar*)ARG1;
3009       if (arg2copy && arg2copy[0])
3010          for (i = 1; arg2copy[i]; i++)
3011             argv[j++] = arg2copy[i];
3012       argv[j++] = NULL;
3013       // check
3014       vg_assert(j == tot_args+1);
3015    }
3016 
3017    /* restore the DATA rlimit for the child */
3018    VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
3019 
3020    /*
3021       Set the signal state up for exec.
3022 
3023       We need to set the real signal state to make sure the exec'd
3024       process gets SIG_IGN properly.
3025 
3026       Also set our real sigmask to match the client's sigmask so that
3027       the exec'd child will get the right mask.  First we need to
3028       clear out any pending signals so that they don't get delivered,
3029       which would confuse things.
3030 
3031       XXX This is a bug - the signals should remain pending, and be
3032       delivered to the new process after exec.  There's also a
3033       race-condition, since if someone delivers us a signal between
3034       the sigprocmask and the execve, we'll still get the signal. Oh
3035       well.
3036    */
3037    {
3038       vki_sigset_t allsigs;
3039       vki_siginfo_t info;
3040 
3041       /* What this loop does: it queries SCSS (the signal state that
3042          the client _thinks_ the kernel is in) by calling
3043          VG_(do_sys_sigaction), and modifies the real kernel signal
3044          state accordingly. */
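      /* For example: a handler the client installed for SIGINT must not
         survive into the exec'd image (POSIX resets handled signals to
         SIG_DFL across exec), whereas a disposition of SIG_IGN must be
         preserved.  That is exactly the split made below: SIG_IGN is
         pushed to the kernel unchanged, everything else is forced back
         to SIG_DFL. */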
3045       for (i = 1; i < VG_(max_signal); i++) {
3046          vki_sigaction_fromK_t sa_f;
3047          vki_sigaction_toK_t   sa_t;
3048          VG_(do_sys_sigaction)(i, NULL, &sa_f);
3049          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
3050          if (sa_t.ksa_handler == VKI_SIG_IGN)
3051             VG_(sigaction)(i, &sa_t, NULL);
3052          else {
3053             sa_t.ksa_handler = VKI_SIG_DFL;
3054             VG_(sigaction)(i, &sa_t, NULL);
3055          }
3056       }
3057 
3058       VG_(sigfillset)(&allsigs);
3059       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
3060          ;
3061 
3062       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
3063    }
3064 
3065    if (0) {
3066       HChar **cpp;
3067       VG_(printf)("exec: %s\n", path);
3068       for (cpp = argv; cpp && *cpp; cpp++)
3069          VG_(printf)("argv: %s\n", *cpp);
3070       if (0)
3071          for (cpp = envp; cpp && *cpp; cpp++)
3072             VG_(printf)("env: %s\n", *cpp);
3073    }
3074 
3075    SET_STATUS_from_SysRes(
3076       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
3077    );
3078 
3079    /* If we got here, then the execve failed.  We've already made way
3080       too much of a mess to continue, so we have to abort. */
3081   hosed:
3082    vg_assert(FAILURE);
3083    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %lu\n",
3084                 ARG1, (HChar*)ARG1, ARG2, ARG3, ERR);
3085    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
3086                             "execve() failing, so I'm dying.\n");
3087    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
3088                             "or work out how to recover.\n");
3089    VG_(exit)(101);
3090 }
3091 
3092 PRE(sys_access)
3093 {
3094    PRINT("sys_access ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
3095    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
3096    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
3097 }
3098 
3099 PRE(sys_alarm)
3100 {
3101    PRINT("sys_alarm ( %lu )", ARG1);
3102    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
3103 }
3104 
3105 PRE(sys_brk)
3106 {
3107    Addr brk_limit = VG_(brk_limit);
3108    Addr brk_new;
3109 
3110    /* libc   says: int   brk(void *end_data_segment);
3111       kernel says: void* brk(void* end_data_segment);  (more or less)
3112 
3113       libc returns 0 on success, and -1 (and sets errno) on failure.
3114       Nb: if you ask to shrink the dataseg end below what it
3115       currently is, that always succeeds, even if the dataseg end
3116       doesn't actually change (eg. brk(0)).  Unless it seg faults.
3117 
3118       Kernel returns the new dataseg end.  If the brk() failed, this
3119       will be unchanged from the old one.  That's why calling (kernel)
3120       brk(0) gives the current dataseg end (libc brk() just returns
3121       zero in that case).
3122 
3123       Both will seg fault if you shrink it back into a text segment.
3124    */
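   /* A compact illustration of the kernel-side convention described
      above (addresses made up): with the break currently at 0x0804c000,

         brk(0x08050000) -> 0x08050000   request granted, break moved
         brk(0x00001000) -> 0x0804c000   request refused (below the data
                                         segment), break unchanged

      so success is detected by comparing the result with the requested
      address -- which is exactly the check made on brk_new below. */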
3125    PRINT("sys_brk ( %#lx )", ARG1);
3126    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
3127 
3128    brk_new = do_brk(ARG1, tid);
3129    SET_STATUS_Success( brk_new );
3130 
3131    if (brk_new == ARG1) {
3132       /* brk() succeeded */
3133       if (brk_new < brk_limit) {
3134          /* successfully shrunk the data segment. */
3135          VG_TRACK( die_mem_brk, (Addr)ARG1,
3136 		   brk_limit-ARG1 );
3137       } else
3138       if (brk_new > brk_limit) {
3139          /* successfully grew the data segment */
3140          VG_TRACK( new_mem_brk, brk_limit,
3141                    ARG1-brk_limit, tid );
3142       }
3143    } else {
3144       /* brk() failed */
3145       vg_assert(brk_limit == brk_new);
3146    }
3147 }
3148 
3149 PRE(sys_chdir)
3150 {
3151    FUSE_COMPATIBLE_MAY_BLOCK();
3152    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
3153    PRE_REG_READ1(long, "chdir", const char *, path);
3154    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
3155 }
3156 
3157 PRE(sys_chmod)
3158 {
3159    FUSE_COMPATIBLE_MAY_BLOCK();
3160    PRINT("sys_chmod ( %#lx(%s), %lu )", ARG1, (HChar*)ARG1, ARG2);
3161    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
3162    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
3163 }
3164 
3165 PRE(sys_chown)
3166 {
3167    FUSE_COMPATIBLE_MAY_BLOCK();
3168    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
3169    PRE_REG_READ3(long, "chown",
3170                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
3171    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
3172 }
3173 
3174 PRE(sys_lchown)
3175 {
3176    FUSE_COMPATIBLE_MAY_BLOCK();
3177    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
3178    PRE_REG_READ3(long, "lchown",
3179                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
3180    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
3181 }
3182 
3183 PRE(sys_close)
3184 {
3185    FUSE_COMPATIBLE_MAY_BLOCK();
3186    PRINT("sys_close ( %lu )", ARG1);
3187    PRE_REG_READ1(long, "close", unsigned int, fd);
3188 
3189    /* Detect and negate attempts by the client to close Valgrind's log fd */
3190    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
3191         /* If doing -d style logging (which is to fd=2), don't
3192            allow that to be closed either. */
3193         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
3194       SET_STATUS_Failure( VKI_EBADF );
3195 }
3196 
3197 POST(sys_close)
3198 {
3199    if (VG_(clo_track_fds)) ML_(record_fd_close)(ARG1);
3200 }
3201 
3202 PRE(sys_dup)
3203 {
3204    PRINT("sys_dup ( %lu )", ARG1);
3205    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
3206 }
3207 
3208 POST(sys_dup)
3209 {
3210    vg_assert(SUCCESS);
3211    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
3212       VG_(close)(RES);
3213       SET_STATUS_Failure( VKI_EMFILE );
3214    } else {
3215       if (VG_(clo_track_fds))
3216          ML_(record_fd_open_named)(tid, RES);
3217    }
3218 }
3219 
3220 PRE(sys_dup2)
3221 {
3222    PRINT("sys_dup2 ( %lu, %lu )", ARG1, ARG2);
3223    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
3224    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
3225       SET_STATUS_Failure( VKI_EBADF );
3226 }
3227 
3228 POST(sys_dup2)
3229 {
3230    vg_assert(SUCCESS);
3231    if (VG_(clo_track_fds))
3232       ML_(record_fd_open_named)(tid, RES);
3233 }
3234 
3235 PRE(sys_fchdir)
3236 {
3237    FUSE_COMPATIBLE_MAY_BLOCK();
3238    PRINT("sys_fchdir ( %lu )", ARG1);
3239    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
3240 }
3241 
3242 PRE(sys_fchown)
3243 {
3244    FUSE_COMPATIBLE_MAY_BLOCK();
3245    PRINT("sys_fchown ( %lu, %lu, %lu )", ARG1, ARG2, ARG3);
3246    PRE_REG_READ3(long, "fchown",
3247                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
3248 }
3249 
3250 PRE(sys_fchmod)
3251 {
3252    FUSE_COMPATIBLE_MAY_BLOCK();
3253    PRINT("sys_fchmod ( %lu, %lu )", ARG1, ARG2);
3254    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
3255 }
3256 
3257 PRE(sys_newfstat)
3258 {
3259    FUSE_COMPATIBLE_MAY_BLOCK();
3260    PRINT("sys_newfstat ( %lu, %#lx )", ARG1, ARG2);
3261    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
3262    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
3263 }
3264 
3265 POST(sys_newfstat)
3266 {
3267    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3268 }
3269 
3270 #if !defined(VGO_solaris)
3271 static vki_sigset_t fork_saved_mask;
3272 
3273 // In Linux, the sys_fork() function varies across architectures, but we
3274 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
3275 PRE(sys_fork)
3276 {
3277    Bool is_child;
3278    Int child_pid;
3279    vki_sigset_t mask;
3280 
3281    PRINT("sys_fork ( )");
3282    PRE_REG_READ0(long, "fork");
3283 
3284    /* Block all signals during fork, so that we can fix things up in
3285       the child without being interrupted. */
3286    VG_(sigfillset)(&mask);
3287    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
3288 
3289    VG_(do_atfork_pre)(tid);
3290 
3291    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
3292 
3293    if (!SUCCESS) return;
3294 
3295 #if defined(VGO_linux)
3296    // RES is 0 for child, non-0 (the child's PID) for parent.
3297    is_child = ( RES == 0 ? True : False );
3298    child_pid = ( is_child ? -1 : RES );
3299 #elif defined(VGO_darwin)
3300    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
3301    is_child = RESHI;
3302    child_pid = RES;
3303 #else
3304 #  error Unknown OS
3305 #endif
3306 
3307    if (is_child) {
3308       VG_(do_atfork_child)(tid);
3309 
3310       /* restore signal mask */
3311       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3312 
3313       /* If --child-silent-after-fork=yes was specified, set the
3314          output file descriptors to 'impossible' values.  This is
3315          noticed by send_bytes_to_logging_sink in m_libcprint.c, which
3316          duly stops writing any further output. */
3317       if (VG_(clo_child_silent_after_fork)) {
3318          if (!VG_(log_output_sink).is_socket)
3319             VG_(log_output_sink).fd = -1;
3320          if (!VG_(xml_output_sink).is_socket)
3321             VG_(xml_output_sink).fd = -1;
3322       }
3323 
3324    } else {
3325       VG_(do_atfork_parent)(tid);
3326 
3327       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
3328 
3329       /* restore signal mask */
3330       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3331    }
3332 }
3333 #endif // !defined(VGO_solaris)
3334 
3335 PRE(sys_ftruncate)
3336 {
3337    *flags |= SfMayBlock;
3338    PRINT("sys_ftruncate ( %lu, %lu )", ARG1, ARG2);
3339    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
3340 }
3341 
3342 PRE(sys_truncate)
3343 {
3344    *flags |= SfMayBlock;
3345    PRINT("sys_truncate ( %#lx(%s), %lu )", ARG1, (HChar*)ARG1, ARG2);
3346    PRE_REG_READ2(long, "truncate",
3347                  const char *, path, unsigned long, length);
3348    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
3349 }
3350 
3351 PRE(sys_ftruncate64)
3352 {
3353    *flags |= SfMayBlock;
3354 #if VG_WORDSIZE == 4
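   /* Editorial note (a reading aid, not from this file): on 32-bit
      targets the 64-bit length does not fit in a single argument
      register, so it arrives split across two of them, and
      MERGE64(ARG2,ARG3) reassembles the two halves; which half comes
      first is architecture-dependent, hence MERGE64_FIRST/SECOND in
      the PRE_REG_READ3 below. */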
3355    PRINT("sys_ftruncate64 ( %lu, %llu )", ARG1, MERGE64(ARG2,ARG3));
3356    PRE_REG_READ3(long, "ftruncate64",
3357                  unsigned int, fd,
3358                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3359 #else
3360    PRINT("sys_ftruncate64 ( %lu, %lu )", ARG1, ARG2);
3361    PRE_REG_READ2(long, "ftruncate64",
3362                  unsigned int,fd, UWord,length);
3363 #endif
3364 }
3365 
3366 PRE(sys_truncate64)
3367 {
3368    *flags |= SfMayBlock;
3369 #if VG_WORDSIZE == 4
3370    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
3371    PRE_REG_READ3(long, "truncate64",
3372                  const char *, path,
3373                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3374 #else
3375    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
3376    PRE_REG_READ2(long, "truncate64",
3377                  const char *,path, UWord,length);
3378 #endif
3379    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
3380 }
3381 
3382 PRE(sys_getdents)
3383 {
3384    *flags |= SfMayBlock;
3385    PRINT("sys_getdents ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
3386    PRE_REG_READ3(long, "getdents",
3387                  unsigned int, fd, struct vki_dirent *, dirp,
3388                  unsigned int, count);
3389    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
3390 }
3391 
3392 POST(sys_getdents)
3393 {
3394    vg_assert(SUCCESS);
3395    if (RES > 0)
3396       POST_MEM_WRITE( ARG2, RES );
3397 }
3398 
3399 PRE(sys_getdents64)
3400 {
3401    *flags |= SfMayBlock;
3402    PRINT("sys_getdents64 ( %lu, %#lx, %lu )",ARG1, ARG2, ARG3);
3403    PRE_REG_READ3(long, "getdents64",
3404                  unsigned int, fd, struct vki_dirent64 *, dirp,
3405                  unsigned int, count);
3406    PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
3407 }
3408 
3409 POST(sys_getdents64)
3410 {
3411    vg_assert(SUCCESS);
3412    if (RES > 0)
3413       POST_MEM_WRITE( ARG2, RES );
3414 }
3415 
3416 PRE(sys_getgroups)
3417 {
3418    PRINT("sys_getgroups ( %ld, %#lx )", SARG1, ARG2);
3419    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3420    if (ARG1 > 0)
3421       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3422 }
3423 
3424 POST(sys_getgroups)
3425 {
3426    vg_assert(SUCCESS);
3427    if (ARG1 > 0 && RES > 0)
3428       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
3429 }
3430 
3431 PRE(sys_getcwd)
3432 {
3433    // Comment from linux/fs/dcache.c:
3434    //   NOTE! The user-level library version returns a character pointer.
3435    //   The kernel system call just returns the length of the buffer filled
3436    //   (which includes the ending '\0' character), or a negative error
3437    //   value.
3438    // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
3439    PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
3440    PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
3441    PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
3442 }
3443 
3444 POST(sys_getcwd)
3445 {
3446    vg_assert(SUCCESS);
3447    if (RES != (Addr)NULL)
3448       POST_MEM_WRITE( ARG1, RES );
3449 }
3450 
3451 PRE(sys_geteuid)
3452 {
3453    PRINT("sys_geteuid ( )");
3454    PRE_REG_READ0(long, "geteuid");
3455 }
3456 
3457 PRE(sys_getegid)
3458 {
3459    PRINT("sys_getegid ( )");
3460    PRE_REG_READ0(long, "getegid");
3461 }
3462 
3463 PRE(sys_getgid)
3464 {
3465    PRINT("sys_getgid ( )");
3466    PRE_REG_READ0(long, "getgid");
3467 }
3468 
3469 PRE(sys_getpid)
3470 {
3471    PRINT("sys_getpid ()");
3472    PRE_REG_READ0(long, "getpid");
3473 }
3474 
3475 PRE(sys_getpgid)
3476 {
3477    PRINT("sys_getpgid ( %ld )", SARG1);
3478    PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
3479 }
3480 
3481 PRE(sys_getpgrp)
3482 {
3483    PRINT("sys_getpgrp ()");
3484    PRE_REG_READ0(long, "getpgrp");
3485 }
3486 
3487 PRE(sys_getppid)
3488 {
3489    PRINT("sys_getppid ()");
3490    PRE_REG_READ0(long, "getppid");
3491 }
3492 
3493 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
3494 {
3495    POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
3496 
3497 #ifdef _RLIMIT_POSIX_FLAG
3498    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
3499    // Unset it here to make the switch case below work correctly.
3500    a1 &= ~_RLIMIT_POSIX_FLAG;
3501 #endif
3502 
3503    switch (a1) {
3504    case VKI_RLIMIT_NOFILE:
3505       ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
3506       ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
3507       break;
3508 
3509    case VKI_RLIMIT_DATA:
3510       *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
3511       break;
3512 
3513    case VKI_RLIMIT_STACK:
3514       *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
3515       break;
3516    }
3517 }
3518 
3519 PRE(sys_old_getrlimit)
3520 {
3521    PRINT("sys_old_getrlimit ( %lu, %#lx )", ARG1, ARG2);
3522    PRE_REG_READ2(long, "old_getrlimit",
3523                  unsigned int, resource, struct rlimit *, rlim);
3524    PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3525 }
3526 
3527 POST(sys_old_getrlimit)
3528 {
3529    common_post_getrlimit(tid, ARG1, ARG2);
3530 }
3531 
3532 PRE(sys_getrlimit)
3533 {
3534    PRINT("sys_getrlimit ( %lu, %#lx )", ARG1, ARG2);
3535    PRE_REG_READ2(long, "getrlimit",
3536                  unsigned int, resource, struct rlimit *, rlim);
3537    PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3538 }
3539 
3540 POST(sys_getrlimit)
3541 {
3542    common_post_getrlimit(tid, ARG1, ARG2);
3543 }
3544 
3545 PRE(sys_getrusage)
3546 {
3547    PRINT("sys_getrusage ( %ld, %#lx )", SARG1, ARG2);
3548    PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
3549    PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
3550 }
3551 
3552 POST(sys_getrusage)
3553 {
3554    vg_assert(SUCCESS);
3555    if (RES == 0)
3556       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
3557 }
3558 
3559 PRE(sys_gettimeofday)
3560 {
3561    PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
3562    PRE_REG_READ2(long, "gettimeofday",
3563                  struct timeval *, tv, struct timezone *, tz);
3564    // GrP fixme does darwin write to *tz anymore?
3565    if (ARG1 != 0)
3566       PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
3567    if (ARG2 != 0)
3568       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3569 }
3570 
3571 POST(sys_gettimeofday)
3572 {
3573    vg_assert(SUCCESS);
3574    if (RES == 0) {
3575       if (ARG1 != 0)
3576          POST_timeval_WRITE( ARG1 );
3577       if (ARG2 != 0)
3578 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
3579    }
3580 }
3581 
3582 PRE(sys_settimeofday)
3583 {
3584    PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
3585    PRE_REG_READ2(long, "settimeofday",
3586                  struct timeval *, tv, struct timezone *, tz);
3587    if (ARG1 != 0)
3588       PRE_timeval_READ( "settimeofday(tv)", ARG1 );
3589    if (ARG2 != 0) {
3590       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3591       /* maybe should warn if tz->tz_dsttime is non-zero? */
3592    }
3593 }
3594 
3595 PRE(sys_getuid)
3596 {
3597    PRINT("sys_getuid ( )");
3598    PRE_REG_READ0(long, "getuid");
3599 }
3600 
3601 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3602 {
3603    /* We don't have any specific information on it, so
3604       try to do something reasonable based on direction and
3605       size bits.  The encoding scheme is described in
3606       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3607 
3608       According to Simon Hausmann, _IOC_READ means the kernel
3609       writes a value to the ioctl argument passed from user
3610       space, and the other way around with _IOC_WRITE. */
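   /* For instance a request defined as _VKI_IOR('x', 1, struct foo)
      (a made-up example) encodes dir == _VKI_IOC_READ and
      size == sizeof(struct foo); the generic handling below then treats
      arg as a size-byte buffer the kernel will write, while an _IOW
      request is treated as a buffer the kernel reads. */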
3611 
3612 #if defined(VGO_solaris)
3613    /* The majority of Solaris ioctl requests do not honour direction hints. */
3614    UInt dir  = _VKI_IOC_NONE;
3615 #else
3616    UInt dir  = _VKI_IOC_DIR(request);
3617 #endif
3618    UInt size = _VKI_IOC_SIZE(request);
3619 
3620    if (SimHintiS(SimHint_lax_ioctls, VG_(clo_sim_hints))) {
3621       /*
3622        * Be very lax about ioctl handling; the only
3623        * assumption is that the size is correct. Doesn't
3624        * require the full buffer to be initialized when
3625        * writing.  Without this, using some device
3626        * drivers with a large number of strange ioctl
3627        * commands becomes very tiresome.
3628        */
3629    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
3630       static UWord unknown_ioctl[10];
3631       static Int moans = sizeof(unknown_ioctl) / sizeof(unknown_ioctl[0]);
3632 
3633       if (moans > 0 && !VG_(clo_xml)) {
3634          /* Check if have not already moaned for this request. */
3635          UInt i;
3636          for (i = 0; i < sizeof(unknown_ioctl)/sizeof(unknown_ioctl[0]); i++) {
3637             if (unknown_ioctl[i] == request)
3638                break;
3639             if (unknown_ioctl[i] == 0) {
3640                unknown_ioctl[i] = request;
3641                moans--;
3642                VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
3643                          " with no size/direction hints.\n", request);
3644                VG_(umsg)("   This could cause spurious value errors to appear.\n");
3645                VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
3646                          "guidance on writing a proper wrapper.\n" );
3647                //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3648                return;
3649             }
3650          }
3651       }
3652    } else {
3653       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3654       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3655       if ((dir & _VKI_IOC_WRITE) && size > 0)
3656          PRE_MEM_READ( "ioctl(generic)", arg, size);
3657       if ((dir & _VKI_IOC_READ) && size > 0)
3658          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
3659    }
3660 }
3661 
3662 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3663 {
3664    /* We don't have any specific information on it, so
3665       try to do something reasonable based on direction and
3666       size bits.  The encoding scheme is described in
3667       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3668 
3669       According to Simon Hausmann, _IOC_READ means the kernel
3670       writes a value to the ioctl argument passed from user
3671       space, and the other way around with _IOC_WRITE. */
3672 
3673    UInt dir  = _VKI_IOC_DIR(request);
3674    UInt size = _VKI_IOC_SIZE(request);
3675    if (size > 0 && (dir & _VKI_IOC_READ)
3676        && res == 0
3677        && arg != (Addr)NULL)
3678    {
3679       POST_MEM_WRITE(arg, size);
3680    }
3681 }
3682 
3683 /*
3684    If we're sending a SIGKILL to one of our own threads, then simulate
3685    it rather than really sending the signal, so that the target thread
3686    gets a chance to clean up.  Returns True if we did the killing (or
3687    no killing is necessary), and False if the caller should use the
3688    normal kill syscall.
3689 
3690    "pid" is any pid argument which can be passed to kill; group kills
3691    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3692    they'll most likely hit all the threads and we won't need to worry
3693    about cleanup.  In truth, we can't fully emulate these multicast
3694    kills.
3695 
3696    "tgid" is a thread group id.  If it is not -1, then the target
3697    thread must be in that thread group.
3698  */
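/* Sketch of the intended use (see PRE(sys_kill) below): for
   kill(pid, SIGKILL) where pid names one of our own threads, the thread
   is marked as exiting with a fatal SIGKILL and the wrapper reports
   success without entering the kernel; for group/owner kills (pid <= 0)
   or pids we don't own, this returns False and the caller falls back to
   the real syscall. */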
3699 Bool ML_(do_sigkill)(Int pid, Int tgid)
3700 {
3701    ThreadState *tst;
3702    ThreadId tid;
3703 
3704    if (pid <= 0)
3705       return False;
3706 
3707    tid = VG_(lwpid_to_vgtid)(pid);
3708    if (tid == VG_INVALID_THREADID)
3709       return False;		/* none of our threads */
3710 
3711    tst = VG_(get_ThreadState)(tid);
3712    if (tst == NULL || tst->status == VgTs_Empty)
3713       return False;		/* hm, shouldn't happen */
3714 
3715    if (tgid != -1 && tst->os_state.threadgroup != tgid)
3716       return False;		/* not the right thread group */
3717 
3718    /* Check to see that the target isn't already exiting. */
3719    if (!VG_(is_exiting)(tid)) {
3720       if (VG_(clo_trace_signals))
3721 	 VG_(message)(Vg_DebugMsg,
3722                       "Thread %u being killed with SIGKILL\n",
3723                       tst->tid);
3724 
3725       tst->exitreason = VgSrc_FatalSig;
3726       tst->os_state.fatalsig = VKI_SIGKILL;
3727 
3728       if (!VG_(is_running_thread)(tid))
3729 	 VG_(get_thread_out_of_syscall)(tid);
3730    }
3731 
3732    return True;
3733 }
3734 
3735 PRE(sys_kill)
3736 {
3737    PRINT("sys_kill ( %ld, %ld )", SARG1, SARG2);
3738    PRE_REG_READ2(long, "kill", int, pid, int, signal);
3739    if (!ML_(client_signal_OK)(ARG2)) {
3740       SET_STATUS_Failure( VKI_EINVAL );
3741       return;
3742    }
3743 
3744    /* If we're sending SIGKILL, check to see if the target is one of
3745       our threads and handle it specially. */
3746    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
3747       SET_STATUS_Success(0);
3748    else
3749       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
3750          affecting how posix-compliant the call is.  I guess it is
3751          harmless to pass the 3rd arg on other platforms; hence pass
3752          it on all. */
3753       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
3754 
3755    if (VG_(clo_trace_signals))
3756       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
3757 		   SARG2, SARG1);
3758 
3759    /* This kill might have given us a pending signal.  Ask for a check once
3760       the syscall is done. */
3761    *flags |= SfPollAfter;
3762 }
3763 
3764 PRE(sys_link)
3765 {
3766    *flags |= SfMayBlock;
3767    PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3768    PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
3769    PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
3770    PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
3771 }
3772 
3773 PRE(sys_newlstat)
3774 {
3775    PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3776    PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
3777    PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
3778    PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
3779 }
3780 
3781 POST(sys_newlstat)
3782 {
3783    vg_assert(SUCCESS);
3784    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3785 }
3786 
3787 PRE(sys_mkdir)
3788 {
3789    *flags |= SfMayBlock;
3790    PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
3791    PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
3792    PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
3793 }
3794 
3795 PRE(sys_mprotect)
3796 {
3797    PRINT("sys_mprotect ( %#lx, %lu, %lu )", ARG1, ARG2, ARG3);
3798    PRE_REG_READ3(long, "mprotect",
3799                  unsigned long, addr, vki_size_t, len, unsigned long, prot);
3800 
3801    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
3802       SET_STATUS_Failure( VKI_ENOMEM );
3803    }
3804 #if defined(VKI_PROT_GROWSDOWN)
3805    else
3806    if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
3807       /* Deal with mprotects on growable stack areas.
3808 
3809          The critical files to understand all this are mm/mprotect.c
3810          in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
3811          glibc.
3812 
3813          The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
3814          round the start/end address of mprotect to the start/end of
3815          the underlying vma and glibc uses that as an easy way to
3816          change the protection of the stack by calling mprotect on the
3817          last page of the stack with PROT_GROWSDOWN set.
3818 
3819          The sanity check provided by the kernel is that the vma must
3820          have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
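      /* Sketch of the common case this caters for: glibc's
         dl-execstack does roughly

            mprotect(last_stack_page, page_size,
                     PROT_READ|PROT_WRITE|PROT_EXEC|PROT_GROWSDOWN);

         and expects the whole stack vma to change protection.  The
         code below imitates the kernel's rounding by widening
         ARG1/ARG2 to cover the whole client segment containing ARG1
         and then clearing the GROWSDOWN/GROWSUP bit. */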
3821       UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
3822       NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
3823       NSegment const *rseg;
3824 
3825       vg_assert(aseg);
3826 
3827       if (grows == VKI_PROT_GROWSDOWN) {
3828          rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
3829          if (rseg &&
3830              rseg->kind == SkResvn &&
3831              rseg->smode == SmUpper &&
3832              rseg->end+1 == aseg->start) {
3833             Addr end = ARG1 + ARG2;
3834             ARG1 = aseg->start;
3835             ARG2 = end - aseg->start;
3836             ARG3 &= ~VKI_PROT_GROWSDOWN;
3837          } else {
3838             SET_STATUS_Failure( VKI_EINVAL );
3839          }
3840       } else if (grows == VKI_PROT_GROWSUP) {
3841          rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
3842          if (rseg &&
3843              rseg->kind == SkResvn &&
3844              rseg->smode == SmLower &&
3845              aseg->end+1 == rseg->start) {
3846             ARG2 = aseg->end - ARG1 + 1;
3847             ARG3 &= ~VKI_PROT_GROWSUP;
3848          } else {
3849             SET_STATUS_Failure( VKI_EINVAL );
3850          }
3851       } else {
3852          /* both GROWSUP and GROWSDOWN */
3853          SET_STATUS_Failure( VKI_EINVAL );
3854       }
3855    }
3856 #endif   // defined(VKI_PROT_GROWSDOWN)
3857 }
3858 
3859 POST(sys_mprotect)
3860 {
3861    Addr a    = ARG1;
3862    SizeT len = ARG2;
3863    Int  prot = ARG3;
3864 
3865    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
3866 }
3867 
3868 PRE(sys_munmap)
3869 {
3870    if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
3871    PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
3872    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
3873 
3874    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
3875       SET_STATUS_Failure( VKI_EINVAL );
3876 }
3877 
3878 POST(sys_munmap)
3879 {
3880    Addr  a   = ARG1;
3881    SizeT len = ARG2;
3882 
3883    ML_(notify_core_and_tool_of_munmap)( a, len );
3884 }
3885 
3886 PRE(sys_mincore)
3887 {
3888    PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
3889    PRE_REG_READ3(long, "mincore",
3890                  unsigned long, start, vki_size_t, length,
3891                  unsigned char *, vec);
3892    PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3893 }
3894 POST(sys_mincore)
3895 {
3896    POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3897 }
3898 
3899 PRE(sys_nanosleep)
3900 {
3901    *flags |= SfMayBlock|SfPostOnFail;
3902    PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
3903    PRE_REG_READ2(long, "nanosleep",
3904                  struct timespec *, req, struct timespec *, rem);
3905    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
3906    if (ARG2 != 0)
3907       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
3908 }
3909 
3910 POST(sys_nanosleep)
3911 {
3912    vg_assert(SUCCESS || FAILURE);
3913    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
3914       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
3915 }
3916 
3917 #if defined(VGO_linux) || defined(VGO_solaris)
3918 /* Handles the case where the open is of /proc/self/auxv or
3919    /proc/<pid>/auxv, and just gives out a copy of the fd for the
3920    fake file we cooked up at startup (in m_main).  Also, seeks the
3921    cloned fd back to the start.
3922    Returns True if auxv open was handled (status is set). */
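/* Typical call site (see PRE(sys_open) below):

      if (ML_(handle_auxv_open)(status, (const HChar *)ARG1, ARG2))
         return;

   i.e. the open wrapper bails out early once this helper has set the
   syscall status. */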
3923 Bool ML_(handle_auxv_open)(SyscallStatus *status, const HChar *filename,
3924                            int flags)
3925 {
3926    HChar  name[30];   // large enough
3927 
3928    if (!ML_(safe_to_deref)((const void *) filename, 1))
3929       return False;
3930 
3931    /* Opening /proc/<pid>/auxv or /proc/self/auxv? */
3932    VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
3933    if (!VG_STREQ(filename, name) && !VG_STREQ(filename, "/proc/self/auxv"))
3934       return False;
3935 
3936    /* Allow to open the file only for reading. */
3937    if (flags & (VKI_O_WRONLY | VKI_O_RDWR)) {
3938       SET_STATUS_Failure(VKI_EACCES);
3939       return True;
3940    }
3941 
3942 #  if defined(VGO_solaris)
3943    VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_auxv_fd));
3944    SysRes sres = VG_(open)(name, flags, 0);
3945    SET_STATUS_from_SysRes(sres);
3946 #  else
3947    SysRes sres = VG_(dup)(VG_(cl_auxv_fd));
3948    SET_STATUS_from_SysRes(sres);
3949    if (!sr_isError(sres)) {
3950       OffT off = VG_(lseek)(sr_Res(sres), 0, VKI_SEEK_SET);
3951       if (off < 0)
3952          SET_STATUS_Failure(VKI_EMFILE);
3953    }
3954 #  endif
3955 
3956    return True;
3957 }
3958 #endif // defined(VGO_linux) || defined(VGO_solaris)
3959 
3960 PRE(sys_open)
3961 {
3962    if (ARG2 & VKI_O_CREAT) {
3963       // 3-arg version
3964       PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1, (HChar*)ARG1, SARG2, SARG3);
3965       PRE_REG_READ3(long, "open",
3966                     const char *, filename, int, flags, int, mode);
3967    } else {
3968       // 2-arg version
3969       PRINT("sys_open ( %#lx(%s), %ld )",ARG1, (HChar*)ARG1, SARG2);
3970       PRE_REG_READ2(long, "open",
3971                     const char *, filename, int, flags);
3972    }
3973    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
3974 
3975 #if defined(VGO_linux)
3976    /* Handle the case where the open is of /proc/self/cmdline or
3977       /proc/<pid>/cmdline, and just give it a copy of the fd for the
3978       fake file we cooked up at startup (in m_main).  Also, seek the
3979       cloned fd back to the start. */
3980    {
3981       HChar  name[30];   // large enough
3982       HChar* arg1s = (HChar*) ARG1;
3983       SysRes sres;
3984 
3985       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
3986       if (ML_(safe_to_deref)( arg1s, 1 ) &&
3987           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
3988          )
3989       {
3990          sres = VG_(dup)( VG_(cl_cmdline_fd) );
3991          SET_STATUS_from_SysRes( sres );
3992          if (!sr_isError(sres)) {
3993             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3994             if (off < 0)
3995                SET_STATUS_Failure( VKI_EMFILE );
3996          }
3997          return;
3998       }
3999    }
4000 
4001    /* Handle also the case of /proc/self/auxv or /proc/<pid>/auxv. */
4002    if (ML_(handle_auxv_open)(status, (const HChar *)ARG1, ARG2))
4003       return;
4004 #endif // defined(VGO_linux)
4005 
4006    /* Otherwise handle normally */
4007    *flags |= SfMayBlock;
4008 }
4009 
4010 POST(sys_open)
4011 {
4012    vg_assert(SUCCESS);
4013    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
4014       VG_(close)(RES);
4015       SET_STATUS_Failure( VKI_EMFILE );
4016    } else {
4017       if (VG_(clo_track_fds))
4018          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
4019    }
4020 }
4021 
4022 PRE(sys_read)
4023 {
4024    *flags |= SfMayBlock;
4025    PRINT("sys_read ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
4026    PRE_REG_READ3(ssize_t, "read",
4027                  unsigned int, fd, char *, buf, vki_size_t, count);
4028 
4029    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
4030       SET_STATUS_Failure( VKI_EBADF );
4031    else
4032       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
4033 }
4034 
4035 POST(sys_read)
4036 {
4037    vg_assert(SUCCESS);
4038    POST_MEM_WRITE( ARG2, RES );
4039 }
4040 
4041 PRE(sys_write)
4042 {
4043    Bool ok;
4044    *flags |= SfMayBlock;
4045    PRINT("sys_write ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
4046    PRE_REG_READ3(ssize_t, "write",
4047                  unsigned int, fd, const char *, buf, vki_size_t, count);
4048    /* check to see if it is allowed.  If not, try for an exemption from
4049       --sim-hints=enable-outer (used for self hosting). */
4050    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
4051    if (!ok && ARG1 == 2/*stderr*/
4052            && SimHintiS(SimHint_enable_outer, VG_(clo_sim_hints)))
4053       ok = True;
4054 #if defined(VGO_solaris)
4055    if (!ok && VG_(vfork_fildes_addr) != NULL &&
4056        *VG_(vfork_fildes_addr) >= 0 && *VG_(vfork_fildes_addr) == ARG1)
4057       ok = True;
4058 #endif
4059    if (!ok)
4060       SET_STATUS_Failure( VKI_EBADF );
4061    else
4062       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
4063 }
4064 
4065 PRE(sys_creat)
4066 {
4067    *flags |= SfMayBlock;
4068    PRINT("sys_creat ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
4069    PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
4070    PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
4071 }
4072 
4073 POST(sys_creat)
4074 {
4075    vg_assert(SUCCESS);
4076    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
4077       VG_(close)(RES);
4078       SET_STATUS_Failure( VKI_EMFILE );
4079    } else {
4080       if (VG_(clo_track_fds))
4081          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
4082    }
4083 }
4084 
4085 PRE(sys_poll)
4086 {
4087    /* struct pollfd {
4088         int fd;           -- file descriptor
4089         short events;     -- requested events
4090         short revents;    -- returned events
4091       };
4092       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
4093    */
4094    UInt i;
4095    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
4096    *flags |= SfMayBlock;
4097    PRINT("sys_poll ( %#lx, %lu, %ld )\n", ARG1, ARG2, SARG3);
4098    PRE_REG_READ3(long, "poll",
4099                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
4100 
4101    for (i = 0; i < ARG2; i++) {
4102       PRE_MEM_READ( "poll(ufds.fd)",
4103                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
4104       PRE_MEM_READ( "poll(ufds.events)",
4105                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
4106       PRE_MEM_WRITE( "poll(ufds.revents)",
4107                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
4108    }
4109 }
4110 
4111 POST(sys_poll)
4112 {
4113    if (RES >= 0) {
4114       UInt i;
4115       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
4116       for (i = 0; i < ARG2; i++)
4117 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
4118    }
4119 }
4120 
4121 PRE(sys_readlink)
4122 {
4123    FUSE_COMPATIBLE_MAY_BLOCK();
4124    Word saved = SYSNO;
4125 
4126    PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
4127    PRE_REG_READ3(long, "readlink",
4128                  const char *, path, char *, buf, int, bufsiz);
4129    PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
4130    PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
4131 
4132    {
4133 #if defined(VGO_linux)
4134       /*
4135        * Handle the case where readlink is looking at /proc/self/exe or
4136        * /proc/<pid>/exe.
4137        */
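      /* Without this redirection readlink("/proc/self/exe") would name
         the valgrind tool executable rather than the client binary.
         Re-pointing the lookup at /proc/self/fd/<cl_exec_fd>, the fd we
         keep open on the client executable, gives the client the answer
         it expects. */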
4138       HChar  name[30];   // large enough
4139       HChar* arg1s = (HChar*) ARG1;
4140       VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
4141       if (ML_(safe_to_deref)(arg1s, 1) &&
4142           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
4143          )
4144       {
4145          VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
4146          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
4147                                                          ARG2, ARG3));
4148       } else
4149 #elif defined(VGO_solaris)
4150       /* Same for Solaris, but /proc/self/path/a.out and
4151          /proc/<pid>/path/a.out. */
4152       HChar  name[30];   // large enough
4153       HChar* arg1s = (HChar*) ARG1;
4154       VG_(sprintf)(name, "/proc/%d/path/a.out", VG_(getpid)());
4155       if (ML_(safe_to_deref)(arg1s, 1) &&
4156           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/path/a.out"))
4157          )
4158       {
4159          VG_(sprintf)(name, "/proc/self/path/%d", VG_(cl_exec_fd));
4160          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
4161                                                          ARG2, ARG3));
4162       } else
4163 #endif
4164       {
4165          /* Normal case */
4166          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
4167       }
4168    }
4169 
4170    if (SUCCESS && RES > 0)
4171       POST_MEM_WRITE( ARG2, RES );
4172 }
4173 
4174 PRE(sys_readv)
4175 {
4176    Int i;
4177    struct vki_iovec * vec;
4178    *flags |= SfMayBlock;
4179    PRINT("sys_readv ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
4180    PRE_REG_READ3(ssize_t, "readv",
4181                  unsigned long, fd, const struct iovec *, vector,
4182                  unsigned long, count);
4183    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
4184       SET_STATUS_Failure( VKI_EBADF );
4185    } else {
4186       if ((Int)ARG3 >= 0)
4187          PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
4188 
4189       if (ARG2 != 0) {
4190          /* ToDo: don't do any of the following if the vector is invalid */
4191          vec = (struct vki_iovec *)ARG2;
4192          for (i = 0; i < (Int)ARG3; i++)
4193             PRE_MEM_WRITE( "readv(vector[...])",
4194                            (Addr)vec[i].iov_base, vec[i].iov_len );
4195       }
4196    }
4197 }
4198 
4199 POST(sys_readv)
4200 {
4201    vg_assert(SUCCESS);
4202    if (RES > 0) {
4203       Int i;
4204       struct vki_iovec * vec = (struct vki_iovec *)ARG2;
4205       Int remains = RES;
4206 
4207       /* RES holds the number of bytes read. */
4208       for (i = 0; i < (Int)ARG3; i++) {
4209 	 Int nReadThisBuf = vec[i].iov_len;
4210 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
4211 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
4212 	 remains -= nReadThisBuf;
4213 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
4214       }
4215    }
4216 }
4217 
4218 PRE(sys_rename)
4219 {
4220    FUSE_COMPATIBLE_MAY_BLOCK();
4221    PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4222    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
4223    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
4224    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
4225 }
4226 
4227 PRE(sys_rmdir)
4228 {
4229    *flags |= SfMayBlock;
4230    PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
4231    PRE_REG_READ1(long, "rmdir", const char *, pathname);
4232    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
4233 }
4234 
4235 PRE(sys_select)
4236 {
4237    *flags |= SfMayBlock;
4238    PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", SARG1, ARG2, ARG3,
4239          ARG4, ARG5);
4240    PRE_REG_READ5(long, "select",
4241                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
4242                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
4243    // XXX: this possibly understates how much memory is read.
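   // (Illustration: with n == 10 this checks only 10/8 == 1 byte of
   // each set, whereas the kernel reads at least ceil(10/8) == 2 bytes,
   // and a glibc fd_set is a fixed __FD_SETSIZE/8 == 128-byte object
   // anyway.  So the understatement can hide uninitialised-read errors;
   // it doesn't cause false positives.)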
4244    if (ARG2 != 0)
4245       PRE_MEM_READ( "select(readfds)",
4246 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
4247    if (ARG3 != 0)
4248       PRE_MEM_READ( "select(writefds)",
4249 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
4250    if (ARG4 != 0)
4251       PRE_MEM_READ( "select(exceptfds)",
4252 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
4253    if (ARG5 != 0)
4254       PRE_timeval_READ( "select(timeout)", ARG5 );
4255 }
4256 
4257 PRE(sys_setgid)
4258 {
4259    PRINT("sys_setgid ( %lu )", ARG1);
4260    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
4261 }
4262 
4263 PRE(sys_setsid)
4264 {
4265    PRINT("sys_setsid ( )");
4266    PRE_REG_READ0(long, "setsid");
4267 }
4268 
4269 PRE(sys_setgroups)
4270 {
4271    PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
4272    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
4273    if (ARG1 > 0)
4274       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
4275 }
4276 
4277 PRE(sys_setpgid)
4278 {
4279    PRINT("setpgid ( %ld, %ld )", SARG1, SARG2);
4280    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
4281 }
4282 
4283 PRE(sys_setregid)
4284 {
4285    PRINT("sys_setregid ( %lu, %lu )", ARG1, ARG2);
4286    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
4287 }
4288 
4289 PRE(sys_setreuid)
4290 {
4291    PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
4292    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
4293 }
4294 
4295 PRE(sys_setrlimit)
4296 {
4297    UWord arg1 = ARG1;
4298    PRINT("sys_setrlimit ( %lu, %#lx )", ARG1, ARG2);
4299    PRE_REG_READ2(long, "setrlimit",
4300                  unsigned int, resource, struct rlimit *, rlim);
4301    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
4302 
4303 #ifdef _RLIMIT_POSIX_FLAG
4304    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
4305    // Unset it here to make the if statements below work correctly.
4306    arg1 &= ~_RLIMIT_POSIX_FLAG;
4307 #endif
4308 
4309    if (!VG_(am_is_valid_for_client)(ARG2, sizeof(struct vki_rlimit),
4310                                     VKI_PROT_READ)) {
4311       SET_STATUS_Failure( VKI_EFAULT );
4312    }
4313    else if (((struct vki_rlimit *)ARG2)->rlim_cur
4314             > ((struct vki_rlimit *)ARG2)->rlim_max) {
4315       SET_STATUS_Failure( VKI_EINVAL );
4316    }
4317    else if (arg1 == VKI_RLIMIT_NOFILE) {
4318       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
4319           ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
4320          SET_STATUS_Failure( VKI_EPERM );
4321       }
4322       else {
4323          VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
4324          SET_STATUS_Success( 0 );
4325       }
4326    }
4327    else if (arg1 == VKI_RLIMIT_DATA) {
4328       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
4329           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
4330          SET_STATUS_Failure( VKI_EPERM );
4331       }
4332       else {
4333          VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
4334          SET_STATUS_Success( 0 );
4335       }
4336    }
4337    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
4338       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
4339           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
4340          SET_STATUS_Failure( VKI_EPERM );
4341       }
4342       else {
4343          /* Change the value of client_stack_szB to the rlim_cur value but
4344             only if it is smaller than the size of the allocated stack for the
4345             client.
4346             TODO: All platforms should set VG_(clstk_max_size) as part of their
4347                   setup_client_stack(). */
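         /* Example with made-up numbers: if setup_client_stack() gave
            the client a 16 MB stack and it now asks for RLIMIT_STACK of
            8 MB, client_stack_szB is lowered to 8 MB; asking for 32 MB
            leaves client_stack_szB alone, since the stack actually
            allocated cannot grow. */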
4348          if ((VG_(clstk_max_size) == 0)
4349              || (((struct vki_rlimit *) ARG2)->rlim_cur <= VG_(clstk_max_size)))
4350             VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
4351 
4352          VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
4353          SET_STATUS_Success( 0 );
4354       }
4355    }
4356 }
4357 
4358 PRE(sys_setuid)
4359 {
4360    PRINT("sys_setuid ( %lu )", ARG1);
4361    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
4362 }
4363 
4364 PRE(sys_newstat)
4365 {
4366    FUSE_COMPATIBLE_MAY_BLOCK();
4367    PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4368    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
4369    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
4370    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
4371 }
4372 
4373 POST(sys_newstat)
4374 {
4375    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
4376 }
4377 
4378 PRE(sys_statfs)
4379 {
4380    FUSE_COMPATIBLE_MAY_BLOCK();
4381    PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
4382    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
4383    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
4384    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
4385 }
4386 POST(sys_statfs)
4387 {
4388    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
4389 }
4390 
4391 PRE(sys_statfs64)
4392 {
4393    PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
4394    PRE_REG_READ3(long, "statfs64",
4395                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
4396    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
4397    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
4398 }
4399 POST(sys_statfs64)
4400 {
4401    POST_MEM_WRITE( ARG3, ARG2 );
4402 }
4403 
4404 PRE(sys_symlink)
4405 {
4406    *flags |= SfMayBlock;
4407    PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4408    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
4409    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
4410    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
4411 }
4412 
4413 PRE(sys_time)
4414 {
4415    /* time_t time(time_t *t); */
4416    PRINT("sys_time ( %#lx )",ARG1);
4417    PRE_REG_READ1(long, "time", int *, t);
4418    if (ARG1 != 0) {
4419       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
4420    }
4421 }
4422 
4423 POST(sys_time)
4424 {
4425    if (ARG1 != 0) {
4426       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
4427    }
4428 }
4429 
4430 PRE(sys_times)
4431 {
4432    PRINT("sys_times ( %#lx )", ARG1);
4433    PRE_REG_READ1(long, "times", struct tms *, buf);
4434    if (ARG1 != 0) {
4435       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
4436    }
4437 }
4438 
4439 POST(sys_times)
4440 {
4441    if (ARG1 != 0) {
4442       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
4443    }
4444 }
4445 
4446 PRE(sys_umask)
4447 {
4448    PRINT("sys_umask ( %ld )", SARG1);
4449    PRE_REG_READ1(long, "umask", int, mask);
4450 }
4451 
4452 PRE(sys_unlink)
4453 {
4454    *flags |= SfMayBlock;
4455    PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
4456    PRE_REG_READ1(long, "unlink", const char *, pathname);
4457    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
4458 }
4459 
4460 PRE(sys_newuname)
4461 {
4462    PRINT("sys_newuname ( %#lx )", ARG1);
4463    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4464    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4465 }
4466 
4467 POST(sys_newuname)
4468 {
4469    if (ARG1 != 0) {
4470       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
4471    }
4472 }
4473 
4474 PRE(sys_waitpid)
4475 {
4476    *flags |= SfMayBlock;
4477    PRINT("sys_waitpid ( %ld, %#lx, %ld )", SARG1, ARG2, SARG3);
4478    PRE_REG_READ3(long, "waitpid",
4479                  vki_pid_t, pid, unsigned int *, status, int, options);
4480 
4481    if (ARG2 != (Addr)NULL)
4482       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4483 }
4484 
4485 POST(sys_waitpid)
4486 {
4487    if (ARG2 != (Addr)NULL)
4488       POST_MEM_WRITE( ARG2, sizeof(int) );
4489 }
4490 
4491 PRE(sys_wait4)
4492 {
4493    *flags |= SfMayBlock;
4494    PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", SARG1, ARG2, SARG3, ARG4);
4495 
4496    PRE_REG_READ4(long, "wait4",
4497                  vki_pid_t, pid, unsigned int *, status, int, options,
4498                  struct rusage *, rusage);
4499    if (ARG2 != (Addr)NULL)
4500       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4501    if (ARG4 != (Addr)NULL)
4502       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4503 }
4504 
4505 POST(sys_wait4)
4506 {
4507    if (ARG2 != (Addr)NULL)
4508       POST_MEM_WRITE( ARG2, sizeof(int) );
4509    if (ARG4 != (Addr)NULL)
4510       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
4511 }
4512 
4513 PRE(sys_writev)
4514 {
4515    Int i;
4516    struct vki_iovec * vec;
4517    *flags |= SfMayBlock;
4518    PRINT("sys_writev ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
4519    PRE_REG_READ3(ssize_t, "writev",
4520                  unsigned long, fd, const struct iovec *, vector,
4521                  unsigned long, count);
4522    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4523       SET_STATUS_Failure( VKI_EBADF );
4524    } else {
4525       if ((Int)ARG3 >= 0)
4526          PRE_MEM_READ( "writev(vector)",
4527                        ARG2, ARG3 * sizeof(struct vki_iovec) );
4528       if (ARG2 != 0) {
4529          /* ToDo: don't do any of the following if the vector is invalid */
4530          vec = (struct vki_iovec *)ARG2;
4531          for (i = 0; i < (Int)ARG3; i++)
4532             PRE_MEM_READ( "writev(vector[...])",
4533                            (Addr)vec[i].iov_base, vec[i].iov_len );
4534       }
4535    }
4536 }
4537 
4538 PRE(sys_utimes)
4539 {
4540    FUSE_COMPATIBLE_MAY_BLOCK();
4541    PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4542    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
4543    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
4544    if (ARG2 != 0) {
4545       PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
4546       PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
4547    }
4548 }
4549 
4550 PRE(sys_acct)
4551 {
4552    PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
4553    PRE_REG_READ1(long, "acct", const char *, filename);
4554    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
4555 }
4556 
4557 PRE(sys_pause)
4558 {
4559    *flags |= SfMayBlock;
4560    PRINT("sys_pause ( )");
4561    PRE_REG_READ0(long, "pause");
4562 }
4563 
4564 PRE(sys_sigaltstack)
4565 {
4566    PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
4567    PRE_REG_READ2(int, "sigaltstack",
4568                  const vki_stack_t *, ss, vki_stack_t *, oss);
4569    if (ARG1 != 0) {
4570       const vki_stack_t *ss = (vki_stack_t *)ARG1;
4571       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
4572       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
4573       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
4574    }
4575    if (ARG2 != 0) {
4576       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
4577    }
4578 
4579    /* Be safe. */
4580    if (ARG1 && !ML_(safe_to_deref((void*)ARG1, sizeof(vki_stack_t)))) {
4581       SET_STATUS_Failure(VKI_EFAULT);
4582       return;
4583    }
4584    if (ARG2 && !ML_(safe_to_deref((void*)ARG2, sizeof(vki_stack_t)))) {
4585       SET_STATUS_Failure(VKI_EFAULT);
4586       return;
4587    }
4588 
4589    SET_STATUS_from_SysRes(
4590       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
4591                               (vki_stack_t*)ARG2)
4592    );
4593 }
4594 POST(sys_sigaltstack)
4595 {
4596    vg_assert(SUCCESS);
4597    if (RES == 0 && ARG2 != 0)
4598       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
4599 }
4600 
4601 PRE(sys_sethostname)
4602 {
4603    PRINT("sys_sethostname ( %#lx, %ld )", ARG1, SARG2);
4604    PRE_REG_READ2(long, "sethostname", char *, name, int, len);
4605    PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
4606 }
4607 
4608 #undef PRE
4609 #undef POST
4610 
4611 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
4612 
4613 /*--------------------------------------------------------------------*/
4614 /*--- end                                                          ---*/
4615 /*--------------------------------------------------------------------*/
4616