/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- Wrappers for generic Unix system calls                       ---*/
/*---                                            syswrap-generic.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGO_linux) || defined(VGO_darwin)

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"
#include "pub_core_threadstate.h"
#include "pub_core_debuginfo.h"     // VG_(di_notify_*)
#include "pub_core_aspacemgr.h"
#include "pub_core_transtab.h"      // VG_(discard_translations)
#include "pub_core_xarray.h"
#include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
#include "pub_core_debuglog.h"
#include "pub_core_errormgr.h"
#include "pub_core_gdbserver.h"     // VG_(gdbserver)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_libcsignal.h"
#include "pub_core_machine.h"       // VG_(get_SP)
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_scheduler.h"
#include "pub_core_signals.h"
#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
#include "pub_core_syscall.h"
#include "pub_core_syswrap.h"
#include "pub_core_tooliface.h"
#include "pub_core_ume.h"
#include "pub_core_stacks.h"

#include "priv_types_n_macros.h"
#include "priv_syswrap-generic.h"

#include "config.h"


void ML_(guess_and_register_stack) (Addr sp, ThreadState* tst)
{
   Bool debug = False;
   NSegment const* seg;

   /* We don't really know where the client stack is, because it's
      allocated by the client.  The best we can do is look at the
      memory mappings and try to derive some useful information.  We
      assume that sp starts near its highest possible value, and can
      only go down to the start of the mmaped segment. */
   seg = VG_(am_find_nsegment)(sp);
   if (seg &&
       VG_(am_is_valid_for_client)(sp, 1, VKI_PROT_READ | VKI_PROT_WRITE)) {
      tst->client_stack_highest_byte = (Addr)VG_PGROUNDUP(sp)-1;
      tst->client_stack_szB = tst->client_stack_highest_byte - seg->start + 1;

      VG_(register_stack)(seg->start, tst->client_stack_highest_byte);

      if (debug)
         VG_(printf)("tid %d: guessed client stack range [%#lx-%#lx]\n",
                     tst->tid, seg->start, tst->client_stack_highest_byte);
   } else {
      VG_(message)(Vg_UserMsg,
                   "!? New thread %d starts with SP(%#lx) unmapped\n",
                   tst->tid, sp);
      tst->client_stack_highest_byte = 0;
      tst->client_stack_szB  = 0;
   }
}
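
/* Worked example (illustrative only, hypothetical addresses): if the
   segment containing sp is [0x7ff0000000, 0x7ff07fffff] and the new
   thread starts with sp = 0x7ff07ffe50, the guess above gives

      client_stack_highest_byte = VG_PGROUNDUP(0x7ff07ffe50) - 1
                                = 0x7ff07fffff
      client_stack_szB          = 0x7ff07fffff - 0x7ff0000000 + 1
                                = 0x800000  (8 MB)

   i.e. the stack is assumed to run from the top of sp's page down to
   the start of the mmap'd segment. */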

/* Returns True iff address range is something the client can
   plausibly mess with: all of it either already belongs to the
   client or is free or a reservation. */

Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
                                   const HChar *syscallname)
{
   Bool ret;

   if (size == 0)
      return True;

   ret = VG_(am_is_valid_for_client_or_free_or_resvn)
            (start,size,VKI_PROT_NONE);

   if (0)
      VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
                  syscallname, start, start+size-1, (Int)ret);

   if (!ret && syscallname != NULL) {
      VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
                               "to modify addresses %#lx-%#lx\n",
                               syscallname, start, start+size-1);
      if (VG_(clo_verbosity) > 1) {
         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
      }
   }

   return ret;
}


Bool ML_(client_signal_OK)(Int sigNo)
{
   /* signal 0 is OK for kill */
   Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;

   //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);

   return ret;
}


/* Handy small function to help stop wrappers from segfaulting when
   presented with bogus client addresses.  Is not used for generating
   user-visible errors. */

Bool ML_(safe_to_deref) ( void* start, SizeT size )
{
   return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
}
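
/* Typical (illustrative) use in a syscall wrapper, guarding a direct
   dereference of a client-supplied pointer; ARG2 stands in for
   whatever argument the wrapper is inspecting:

      struct vki_msghdr *msg = (struct vki_msghdr *)ARG2;
      if (ML_(safe_to_deref)(msg, sizeof(*msg))) {
         ... look at msg->msg_iov etc. without risk of faulting ...
      }

   Note this only asks whether the range is client-readable; it does
   not report an addressability error to the tool. */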


/* ---------------------------------------------------------------------
   Doing mmap, mremap
   ------------------------------------------------------------------ */

/* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
   munmap, mprotect (and mremap??) work at the page level.  So addresses
   and lengths must be adjusted for this. */

/* Mash around start and length so that the area exactly covers
   an integral number of pages.  If we don't do that, memcheck's
   idea of addressable memory diverges from that of the
   kernel's, which causes the leak detector to crash. */
static
void page_align_addr_and_len( Addr* a, SizeT* len)
{
   Addr ra;

   ra = VG_PGROUNDDN(*a);
   *len = VG_PGROUNDUP(*a + *len) - ra;
   *a = ra;
}
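
/* Worked example (illustrative, assuming a 4 KB page size): for
   *a = 0x400123 and *len = 0x1000, the code above computes

      ra   = VG_PGROUNDDN(0x400123)            = 0x400000
      *len = VG_PGROUNDUP(0x400123 + 0x1000) - ra
           = 0x402000 - 0x400000               = 0x2000
      *a   = 0x400000

   so a range that straddles a page boundary grows to cover both
   pages it touches. */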

static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
                                UInt flags, Int fd, Off64T offset)
{
   Bool d;

   /* 'a' is the return value from a real kernel mmap, hence: */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* whereas len is whatever the syscall supplied.  So: */
   len = VG_PGROUNDUP(len);

   d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );

   if (d)
      VG_(discard_translations)( a, (ULong)len,
                                 "notify_core_of_mmap" );
}

static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
{
   Bool rr, ww, xx;

   /* 'a' is the return value from a real kernel mmap, hence: */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* whereas len is whatever the syscall supplied.  So: */
   len = VG_PGROUNDUP(len);

   rr = toBool(prot & VKI_PROT_READ);
   ww = toBool(prot & VKI_PROT_WRITE);
   xx = toBool(prot & VKI_PROT_EXEC);

   VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
}


/* When a client mmap has been successfully done, this function must
   be called.  It notifies both aspacem and the tool of the new
   mapping.

   JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
   it is called from is POST(sys_io_setup).  In particular,
   ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
   client mmap.  But it doesn't call this function; instead it does the
   relevant notifications itself.  Here, we just pass di_handle=0 to
   notify_tool_of_mmap as we have no better information.  But really this
   function should be done away with; problem is I don't understand what
   POST(sys_io_setup) does or how it works.

   [However, this function is used lots for Darwin, because
    ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
 */
void
ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
                                    UInt flags, Int fd, Off64T offset )
{
   // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
   // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
   // Should it?  --njn
   notify_core_of_mmap(a, len, prot, flags, fd, offset);
   notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
}

void
ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
{
   Bool d;

   page_align_addr_and_len(&a, &len);
   d = VG_(am_notify_munmap)(a, len);
   VG_TRACK( die_mem_munmap, a, len );
   VG_(di_notify_munmap)( a, len );
   if (d)
      VG_(discard_translations)( a, (ULong)len,
                                 "ML_(notify_core_and_tool_of_munmap)" );
}

void
ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
{
   Bool rr = toBool(prot & VKI_PROT_READ);
   Bool ww = toBool(prot & VKI_PROT_WRITE);
   Bool xx = toBool(prot & VKI_PROT_EXEC);
   Bool d;

   page_align_addr_and_len(&a, &len);
   d = VG_(am_notify_mprotect)(a, len, prot);
   VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
   VG_(di_notify_mprotect)( a, len, prot );
   if (d)
      VG_(discard_translations)( a, (ULong)len,
                                 "ML_(notify_core_and_tool_of_mprotect)" );
}



#if HAVE_MREMAP
/* Expand (or shrink) an existing mapping, potentially moving it at
   the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
*/
static
SysRes do_mremap( Addr old_addr, SizeT old_len,
                  Addr new_addr, SizeT new_len,
                  UWord flags, ThreadId tid )
{
#  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)

   Bool      ok, d;
   NSegment const* old_seg;
   Addr      advised;
   Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
   Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);

   if (0)
      VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
                  old_addr,old_len,new_addr,new_len,
                  flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
                  flags & VKI_MREMAP_FIXED ? "FIXED" : "");
   if (0)
      VG_(am_show_nsegments)(0, "do_remap: before");

   if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
      goto eINVAL;

   if (!VG_IS_PAGE_ALIGNED(old_addr))
      goto eINVAL;

   old_len = VG_PGROUNDUP(old_len);
   new_len = VG_PGROUNDUP(new_len);

   if (new_len == 0)
      goto eINVAL;

   /* kernel doesn't reject this, but we do. */
   if (old_len == 0)
      goto eINVAL;

   /* reject wraparounds */
   if (old_addr + old_len < old_addr)
      goto eINVAL;
   if (f_fixed == True && new_addr + new_len < new_len)
      goto eINVAL;

   /* kernel rejects all fixed, no-move requests (which are
      meaningless). */
   if (f_fixed == True && f_maymove == False)
      goto eINVAL;

   /* Stay away from non-client areas. */
   if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
      goto eINVAL;

   /* In all remaining cases, if the old range does not fall within a
      single segment, fail. */
   old_seg = VG_(am_find_nsegment)( old_addr );
   if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
      goto eINVAL;
   if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC &&
       old_seg->kind != SkShmC)
      goto eINVAL;

   vg_assert(old_len > 0);
   vg_assert(new_len > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(old_len));
   vg_assert(VG_IS_PAGE_ALIGNED(new_len));
   vg_assert(VG_IS_PAGE_ALIGNED(old_addr));

   /* There are 3 remaining cases:

      * maymove == False

        new space has to be at old address, so:
            - shrink    -> unmap end
            - same size -> do nothing
            - grow      -> if can grow in-place, do so, else fail

      * maymove == True, fixed == False

        new space can be anywhere, so:
            - shrink    -> unmap end
            - same size -> do nothing
            - grow      -> if can grow in-place, do so, else
                           move to anywhere large enough, else fail

      * maymove == True, fixed == True

        new space must be at new address, so:

            - if new address is not page aligned, fail
            - if new address range overlaps old one, fail
            - if new address range cannot be allocated, fail
            - else move to new address range with new size
            - else fail
   */

   if (f_maymove == False) {
      /* new space has to be at old address */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == False) {
      /* new space can be anywhere */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_move_anywhere_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == True) {
      /* new space can only be at the new address */
      if (!VG_IS_PAGE_ALIGNED(new_addr))
         goto eINVAL;
      if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
         /* no overlap */
      } else {
         goto eINVAL;
      }
      if (new_addr == 0)
         goto eINVAL;
         /* VG_(am_get_advisory_client_simple) interprets zero to mean
            non-fixed, which is not what we want */
      advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
      if (!ok || advised != new_addr)
         goto eNOMEM;
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, new_addr, new_len );
      if (ok) {
         VG_TRACK( copy_mem_remap, old_addr, new_addr,
                                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
                      0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
            VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
         }
         return VG_(mk_SysRes_Success)( new_addr );
      }
      goto eNOMEM;
   }

   /* end of the 3 cases */
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_move_anywhere_or_fail:
   {
   /* try growing it in-place */
   Addr   needA = old_addr + old_len;
   SSizeT needL = new_len - old_len;

   vg_assert(needL > 0);
   vg_assert(needA > 0);

   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* Fixes bug #129866. */
      ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
   }
   if (ok && advised == needA) {
      const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
      if (new_seg) {
         VG_TRACK( new_mem_mmap, needA, needL,
                                 new_seg->hasR,
                                 new_seg->hasW, new_seg->hasX,
                                 0/*di_handle*/ );
         return VG_(mk_SysRes_Success)( old_addr );
      }
   }

   /* that failed.  Look elsewhere. */
   advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
   if (ok) {
      Bool oldR = old_seg->hasR;
      Bool oldW = old_seg->hasW;
      Bool oldX = old_seg->hasX;
      /* assert new area does not overlap old */
      vg_assert(advised+new_len-1 < old_addr
                || advised > old_addr+old_len-1);
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, advised, new_len );
      if (ok) {
         VG_TRACK( copy_mem_remap, old_addr, advised,
                                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
                      oldR, oldW, oldX, 0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
            VG_(discard_translations)( advised, new_len, "do_remap(5)" );
         }
         return VG_(mk_SysRes_Success)( advised );
      }
   }
   goto eNOMEM;
   }
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_fail:
   {
   Addr  needA = old_addr + old_len;
   SizeT needL = new_len - old_len;

   vg_assert(needA > 0);

   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* Fixes bug #129866. */
      ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
   }
   if (!ok || advised != needA)
      goto eNOMEM;
   const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
   if (!new_seg)
      goto eNOMEM;
   VG_TRACK( new_mem_mmap, needA, needL,
                           new_seg->hasR, new_seg->hasW, new_seg->hasX,
                           0/*di_handle*/ );

   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  shrink_in_place:
   {
   SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
   if (sr_isError(sres))
      return sres;
   VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
   if (d)
      VG_(discard_translations)( old_addr+new_len, old_len-new_len,
                                 "do_remap(7)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  same_in_place:
   return VG_(mk_SysRes_Success)( old_addr );
   /*NOTREACHED*/ vg_assert(0);

  eINVAL:
   return VG_(mk_SysRes_Error)( VKI_EINVAL );
  eNOMEM:
   return VG_(mk_SysRes_Error)( VKI_ENOMEM );

#  undef MIN_SIZET
}
#endif /* HAVE_MREMAP */


/* ---------------------------------------------------------------------
   File-descriptor tracking
   ------------------------------------------------------------------ */

/* One of these is allocated for each open file descriptor.  */
typedef struct OpenFd
{
   Int fd;                        /* The file descriptor */
   HChar *pathname;               /* NULL if not a regular file or unknown */
   ExeContext *where;             /* NULL if inherited from parent */
   struct OpenFd *next, *prev;
} OpenFd;

/* List of allocated file descriptors. */
static OpenFd *allocated_fds = NULL;

/* Count of open file descriptors. */
static Int fd_count = 0;


/* Note the fact that a file descriptor was just closed. */
static
void record_fd_close(Int fd)
{
   OpenFd *i = allocated_fds;

   if (fd >= VG_(fd_hard_limit))
      return;			/* Valgrind internal */

   while(i) {
      if(i->fd == fd) {
         if(i->prev)
            i->prev->next = i->next;
         else
            allocated_fds = i->next;
         if(i->next)
            i->next->prev = i->prev;
         if(i->pathname)
            VG_(free) (i->pathname);
         VG_(free) (i);
         fd_count--;
         break;
      }
      i = i->next;
   }
}

/* Note the fact that a file descriptor was just opened.  If the
   tid is -1, this indicates an inherited fd.  If the pathname is NULL,
   this either indicates a non-standard file (i.e. a pipe or socket or
   some such thing) or that we don't know the filename.  If the fd is
   already open, then we're probably doing a dup2() to an existing fd,
   so just overwrite the existing one. */
void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd,
                                         const HChar *pathname)
{
   OpenFd *i;

   if (fd >= VG_(fd_hard_limit))
      return;			/* Valgrind internal */

   /* Check to see if this fd is already open. */
   i = allocated_fds;
   while (i) {
      if (i->fd == fd) {
         if (i->pathname) VG_(free)(i->pathname);
         break;
      }
      i = i->next;
   }

   /* Not already one: allocate an OpenFd */
   if (i == NULL) {
      i = VG_(malloc)("syswrap.rfdowgn.1", sizeof(OpenFd));

      i->prev = NULL;
      i->next = allocated_fds;
      if(allocated_fds) allocated_fds->prev = i;
      allocated_fds = i;
      fd_count++;
   }

   i->fd = fd;
   i->pathname = VG_(strdup)("syswrap.rfdowgn.2", pathname);
   i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
}

// Record opening of an fd, and find its name.
void ML_(record_fd_open_named)(ThreadId tid, Int fd)
{
   const HChar* buf;
   const HChar* name;
   if (VG_(resolve_filename)(fd, &buf))
      name = buf;
   else
      name = NULL;

   ML_(record_fd_open_with_given_name)(tid, fd, name);
}

// Record opening of a nameless fd.
void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
{
   ML_(record_fd_open_with_given_name)(tid, fd, NULL);
}

static
HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
{
   if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
      VG_(sprintf)(name, "<unknown>");
   } else {
      VG_(sprintf)(name, "%s", sa->sun_path);
   }

   return name;
}

static
HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
{
   if (sa == NULL || len == 0) {
      VG_(sprintf)(name, "<unknown>");
   } else if (sa->sin_port == 0) {
      VG_(sprintf)(name, "<unbound>");
   } else {
      UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
      VG_(sprintf)(name, "%u.%u.%u.%u:%u",
                   (addr>>24) & 0xFF, (addr>>16) & 0xFF,
                   (addr>>8) & 0xFF, addr & 0xFF,
                   VG_(ntohs)(sa->sin_port));
   }

   return name;
}
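
/* Examples (illustrative) of the formatting above: a sockaddr_in
   holding 127.0.0.1 port 8080 produces "127.0.0.1:8080"; a zero port
   produces "<unbound>"; a NULL or zero-length address produces
   "<unknown>". */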

static
void inet6_format(HChar *s, const UChar ip[16])
{
   static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};

   if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
      const struct vki_in_addr *sin_addr =
          (const struct vki_in_addr *)(ip + 12);
      UInt addr = VG_(ntohl)(sin_addr->s_addr);

      VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
                   (addr>>24) & 0xFF, (addr>>16) & 0xFF,
                   (addr>>8) & 0xFF, addr & 0xFF);
   } else {
      Bool compressing = False;
      Bool compressed = False;
      Int len = 0;
      Int i;

      for (i = 0; i < 16; i += 2) {
         UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
         if (word == 0 && !compressed) {
            compressing = True;
         } else {
            if (compressing) {
               compressing = False;
               compressed = True;
               s[len++] = ':';
            }
            if (i > 0) {
               s[len++] = ':';
            }
            len += VG_(sprintf)(s + len, "%x", word);
         }
      }

      if (compressing) {
         s[len++] = ':';
         s[len++] = ':';
      }

      s[len++] = 0;
   }

   return;
}
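
/* Examples (illustrative) of the formatting produced above:

      ::1                         loopback; leading zero words collapse
      ::ffff:192.168.0.1          V4-mapped case (matches V4mappedprefix)
      2001:db8::8:800:200c:417a   interior run of zero words compressed

   Only the first run of zero 16-bit words is compressed, mirroring the
   'compressed' flag in the loop. */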

static
HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
{
   if (sa == NULL || len == 0) {
      VG_(sprintf)(name, "<unknown>");
   } else if (sa->sin6_port == 0) {
      VG_(sprintf)(name, "<unbound>");
   } else {
      HChar addr[100];    // large enough
      inet6_format(addr, (void *)&(sa->sin6_addr));
      VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
   }

   return name;
}

/*
 * Try to get some details about a socket.
 */
static void
getsockdetails(Int fd)
{
   union u {
      struct vki_sockaddr a;
      struct vki_sockaddr_in in;
      struct vki_sockaddr_in6 in6;
      struct vki_sockaddr_un un;
   } laddr;
   Int llen;

   llen = sizeof(laddr);
   VG_(memset)(&laddr, 0, llen);

   if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
      switch(laddr.a.sa_family) {
      case VKI_AF_INET: {
         HChar lname[32];   // large enough
         HChar pname[32];   // large enough
         struct vki_sockaddr_in paddr;
         Int plen = sizeof(struct vki_sockaddr_in);

         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
                         inet_to_name(&(laddr.in), llen, lname),
                         inet_to_name(&paddr, plen, pname));
         } else {
            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
                         fd, inet_to_name(&(laddr.in), llen, lname));
         }
         return;
         }
      case VKI_AF_INET6: {
         HChar lname[128];  // large enough
         HChar pname[128];  // large enough
         struct vki_sockaddr_in6 paddr;
         Int plen = sizeof(struct vki_sockaddr_in6);

         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
            VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
                         inet6_to_name(&(laddr.in6), llen, lname),
                         inet6_to_name(&paddr, plen, pname));
         } else {
            VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
                         fd, inet6_to_name(&(laddr.in6), llen, lname));
         }
         return;
         }
      case VKI_AF_UNIX: {
         static char lname[256];
         VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
                      unix_to_name(&(laddr.un), llen, lname));
         return;
         }
      default:
         VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
                      laddr.a.sa_family, fd);
         return;
      }
   }

   VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
}


/* Dump out a summary, and a more detailed list, of open file descriptors. */
void VG_(show_open_fds) (const HChar* when)
{
   OpenFd *i = allocated_fds;

   VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);

   while (i) {
      if (i->pathname) {
         VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
                      i->pathname);
      } else {
         Int val;
         Int len = sizeof(val);

         if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
             == -1) {
            VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
         } else {
            getsockdetails(i->fd);
         }
      }

      if(i->where) {
         VG_(pp_ExeContext)(i->where);
         VG_(message)(Vg_UserMsg, "\n");
      } else {
         VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
         VG_(message)(Vg_UserMsg, "\n");
      }

      i = i->next;
   }

   VG_(message)(Vg_UserMsg, "\n");
}

/* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
   have /proc support compiled in, or a non-Linux kernel), then we need to
   find out what file descriptors we inherited from our parent process the
   hard way - by checking each fd in turn. */
static
void init_preopened_fds_without_proc_self_fd(void)
{
   struct vki_rlimit lim;
   UInt count;
   Int i;

   if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
      /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
         an arbitrarily high number.  1024 happens to be the limit in
         the 2.4 Linux kernels. */
      count = 1024;
   } else {
      count = lim.rlim_cur;
   }

   for (i = 0; i < count; i++)
      if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
         ML_(record_fd_open_named)(-1, i);
}

/* Initialize the list of open file descriptors with the file descriptors
   we inherited from our parent process. */

void VG_(init_preopened_fds)(void)
{
// DDD: should probably use HAVE_PROC here or similar, instead.
#if defined(VGO_linux)
   Int ret;
   struct vki_dirent64 d;
   SysRes f;

   f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
   if (sr_isError(f)) {
      init_preopened_fds_without_proc_self_fd();
      return;
   }

   while ((ret = VG_(getdents64)(sr_Res(f), &d, sizeof(d))) != 0) {
      if (ret == -1)
         goto out;

      if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
         HChar* s;
         Int fno = VG_(strtoll10)(d.d_name, &s);
         if (*s == '\0') {
            if (fno != sr_Res(f))
               if (VG_(clo_track_fds))
                  ML_(record_fd_open_named)(-1, fno);
         } else {
            VG_(message)(Vg_DebugMsg,
               "Warning: invalid file name in /proc/self/fd: %s\n",
               d.d_name);
         }
      }

      VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
   }

  out:
   VG_(close)(sr_Res(f));

#elif defined(VGO_darwin)
   init_preopened_fds_without_proc_self_fd();

#else
#  error Unknown OS
#endif
}

static
HChar *strdupcat ( const HChar* cc, const HChar *s1, const HChar *s2,
                   ArenaId aid )
{
   UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
   HChar *result = VG_(arena_malloc) ( aid, cc, len );
   VG_(strcpy) ( result, s1 );
   VG_(strcat) ( result, s2 );
   return result;
}

static
void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
                            const HChar *msg, Addr base, SizeT size )
{
   HChar *outmsg = strdupcat ( "di.syswrap.pmrs.1",
                               "sendmsg", msg, VG_AR_CORE );
   PRE_MEM_READ( outmsg, base, size );
   VG_(free) ( outmsg );
}

static
void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
                             const HChar *msg, Addr base, SizeT size )
{
   HChar *outmsg = strdupcat ( "di.syswrap.pmwr.1",
                               "recvmsg", msg, VG_AR_CORE );
   if ( read )
      PRE_MEM_READ( outmsg, base, size );
   else
      PRE_MEM_WRITE( outmsg, base, size );
   VG_(free) ( outmsg );
}

static
void post_mem_write_recvmsg ( ThreadId tid, Bool read,
                              const HChar *fieldName, Addr base, SizeT size )
{
   if ( !read )
      POST_MEM_WRITE( base, size );
}

static
void msghdr_foreachfield (
        ThreadId tid,
        const HChar *name,
        struct vki_msghdr *msg,
        UInt length,
        void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
        Bool rekv /* "recv" apparently shadows some header decl on OSX108 */
     )
{
   HChar *fieldName;

   if ( !msg )
      return;

   fieldName = VG_(malloc) ( "di.syswrap.mfef", VG_(strlen)(name) + 32 );

   VG_(sprintf) ( fieldName, "(%s)", name );

   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );

   /* msg_flags is completely ignored for send_mesg, recv_mesg doesn't read
      the field, but does write to it. */
   if ( rekv )
      foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );

   if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
        && msg->msg_name ) {
      VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
      foreach_func ( tid, False, fieldName,
                     (Addr)msg->msg_name, msg->msg_namelen );
   }

   if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
        && msg->msg_iov ) {
      struct vki_iovec *iov = msg->msg_iov;
      UInt i;

      VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );

      foreach_func ( tid, True, fieldName,
                     (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );

      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
         UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
         VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
         foreach_func ( tid, False, fieldName,
                        (Addr)iov->iov_base, iov_len );
         length = length - iov_len;
      }
   }

   if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
        && msg->msg_control )
   {
      VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
      foreach_func ( tid, False, fieldName,
                     (Addr)msg->msg_control, msg->msg_controllen );
   }

   VG_(free) ( fieldName );
}
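
/* Illustrative only (the real call sites live in the sendmsg/recvmsg
   wrappers, which are not part of this excerpt): the walker above is
   driven with the helpers defined earlier, along the lines of

      msghdr_foreachfield(tid, name, msg, send_len,
                          pre_mem_read_sendmsg,   False);  // sendmsg PRE
      msghdr_foreachfield(tid, name, msg, recv_len,
                          post_mem_write_recvmsg, True );  // recvmsg POST

   i.e. one traversal routine reused with different callbacks for
   read- versus write-checking. */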

static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
{
   struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);

   while (cm) {
      if (cm->cmsg_level == VKI_SOL_SOCKET &&
          cm->cmsg_type == VKI_SCM_RIGHTS ) {
         Int *fds = (Int *) VKI_CMSG_DATA(cm);
         Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
                         / sizeof(int);
         Int i;

         for (i = 0; i < fdc; i++)
            if(VG_(clo_track_fds))
               // XXX: must we check the range on these fds with
               //      ML_(fd_allowed)()?
               ML_(record_fd_open_named)(tid, fds[i]);
      }

      cm = VKI_CMSG_NXTHDR(msg, cm);
   }
}
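
/* Worked example (illustrative; sizes are assumptions about a 64-bit
   Linux ABI): an SCM_RIGHTS control message carrying two descriptors
   has

      cmsg_len = CMSG_ALIGN(sizeof(struct cmsghdr)) + 2*sizeof(int)
               = 16 + 8 = 24

   so fdc = (24 - 16) / sizeof(int) = 2, and both received fds are
   recorded when --track-fds is in effect. */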

/* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
static
void pre_mem_read_sockaddr ( ThreadId tid,
                             const HChar *description,
                             struct vki_sockaddr *sa, UInt salen )
{
   HChar *outmsg;
   struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
   struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
   struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
#  ifdef VKI_AF_BLUETOOTH
   struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
#  endif
#  ifdef VKI_AF_NETLINK
   struct vki_sockaddr_nl*  nl   = (struct vki_sockaddr_nl *)sa;
#  endif

   /* NULL/zero-length sockaddrs are legal */
   if ( sa == NULL || salen == 0 ) return;

   outmsg = VG_(malloc) ( "di.syswrap.pmr_sockaddr.1",
                          VG_(strlen)( description ) + 30 );

   VG_(sprintf) ( outmsg, description, "sa_family" );
   PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));

   switch (sa->sa_family) {

      case VKI_AF_UNIX:
         VG_(sprintf) ( outmsg, description, "sun_path" );
         PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
         // GrP fixme max of sun_len-2? what about nul char?
         break;

      case VKI_AF_INET:
         VG_(sprintf) ( outmsg, description, "sin_port" );
         PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
         VG_(sprintf) ( outmsg, description, "sin_addr" );
         PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
         break;

      case VKI_AF_INET6:
         VG_(sprintf) ( outmsg, description, "sin6_port" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
         VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
         VG_(sprintf) ( outmsg, description, "sin6_addr" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
         VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
         break;

#     ifdef VKI_AF_BLUETOOTH
      case VKI_AF_BLUETOOTH:
         VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
         PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
         VG_(sprintf) ( outmsg, description, "rc_channel" );
         PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
         break;
#     endif

#     ifdef VKI_AF_NETLINK
      case VKI_AF_NETLINK:
         VG_(sprintf)(outmsg, description, "nl_pid");
         PRE_MEM_READ(outmsg, (Addr)&nl->nl_pid, sizeof(nl->nl_pid));
         VG_(sprintf)(outmsg, description, "nl_groups");
         PRE_MEM_READ(outmsg, (Addr)&nl->nl_groups, sizeof(nl->nl_groups));
         break;
#     endif

#     ifdef VKI_AF_UNSPEC
      case VKI_AF_UNSPEC:
         break;
#     endif

      default:
         /* No specific information about this address family.
            Let's just check the full data following the family.
            Note that this can give false positives if this (unknown)
            struct sockaddr_???? has padding bytes between its elements. */
         VG_(sprintf) ( outmsg, description, "sa_data" );
         PRE_MEM_READ( outmsg, (Addr)&sa->sa_family + sizeof(sa->sa_family),
                       salen -  sizeof(sa->sa_family));
         break;
   }

   VG_(free) ( outmsg );
}

/* Dereference a pointer to a UInt. */
static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
{
   UInt* a_p = (UInt*)a;
   PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
   if (a_p == NULL)
      return 0;
   else
      return *a_p;
}

void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
                                  const HChar* buf_s, const HChar* buflen_s )
{
   if (VG_(tdict).track_pre_mem_write) {
      UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
      if (buflen_in > 0) {
         VG_(tdict).track_pre_mem_write(
            Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
      }
   }
}

void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
                                   Addr buf_p, Addr buflen_p, const HChar* s )
{
   if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
      UInt buflen_out = deref_UInt( tid, buflen_p, s);
      if (buflen_out > 0 && buf_p != (Addr)NULL) {
         VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
      }
   }
}
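
/* Illustrative use of the two helpers above, following the
   value-result length protocol used by getsockname/getpeername/accept
   style calls: the PRE hook checks writability up to the *input*
   length, the POST hook marks the buffer defined up to the *output*
   length.

      PRE:  ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
                                         "getsockname(name)",
                                         "getsockname(namelen_in)" );
      POST: ML_(buf_and_len_post_check)( tid, res, name_p, namelen_p,
                                         "getsockname(namelen_out)" );

   This matches the socketcall wrappers further down in this file. */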

/* ---------------------------------------------------------------------
   Data seg end, for brk()
   ------------------------------------------------------------------ */

/*   +--------+------------+
     | anon   |    resvn   |
     +--------+------------+

     ^     ^  ^
     |     |  boundary is page aligned
     |     VG_(brk_limit) -- no alignment constraint
     VG_(brk_base) -- page aligned -- does not move

     Both the anon part and the reservation part are always at least
     one page.
*/
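
/* Worked example (illustrative addresses): with VG_(brk_base) =
   0x804f000 and VG_(brk_limit) = 0x8051234, a client brk(0x8053000)
   that still fits inside the anon part simply sets VG_(brk_limit) to
   0x8053000.  If it does not fit, do_brk() below asks aspacem to
   extend the anon part into the adjacent reservation by the
   page-rounded delta, and returns the old limit (i.e. fails) if the
   reservation cannot absorb it. */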
1185 
1186 /* Set the new data segment end to NEWBRK.  If this succeeds, return
1187    NEWBRK, else return the current data segment end. */
1188 
do_brk(Addr newbrk,ThreadId tid)1189 static Addr do_brk ( Addr newbrk, ThreadId tid )
1190 {
1191    NSegment const* aseg;
1192    Addr newbrkP;
1193    SizeT delta;
1194    Bool debug = False;
1195 
1196    if (debug)
1197       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
1198 		  VG_(brk_base), VG_(brk_limit), newbrk);
1199 
1200    if (0) VG_(am_show_nsegments)(0, "in_brk");
1201 
1202    if (newbrk < VG_(brk_base))
1203       /* Clearly impossible. */
1204       goto bad;
1205 
1206    if (newbrk < VG_(brk_limit)) {
1207       /* shrinking the data segment.  Be lazy and don't munmap the
1208          excess area. */
1209       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
1210       vg_assert(seg);
1211 
1212       if (seg->hasT)
1213          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
1214                                     "do_brk(shrink)" );
1215       /* Since we're being lazy and not unmapping pages, we have to
1216          zero out the area, so that if the area later comes back into
1217          circulation, it will be filled with zeroes, as if it really
1218          had been unmapped and later remapped.  Be a bit paranoid and
1219          try hard to ensure we're not going to segfault by doing the
1220          write - check both ends of the range are in the same segment
1221          and that segment is writable. */
1222       NSegment const * seg2;
1223 
1224       seg2 = VG_(am_find_nsegment)( VG_(brk_limit) - 1 );
1225       vg_assert(seg2);
1226 
1227       if (seg == seg2 && seg->hasW)
1228          VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
1229 
1230       VG_(brk_limit) = newbrk;
1231       return newbrk;
1232    }
1233 
1234    /* otherwise we're expanding the brk segment. */
1235    if (VG_(brk_limit) > VG_(brk_base))
1236       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1237    else
1238       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
1239 
1240    /* These should be assured by setup_client_dataseg in m_main. */
1241    vg_assert(aseg);
1242    vg_assert(aseg->kind == SkAnonC);
1243 
1244    if (newbrk <= aseg->end + 1) {
1245       /* still fits within the anon segment. */
1246       VG_(brk_limit) = newbrk;
1247       return newbrk;
1248    }
1249 
1250    newbrkP = VG_PGROUNDUP(newbrk);
1251    delta = newbrkP - (aseg->end + 1);
1252    vg_assert(delta > 0);
1253    vg_assert(VG_IS_PAGE_ALIGNED(delta));
1254 
1255    Bool overflow;
1256    if (! VG_(am_extend_into_adjacent_reservation_client)( aseg->start, delta,
1257                                                           &overflow)) {
1258       if (overflow)
1259          VG_(umsg)("brk segment overflow in thread #%d: can't grow to %#lx\n",
1260                    tid, newbrkP);
1261       else
1262          VG_(umsg)("Cannot map memory to grow brk segment in thread #%d "
1263                    "to %#lx\n", tid, newbrkP);
1264       goto bad;
1265    }
1266 
1267    VG_(brk_limit) = newbrk;
1268    return newbrk;
1269 
1270   bad:
1271    return VG_(brk_limit);
1272 }
1273 
1274 
1275 /* ---------------------------------------------------------------------
1276    Vet file descriptors for sanity
1277    ------------------------------------------------------------------ */
1278 /*
1279 > - what does the "Bool soft" parameter mean?
1280 
1281 (Tom Hughes, 3 Oct 05):
1282 
1283 Whether or not to consider a file descriptor invalid if it is above
1284 the current soft limit.
1285 
1286 Basically if we are testing whether a newly created file descriptor is
1287 valid (in a post handler) then we set soft to true, and if we are
1288 testing whether a file descriptor that is about to be used (in a pre
1289 handler) is valid [viz, an already-existing fd] then we set it to false.
1290 
1291 The point is that if the (virtual) soft limit is lowered then any
1292 existing descriptors can still be read/written/closed etc (so long as
1293 they are below the valgrind reserved descriptors) but no new
1294 descriptors can be created above the new soft limit.
1295 
1296 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1297 */
1298 
1299 /* Return true if we're allowed to use or create this fd */
ML_(fd_allowed)1300 Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
1301                      Bool isNewFd)
1302 {
1303    Bool allowed = True;
1304 
1305    /* hard limits always apply */
1306    if (fd < 0 || fd >= VG_(fd_hard_limit))
1307       allowed = False;
1308 
1309    /* hijacking the output fds is never allowed */
1310    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1311       allowed = False;
1312 
1313    /* if creating a new fd (rather than using an existing one), the
1314       soft limit must also be observed */
1315    if (isNewFd && fd >= VG_(fd_soft_limit))
1316       allowed = False;
1317 
1318    /* this looks like it ought to be included, but causes problems: */
1319    /*
1320    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1321       allowed = False;
1322    */
1323    /* The difficulty is as follows: consider a program P which expects
1324       to be able to mess with (redirect) its own stderr (fd 2).
1325       Usually to deal with P we would issue command line flags to send
1326       logging somewhere other than stderr, so as not to disrupt P.
1327       The problem is that -d unilaterally hijacks stderr with no
1328       consultation with P.  And so, if this check is enabled, P will
1329       work OK normally but fail if -d is issued.
1330 
1331       Basically -d is a hack and you take your chances when using it.
1332       It's very useful for low level debugging -- particularly at
1333       startup -- and having its presence change the behaviour of the
1334       client is exactly what we don't want.  */
1335 
1336    /* croak? */
1337    if ((!allowed) && VG_(showing_core_errors)() ) {
1338       VG_(message)(Vg_UserMsg,
1339          "Warning: invalid file descriptor %d in syscall %s()\n",
1340          fd, syscallname);
1341       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1342 	 VG_(message)(Vg_UserMsg,
1343             "   Use --log-fd=<number> to select an alternative log fd.\n");
1344       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1345 	 VG_(message)(Vg_UserMsg,
1346             "   Use --xml-fd=<number> to select an alternative XML "
1347             "output fd.\n");
1348       // DDD: consider always printing this stack trace, it's useful.
1349       // Also consider also making this a proper core error, ie.
1350       // suppressible and all that.
1351       if (VG_(clo_verbosity) > 1) {
1352          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1353       }
1354    }
1355 
1356    return allowed;
1357 }
1358 
1359 
1360 /* ---------------------------------------------------------------------
1361    Deal with a bunch of socket-related syscalls
1362    ------------------------------------------------------------------ */
1363 
1364 /* ------ */
1365 
1366 void
ML_(generic_PRE_sys_socketpair)1367 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1368                                   UWord arg0, UWord arg1,
1369                                   UWord arg2, UWord arg3 )
1370 {
1371    /* int socketpair(int d, int type, int protocol, int sv[2]); */
1372    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1373                   arg3, 2*sizeof(int) );
1374 }
1375 
1376 SysRes
ML_(generic_POST_sys_socketpair)1377 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1378                                    SysRes res,
1379                                    UWord arg0, UWord arg1,
1380                                    UWord arg2, UWord arg3 )
1381 {
1382    SysRes r = res;
1383    Int fd1 = ((Int*)arg3)[0];
1384    Int fd2 = ((Int*)arg3)[1];
1385    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1386    POST_MEM_WRITE( arg3, 2*sizeof(int) );
1387    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1388        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1389       VG_(close)(fd1);
1390       VG_(close)(fd2);
1391       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1392    } else {
1393       POST_MEM_WRITE( arg3, 2*sizeof(int) );
1394       if (VG_(clo_track_fds)) {
1395          ML_(record_fd_open_nameless)(tid, fd1);
1396          ML_(record_fd_open_nameless)(tid, fd2);
1397       }
1398    }
1399    return r;
1400 }
1401 
1402 /* ------ */
1403 
1404 SysRes
ML_(generic_POST_sys_socket)1405 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1406 {
1407    SysRes r = res;
1408    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1409    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1410       VG_(close)(sr_Res(res));
1411       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1412    } else {
1413       if (VG_(clo_track_fds))
1414          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1415    }
1416    return r;
1417 }
1418 
1419 /* ------ */
1420 
1421 void
ML_(generic_PRE_sys_bind)1422 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1423                             UWord arg0, UWord arg1, UWord arg2 )
1424 {
1425    /* int bind(int sockfd, struct sockaddr *my_addr,
1426                int addrlen); */
1427    pre_mem_read_sockaddr(
1428       tid, "socketcall.bind(my_addr.%s)",
1429       (struct vki_sockaddr *) arg1, arg2
1430    );
1431 }
1432 
1433 /* ------ */
1434 
1435 void
ML_(generic_PRE_sys_accept)1436 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1437                               UWord arg0, UWord arg1, UWord arg2 )
1438 {
1439    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1440    Addr addr_p     = arg1;
1441    Addr addrlen_p  = arg2;
1442    if (addr_p != (Addr)NULL)
1443       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1444                                    "socketcall.accept(addr)",
1445                                    "socketcall.accept(addrlen_in)" );
1446 }
1447 
1448 SysRes
ML_(generic_POST_sys_accept)1449 ML_(generic_POST_sys_accept) ( ThreadId tid,
1450                                SysRes res,
1451                                UWord arg0, UWord arg1, UWord arg2 )
1452 {
1453    SysRes r = res;
1454    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1455    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1456       VG_(close)(sr_Res(res));
1457       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1458    } else {
1459       Addr addr_p     = arg1;
1460       Addr addrlen_p  = arg2;
1461       if (addr_p != (Addr)NULL)
1462          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1463                                        "socketcall.accept(addrlen_out)" );
1464       if (VG_(clo_track_fds))
1465           ML_(record_fd_open_nameless)(tid, sr_Res(res));
1466    }
1467    return r;
1468 }
1469 
1470 /* ------ */
1471 
1472 void
ML_(generic_PRE_sys_sendto)1473 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1474                               UWord arg0, UWord arg1, UWord arg2,
1475                               UWord arg3, UWord arg4, UWord arg5 )
1476 {
1477    /* int sendto(int s, const void *msg, int len,
1478                  unsigned int flags,
1479                  const struct sockaddr *to, int tolen); */
1480    PRE_MEM_READ( "socketcall.sendto(msg)",
1481                  arg1, /* msg */
1482                  arg2  /* len */ );
1483    pre_mem_read_sockaddr(
1484       tid, "socketcall.sendto(to.%s)",
1485       (struct vki_sockaddr *) arg4, arg5
1486    );
1487 }
1488 
1489 /* ------ */
1490 
1491 void
ML_(generic_PRE_sys_send)1492 ML_(generic_PRE_sys_send) ( ThreadId tid,
1493                             UWord arg0, UWord arg1, UWord arg2 )
1494 {
1495    /* int send(int s, const void *msg, size_t len, int flags); */
1496    PRE_MEM_READ( "socketcall.send(msg)",
1497                   arg1, /* msg */
1498                   arg2  /* len */ );
1499 
1500 }
1501 
1502 /* ------ */
1503 
1504 void
ML_(generic_PRE_sys_recvfrom)1505 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1506                                 UWord arg0, UWord arg1, UWord arg2,
1507                                 UWord arg3, UWord arg4, UWord arg5 )
1508 {
1509    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1510                    struct sockaddr *from, int *fromlen); */
1511    Addr buf_p      = arg1;
1512    Int  len        = arg2;
1513    Addr from_p     = arg4;
1514    Addr fromlen_p  = arg5;
1515    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1516    if (from_p != (Addr)NULL)
1517       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1518                                    "socketcall.recvfrom(from)",
1519                                    "socketcall.recvfrom(fromlen_in)" );
1520 }
1521 
1522 void
1523 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1524                                  SysRes res,
1525                                  UWord arg0, UWord arg1, UWord arg2,
1526                                  UWord arg3, UWord arg4, UWord arg5 )
1527 {
1528    Addr buf_p      = arg1;
1529    Int  len        = arg2;
1530    Addr from_p     = arg4;
1531    Addr fromlen_p  = arg5;
1532 
1533    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1534    if (from_p != (Addr)NULL)
1535       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1536                                     "socketcall.recvfrom(fromlen_out)" );
1537    POST_MEM_WRITE( buf_p, len );
1538 }
1539 
1540 /* ------ */
1541 
1542 void
1543 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1544                             UWord arg0, UWord arg1, UWord arg2 )
1545 {
1546    /* int recv(int s, void *buf, int len, unsigned int flags); */
1547    /* man 2 recv says:
1548       The  recv call is normally used only on a connected socket
1549       (see connect(2)) and is identical to recvfrom with a  NULL
1550       from parameter.
1551    */
1552    PRE_MEM_WRITE( "socketcall.recv(buf)",
1553                   arg1, /* buf */
1554                   arg2  /* len */ );
1555 }
1556 
1557 void
1558 ML_(generic_POST_sys_recv) ( ThreadId tid,
1559                              UWord res,
1560                              UWord arg0, UWord arg1, UWord arg2 )
1561 {
1562    if (res >= 0 && arg1 != 0) {
1563       POST_MEM_WRITE( arg1, /* buf */
1564                       arg2  /* len */ );
1565    }
1566 }
1567 
1568 /* ------ */
1569 
1570 void
1571 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1572                                UWord arg0, UWord arg1, UWord arg2 )
1573 {
1574    /* int connect(int sockfd,
1575                   struct sockaddr *serv_addr, int addrlen ); */
1576    pre_mem_read_sockaddr( tid,
1577                           "socketcall.connect(serv_addr.%s)",
1578                           (struct vki_sockaddr *) arg1, arg2);
1579 }
1580 
1581 /* ------ */
1582 
1583 void
1584 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1585                                   UWord arg0, UWord arg1, UWord arg2,
1586                                   UWord arg3, UWord arg4 )
1587 {
1588    /* int setsockopt(int s, int level, int optname,
1589                      const void *optval, int optlen); */
1590    PRE_MEM_READ( "socketcall.setsockopt(optval)",
1591                  arg3, /* optval */
1592                  arg4  /* optlen */ );
1593 }
1594 
1595 /* ------ */
1596 
1597 void
1598 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1599                                    UWord arg0, UWord arg1, UWord arg2 )
1600 {
1601    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1602    Addr name_p     = arg1;
1603    Addr namelen_p  = arg2;
1604    /* Nb: name_p cannot be NULL */
1605    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1606                                 "socketcall.getsockname(name)",
1607                                 "socketcall.getsockname(namelen_in)" );
1608 }
1609 
1610 void
1611 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1612                                     SysRes res,
1613                                     UWord arg0, UWord arg1, UWord arg2 )
1614 {
1615    Addr name_p     = arg1;
1616    Addr namelen_p  = arg2;
1617    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1618    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1619                                  "socketcall.getsockname(namelen_out)" );
1620 }
1621 
1622 /* ------ */
1623 
1624 void
1625 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1626                                    UWord arg0, UWord arg1, UWord arg2 )
1627 {
1628    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1629    Addr name_p     = arg1;
1630    Addr namelen_p  = arg2;
1631    /* Nb: name_p cannot be NULL */
1632    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1633                                 "socketcall.getpeername(name)",
1634                                 "socketcall.getpeername(namelen_in)" );
1635 }
1636 
1637 void
1638 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1639                                     SysRes res,
1640                                     UWord arg0, UWord arg1, UWord arg2 )
1641 {
1642    Addr name_p     = arg1;
1643    Addr namelen_p  = arg2;
1644    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1645    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1646                                  "socketcall.getpeername(namelen_out)" );
1647 }
1648 
1649 /* ------ */
1650 
1651 void
1652 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
1653                                struct vki_msghdr *msg )
1654 {
1655    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
1656 }
1657 
1658 /* ------ */
1659 
1660 void
1661 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
1662                                struct vki_msghdr *msg )
1663 {
1664    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
1665 }
1666 
1667 void
1668 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
1669                                 struct vki_msghdr *msg, UInt length )
1670 {
1671    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
1672    check_cmsg_for_fds( tid, msg );
1673 }
1674 
1675 
1676 /* ---------------------------------------------------------------------
1677    Deal with a bunch of IPC related syscalls
1678    ------------------------------------------------------------------ */
1679 
1680 /* ------ */
1681 
1682 void
1683 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1684                              UWord arg0, UWord arg1, UWord arg2 )
1685 {
1686    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1687    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1688 }
1689 
1690 /* ------ */
1691 
1692 void
1693 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1694                                   UWord arg0, UWord arg1,
1695                                   UWord arg2, UWord arg3 )
1696 {
1697    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1698                      struct timespec *timeout); */
1699    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1700    if (arg3 != 0)
1701       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1702 }
1703 
1704 /* ------ */
1705 
1706 static
1707 UInt get_sem_count( Int semid )
1708 {
1709    struct vki_semid_ds buf;
1710    union vki_semun arg;
1711    SysRes res;
1712 
1713    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
1714       (experimental) otherwise complains that the use in the return
1715       statement below is uninitialised. */
1716    buf.sem_nsems = 0;
1717 
1718    arg.buf = &buf;
1719 
1720 #  ifdef __NR_semctl
1721    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1722 #  else
1723    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1724                           VKI_IPC_STAT, (UWord)&arg);
1725 #  endif
1726    if (sr_isError(res))
1727       return 0;
1728 
1729    return buf.sem_nsems;
1730 }
1731 
1732 void
1733 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1734                               UWord arg0, UWord arg1,
1735                               UWord arg2, UWord arg3 )
1736 {
1737    /* int semctl(int semid, int semnum, int cmd, ...); */
1738    union vki_semun arg = *(union vki_semun *)&arg3;
1739    UInt nsems;
1740    switch (arg2 /* cmd */) {
1741 #if defined(VKI_IPC_INFO)
1742    case VKI_IPC_INFO:
1743    case VKI_SEM_INFO:
1744    case VKI_IPC_INFO|VKI_IPC_64:
1745    case VKI_SEM_INFO|VKI_IPC_64:
1746       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1747                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
1748       break;
1749 #endif
1750 
1751    case VKI_IPC_STAT:
1752 #if defined(VKI_SEM_STAT)
1753    case VKI_SEM_STAT:
1754 #endif
1755       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1756                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1757       break;
1758 
1759 #if defined(VKI_IPC_64)
1760    case VKI_IPC_STAT|VKI_IPC_64:
1761 #if defined(VKI_SEM_STAT)
1762    case VKI_SEM_STAT|VKI_IPC_64:
1763 #endif
1764       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1765                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1766       break;
1767 #endif
1768 
1769    case VKI_IPC_SET:
1770       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1771                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1772       break;
1773 
1774 #if defined(VKI_IPC_64)
1775    case VKI_IPC_SET|VKI_IPC_64:
1776       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1777                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1778       break;
1779 #endif
1780 
1781    case VKI_GETALL:
1782 #if defined(VKI_IPC_64)
1783    case VKI_GETALL|VKI_IPC_64:
1784 #endif
1785       nsems = get_sem_count( arg0 );
1786       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1787                      (Addr)arg.array, sizeof(unsigned short) * nsems );
1788       break;
1789 
1790    case VKI_SETALL:
1791 #if defined(VKI_IPC_64)
1792    case VKI_SETALL|VKI_IPC_64:
1793 #endif
1794       nsems = get_sem_count( arg0 );
1795       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1796                     (Addr)arg.array, sizeof(unsigned short) * nsems );
1797       break;
1798    }
1799 }
1800 
1801 void
1802 ML_(generic_POST_sys_semctl) ( ThreadId tid,
1803                                UWord res,
1804                                UWord arg0, UWord arg1,
1805                                UWord arg2, UWord arg3 )
1806 {
1807    union vki_semun arg = *(union vki_semun *)&arg3;
1808    UInt nsems;
1809    switch (arg2 /* cmd */) {
1810 #if defined(VKI_IPC_INFO)
1811    case VKI_IPC_INFO:
1812    case VKI_SEM_INFO:
1813    case VKI_IPC_INFO|VKI_IPC_64:
1814    case VKI_SEM_INFO|VKI_IPC_64:
1815       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1816       break;
1817 #endif
1818 
1819    case VKI_IPC_STAT:
1820 #if defined(VKI_SEM_STAT)
1821    case VKI_SEM_STAT:
1822 #endif
1823       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1824       break;
1825 
1826 #if defined(VKI_IPC_64)
1827    case VKI_IPC_STAT|VKI_IPC_64:
1828    case VKI_SEM_STAT|VKI_IPC_64:
1829       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1830       break;
1831 #endif
1832 
1833    case VKI_GETALL:
1834 #if defined(VKI_IPC_64)
1835    case VKI_GETALL|VKI_IPC_64:
1836 #endif
1837       nsems = get_sem_count( arg0 );
1838       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1839       break;
1840    }
1841 }
1842 
1843 /* ------ */
1844 
1845 /* ------ */
1846 
1847 static
1848 SizeT get_shm_size ( Int shmid )
1849 {
1850 #ifdef __NR_shmctl
1851 #  ifdef VKI_IPC_64
1852    struct vki_shmid64_ds buf;
1853 #    if defined(VGP_amd64_linux) || defined(VGP_arm64_linux)
1854      /* See bug 222545 comment 7 */
1855      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1856                                      VKI_IPC_STAT, (UWord)&buf);
1857 #    else
1858      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1859                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
1860 #    endif
1861 #  else /* !def VKI_IPC_64 */
1862    struct vki_shmid_ds buf;
1863    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
1864 #  endif /* def VKI_IPC_64 */
1865 #else
1866    struct vki_shmid_ds buf;
1867    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
1868                                  VKI_IPC_STAT, 0, (UWord)&buf);
1869 #endif
1870    if (sr_isError(__res))
1871       return 0;
1872 
1873    return (SizeT) buf.shm_segsz;
1874 }
1875 
1876 UWord
1877 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
1878                              UWord arg0, UWord arg1, UWord arg2 )
1879 {
1880    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
1881    SizeT  segmentSize = get_shm_size ( arg0 );
1882    UWord tmp;
1883    Bool  ok;
1884    if (arg1 == 0) {
1885       /* arm-linux only: work around the fact that
1886          VG_(am_get_advisory_client_simple) produces something that is
1887          VKI_PAGE_SIZE aligned, whereas what we want is something
1888          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
1889          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
1890          then round the result up to the next VKI_SHMLBA boundary.
1891          See bug 222545 comment 15.  So far, arm-linux is the only
1892          platform where this is known to be necessary. */
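      /* Worked example (illustrative numbers only): with a 4KB page
         size and a VKI_SHMLBA of 16KB, a 20KB segment is requested as
         20KB + (16KB - 4KB) = 32KB.  If aspacem then advises a merely
         page-aligned address such as 0x5001000, rounding up to the
         next 16KB boundary gives 0x5004000, which still leaves the
         full 20KB inside the enlarged advisory range. */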
1893       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
1894       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1895          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
1896       }
1897       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
1898       if (ok) {
1899          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1900             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
1901          } else {
1902             arg1 = tmp;
1903          }
1904       }
1905    }
1906    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
1907       arg1 = 0;
1908    return arg1;
1909 }
1910 
1911 void
1912 ML_(generic_POST_sys_shmat) ( ThreadId tid,
1913                               UWord res,
1914                               UWord arg0, UWord arg1, UWord arg2 )
1915 {
1916    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
1917    if ( segmentSize > 0 ) {
1918       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
1919       Bool d;
1920 
1921       if (arg2 & VKI_SHM_RDONLY)
1922          prot &= ~VKI_PROT_WRITE;
1923       /* It isn't exactly correct to pass 0 for the fd and offset
1924          here.  The kernel seems to think the corresponding section
1925          does have dev/ino numbers:
1926 
1927          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
1928 
1929          However there is no obvious way to find them.  In order to
1930          cope with the discrepancy, aspacem's sync checker omits the
1931          dev/ino correspondence check in cases where V does not know
1932          the dev/ino. */
1933       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
1934 
1935       /* we don't distinguish whether it's read-only or
1936        * read-write -- it doesn't matter really. */
1937       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
1938                               0/*di_handle*/ );
1939       if (d)
1940          VG_(discard_translations)( (Addr)res,
1941                                     (ULong)VG_PGROUNDUP(segmentSize),
1942                                     "ML_(generic_POST_sys_shmat)" );
1943    }
1944 }
1945 
1946 /* ------ */
1947 
1948 Bool
1949 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
1950 {
1951    /* int shmdt(const void *shmaddr); */
1952    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
1953 }
1954 
1955 void
1956 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
1957 {
1958    NSegment const* s = VG_(am_find_nsegment)(arg0);
1959 
1960    if (s != NULL) {
1961       Addr  s_start = s->start;
1962       SizeT s_len   = s->end+1 - s->start;
1963       Bool  d;
1964 
1965       vg_assert(s->kind == SkShmC);
1966       vg_assert(s->start == arg0);
1967 
1968       d = VG_(am_notify_munmap)(s_start, s_len);
1969       s = NULL; /* s is now invalid */
1970       VG_TRACK( die_mem_munmap, s_start, s_len );
1971       if (d)
1972          VG_(discard_translations)( s_start,
1973                                     (ULong)s_len,
1974                                     "ML_(generic_POST_sys_shmdt)" );
1975    }
1976 }
1977 /* ------ */
1978 
1979 void
1980 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
1981                               UWord arg0, UWord arg1, UWord arg2 )
1982 {
1983    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
1984    switch (arg1 /* cmd */) {
1985 #if defined(VKI_IPC_INFO)
1986    case VKI_IPC_INFO:
1987       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1988                      arg2, sizeof(struct vki_shminfo) );
1989       break;
1990 #if defined(VKI_IPC_64)
1991    case VKI_IPC_INFO|VKI_IPC_64:
1992       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1993                      arg2, sizeof(struct vki_shminfo64) );
1994       break;
1995 #endif
1996 #endif
1997 
1998 #if defined(VKI_SHM_INFO)
1999    case VKI_SHM_INFO:
2000 #if defined(VKI_IPC_64)
2001    case VKI_SHM_INFO|VKI_IPC_64:
2002 #endif
2003       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
2004                      arg2, sizeof(struct vki_shm_info) );
2005       break;
2006 #endif
2007 
2008    case VKI_IPC_STAT:
2009 #if defined(VKI_SHM_STAT)
2010    case VKI_SHM_STAT:
2011 #endif
2012       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
2013                      arg2, sizeof(struct vki_shmid_ds) );
2014       break;
2015 
2016 #if defined(VKI_IPC_64)
2017    case VKI_IPC_STAT|VKI_IPC_64:
2018    case VKI_SHM_STAT|VKI_IPC_64:
2019       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
2020                      arg2, sizeof(struct vki_shmid64_ds) );
2021       break;
2022 #endif
2023 
2024    case VKI_IPC_SET:
2025       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2026                     arg2, sizeof(struct vki_shmid_ds) );
2027       break;
2028 
2029 #if defined(VKI_IPC_64)
2030    case VKI_IPC_SET|VKI_IPC_64:
2031       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2032                     arg2, sizeof(struct vki_shmid64_ds) );
2033       break;
2034 #endif
2035    }
2036 }
2037 
2038 void
2039 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
2040                                UWord res,
2041                                UWord arg0, UWord arg1, UWord arg2 )
2042 {
2043    switch (arg1 /* cmd */) {
2044 #if defined(VKI_IPC_INFO)
2045    case VKI_IPC_INFO:
2046       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
2047       break;
2048    case VKI_IPC_INFO|VKI_IPC_64:
2049       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
2050       break;
2051 #endif
2052 
2053 #if defined(VKI_SHM_INFO)
2054    case VKI_SHM_INFO:
2055    case VKI_SHM_INFO|VKI_IPC_64:
2056       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
2057       break;
2058 #endif
2059 
2060    case VKI_IPC_STAT:
2061 #if defined(VKI_SHM_STAT)
2062    case VKI_SHM_STAT:
2063 #endif
2064       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
2065       break;
2066 
2067 #if defined(VKI_IPC_64)
2068    case VKI_IPC_STAT|VKI_IPC_64:
2069    case VKI_SHM_STAT|VKI_IPC_64:
2070       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
2071       break;
2072 #endif
2073 
2074 
2075    }
2076 }
2077 
2078 /* ---------------------------------------------------------------------
2079    Generic handler for mmap
2080    ------------------------------------------------------------------ */
2081 
2082 /*
2083  * Although mmap is specified by POSIX and the arguments are generally
2084  * consistent across platforms, the precise details of the low-level
2085  * argument-passing conventions differ. For example:
2086  *
2087  * - On x86-linux there is mmap (aka old_mmap) which takes the
2088  *   arguments in a memory block and the offset in bytes; and
2089  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2090  *   way and the offset in pages.
2091  *
2092  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
2093  *   arguments in the normal way and the offset in bytes; and
2094  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2095  *   way and the offset in pages.
2096  *
2097  * - On amd64-linux everything is simple and there is just the one
2098  *   call, mmap (aka sys_mmap)  which takes the arguments in the
2099  *   normal way and the offset in bytes.
2100  *
2101  * - On s390x-linux there is mmap (aka old_mmap) which takes the
2102  *   arguments in a memory block and the offset in bytes. mmap2
2103  *   is also available (but not exported via unistd.h) with
2104  *   arguments in a memory block and the offset in pages.
2105  *
2106  * To cope with all this we provide a generic handler function here
2107  * and then each platform implements one or more system call handlers
2108  * which call this generic routine after extracting and normalising
2109  * the arguments.
2110  */
2111 
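/* For illustration only -- a minimal sketch, not the real per-platform
   code: a 32-bit platform's mmap2 wrapper might normalise its
   arguments and defer to the generic routine below roughly like this,
   assuming that platform passes the offset in 4096-byte pages:

      PRE(sys_mmap2)
      {
         // ARG6 is an offset in pages; the generic handler expects bytes.
         SysRes r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3,
                                               ARG4, ARG5,
                                               (Off64T)ARG6 * 4096 );
         SET_STATUS_from_SysRes(r);
      }

   The real wrappers live in the per-platform syswrap-*.c files and
   also cope with quirks such as old_mmap's in-memory argument block. */
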
2112 SysRes
2113 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
2114                             UWord arg1, UWord arg2, UWord arg3,
2115                             UWord arg4, UWord arg5, Off64T arg6 )
2116 {
2117    Addr       advised;
2118    SysRes     sres;
2119    MapRequest mreq;
2120    Bool       mreq_ok;
2121 
2122 #  if defined(VGO_darwin)
2123    // Nb: we can't use this on Darwin, it has races:
2124    // * needs to RETRY if advisory succeeds but map fails
2125    //   (could have been some other thread in a nonblocking call)
2126    // * needs to not use fixed-position mmap() on Darwin
2127    //   (mmap will cheerfully smash whatever's already there, which might
2128    //   be a new mapping from some other thread in a nonblocking call)
2129    VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
2130 #  endif
2131 
2132    if (arg2 == 0) {
2133       /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
2134          shall be established. */
2135       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2136    }
2137 
2138    if (!VG_IS_PAGE_ALIGNED(arg1)) {
2139       /* zap any misaligned addresses. */
2140       /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
2141          to fail.   Here, we catch them all. */
2142       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2143    }
2144 
2145    if (!VG_IS_PAGE_ALIGNED(arg6)) {
2146       /* zap any misaligned offsets. */
2147       /* SuSV3 says: The off argument is constrained to be aligned and
2148          sized according to the value returned by sysconf() when
2149          passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
2150       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2151    }
2152 
2153 #  if defined(VKI_MAP_32BIT)
2154    /* We can't support MAP_32BIT (at least, not without significant
2155       complication), and it's royally unportable, so if the client
2156       asks for it, just fail it. */
2157    if (arg4 & VKI_MAP_32BIT) {
2158       return VG_(mk_SysRes_Error)( VKI_ENOMEM );
2159    }
2160 #  endif
2161 
2162    /* Figure out what kind of allocation constraints there are
2163       (fixed/hint/any), and ask aspacem what we should do. */
2164    mreq.start = arg1;
2165    mreq.len   = arg2;
2166    if (arg4 & VKI_MAP_FIXED) {
2167       mreq.rkind = MFixed;
2168    } else
2169    if (arg1 != 0) {
2170       mreq.rkind = MHint;
2171    } else {
2172       mreq.rkind = MAny;
2173    }
2174 
2175    /* Enquire ... */
2176    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2177    if (!mreq_ok) {
2178       /* Our request was bounced, so we'd better fail. */
2179       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2180    }
2181 
2182    /* Otherwise we're OK (so far).  Install aspacem's choice of
2183       address, and let the mmap go through.  */
2184    sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2185                                     arg4 | VKI_MAP_FIXED,
2186                                     arg5, arg6);
2187 
2188    /* A refinement: it may be that the kernel refused aspacem's choice
2189       of address.  If we were originally asked for a hinted mapping,
2190       there is still a last chance: try again at any address.
2191       Hence: */
2192    if (mreq.rkind == MHint && sr_isError(sres)) {
2193       mreq.start = 0;
2194       mreq.len   = arg2;
2195       mreq.rkind = MAny;
2196       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2197       if (!mreq_ok) {
2198          /* Our request was bounced, so we'd better fail. */
2199          return VG_(mk_SysRes_Error)( VKI_EINVAL );
2200       }
2201       /* and try again with the kernel */
2202       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2203                                        arg4 | VKI_MAP_FIXED,
2204                                        arg5, arg6);
2205    }
2206 
2207    /* Yet another refinement : sometimes valgrind chooses an address
2208       which is not acceptable by the kernel. This at least happens
2209       when mmap-ing huge pages, using the flag MAP_HUGETLB.
2210       valgrind aspacem does not know about huge pages, and modifying
2211       it to handle huge pages is not straightforward (e.g. need
2212       to understand special file system mount options).
2213       So, let's just redo an mmap, without giving any constraint to
2214       the kernel. If that succeeds, check with aspacem that the returned
2215       address is acceptable (i.e. is free).
2216       This gives much the same effect as if the user had specified
2217       MAP_FIXED at that address.
2218       The aspacem state will be correctly updated afterwards.
2219       We cannot, however, apply this last refinement when the user asked
2220       for a fixed mapping, since a specific address was requested. */
2221    if (sr_isError(sres) && !(arg4 & VKI_MAP_FIXED)) {
2222       advised = 0;
2223       /* try mmap with NULL address and without VKI_MAP_FIXED
2224          to let the kernel decide. */
2225       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2226                                        arg4,
2227                                        arg5, arg6);
2228       if (!sr_isError(sres)) {
2229          vg_assert(VG_(am_covered_by_single_free_segment)((Addr)sr_Res(sres),
2230                                                            arg2));
2231       }
2232    }
2233 
2234    if (!sr_isError(sres)) {
2235       ULong di_handle;
2236       /* Notify aspacem. */
2237       notify_core_of_mmap(
2238          (Addr)sr_Res(sres), /* addr kernel actually assigned */
2239          arg2, /* length */
2240          arg3, /* prot */
2241          arg4, /* the original flags value */
2242          arg5, /* fd */
2243          arg6  /* offset */
2244       );
2245       /* Load symbols? */
2246       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
2247                                        False/*allow_SkFileV*/, (Int)arg5 );
2248       /* Notify the tool. */
2249       notify_tool_of_mmap(
2250          (Addr)sr_Res(sres), /* addr kernel actually assigned */
2251          arg2, /* length */
2252          arg3, /* prot */
2253          di_handle /* so the tool can refer to the read debuginfo later,
2254                       if it wants. */
2255       );
2256    }
2257 
2258    /* Stay sane */
2259    if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
2260       vg_assert(sr_Res(sres) == arg1);
2261 
2262    return sres;
2263 }
2264 
2265 
2266 /* ---------------------------------------------------------------------
2267    The Main Entertainment ... syscall wrappers
2268    ------------------------------------------------------------------ */
2269 
2270 /* Note: the PRE() and POST() wrappers are for the actual functions
2271    implementing the system calls in the OS kernel.  These mostly have
2272    names like sys_write();  a few have names like old_mmap().  See the
2273    comment for ML_(syscall_table)[] for important info about the __NR_foo
2274    constants and their relationship to the sys_foo() functions.
2275 
2276    Some notes about names used for syscalls and args:
2277    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2278      ambiguity.
2279 
2280    - For error messages, we generally use a somewhat generic name
2281      for the syscall (eg. "write" rather than "sys_write").  This should be
2282      good enough for the average user to understand what is happening,
2283      without confusing them with names like "sys_write".
2284 
2285    - Also, for error messages the arg names are mostly taken from the man
2286      pages (even though many of those man pages are really for glibc
2287      functions of the same name), rather than from the OS kernel source,
2288      for the same reason -- a user presented with a "bogus foo(bar)" arg
2289      will most likely look at the "foo" man page to see which is the "bar"
2290      arg.
2291 
2292    Note that we use our own vki_* types.  The one exception is in
2293    PRE_REG_READn calls, where pointer types haven't been changed, because
2294    they don't need to be -- eg. for "foo*" to be used, the type foo need not
2295    be visible.
2296 
2297    XXX: some of these are arch-specific, and should be factored out.
2298 */
2299 
2300 #define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
2301 #define POST(name)     DEFN_POST_TEMPLATE(generic, name)
2302 
2303 // Macros to support 64-bit syscall args split into two 32 bit values
2304 #if defined(VG_LITTLEENDIAN)
2305 #define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2306 #define MERGE64_FIRST(name) name##_low
2307 #define MERGE64_SECOND(name) name##_high
2308 #elif defined(VG_BIGENDIAN)
2309 #define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2310 #define MERGE64_FIRST(name) name##_high
2311 #define MERGE64_SECOND(name) name##_low
2312 #else
2313 #error Unknown endianness
2314 #endif
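
/* Worked example (purely illustrative): on a 32-bit little-endian
   target, a 64-bit file offset such as 0x0000000180001000 arrives
   split across two registers, with the low word (0x80001000) in the
   first offset argument and the high word (0x1) in the second, so

      MERGE64(ARG4, ARG5) == ((ULong)ARG4) | (((ULong)ARG5) << 32)
                          == 0x0000000180001000

   as used by the pwrite64/pread64 wrappers below.  On a big-endian
   target the high word arrives first, and MERGE64's swapped parameter
   naming accounts for that. */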
2315 
2316 PRE(sys_exit)
2317 {
2318    ThreadState* tst;
2319    /* simple; just make this thread exit */
2320    PRINT("exit( %ld )", ARG1);
2321    PRE_REG_READ1(void, "exit", int, status);
2322    tst = VG_(get_ThreadState)(tid);
2323    /* Set the thread's status to be exiting, then claim that the
2324       syscall succeeded. */
2325    tst->exitreason = VgSrc_ExitThread;
2326    tst->os_state.exitcode = ARG1;
2327    SET_STATUS_Success(0);
2328 }
2329 
2330 PRE(sys_ni_syscall)
2331 {
2332    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
2333       VG_SYSNUM_STRING(SYSNO));
2334    PRE_REG_READ0(long, "ni_syscall");
2335    SET_STATUS_Failure( VKI_ENOSYS );
2336 }
2337 
2338 PRE(sys_iopl)
2339 {
2340    PRINT("sys_iopl ( %ld )", ARG1);
2341    PRE_REG_READ1(long, "iopl", unsigned long, level);
2342 }
2343 
2344 PRE(sys_fsync)
2345 {
2346    *flags |= SfMayBlock;
2347    PRINT("sys_fsync ( %ld )", ARG1);
2348    PRE_REG_READ1(long, "fsync", unsigned int, fd);
2349 }
2350 
2351 PRE(sys_fdatasync)
2352 {
2353    *flags |= SfMayBlock;
2354    PRINT("sys_fdatasync ( %ld )", ARG1);
2355    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
2356 }
2357 
2358 PRE(sys_msync)
2359 {
2360    *flags |= SfMayBlock;
2361    PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2362    PRE_REG_READ3(long, "msync",
2363                  unsigned long, start, vki_size_t, length, int, flags);
2364    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
2365 }
2366 
2367 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2368 // versions of LiS (Linux Streams).  They are not part of the kernel.
2369 // Therefore, we have to provide this type ourselves, rather than getting it
2370 // from the kernel sources.
2371 struct vki_pmsg_strbuf {
2372    int     maxlen;         /* no. of bytes in buffer */
2373    int     len;            /* no. of bytes returned */
2374    vki_caddr_t buf;        /* pointer to data */
2375 };
2376 PRE(sys_getpmsg)
2377 {
2378    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2379    struct vki_pmsg_strbuf *ctrl;
2380    struct vki_pmsg_strbuf *data;
2381    *flags |= SfMayBlock;
2382    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
2383    PRE_REG_READ5(int, "getpmsg",
2384                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2385                  int *, bandp, int *, flagsp);
2386    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2387    data = (struct vki_pmsg_strbuf *)ARG3;
2388    if (ctrl && ctrl->maxlen > 0)
2389       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2390    if (data && data->maxlen > 0)
2391       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2392    if (ARG4)
2393       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2394    if (ARG5)
2395       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
2396 }
2397 POST(sys_getpmsg)
2398 {
2399    struct vki_pmsg_strbuf *ctrl;
2400    struct vki_pmsg_strbuf *data;
2401    vg_assert(SUCCESS);
2402    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2403    data = (struct vki_pmsg_strbuf *)ARG3;
2404    if (RES == 0 && ctrl && ctrl->len > 0) {
2405       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2406    }
2407    if (RES == 0 && data && data->len > 0) {
2408       POST_MEM_WRITE( (Addr)data->buf, data->len);
2409    }
2410 }
2411 
2412 PRE(sys_putpmsg)
2413 {
2414    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2415    struct vki_pmsg_strbuf *ctrl;
2416    struct vki_pmsg_strbuf *data;
2417    *flags |= SfMayBlock;
2418    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
2419    PRE_REG_READ5(int, "putpmsg",
2420                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2421                  int, band, int, flags);
2422    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2423    data = (struct vki_pmsg_strbuf *)ARG3;
2424    if (ctrl && ctrl->len > 0)
2425       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2426    if (data && data->len > 0)
2427       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
2428 }
2429 
2430 PRE(sys_getitimer)
2431 {
2432    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2433    PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
2434    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2435 
2436    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2437    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
2438 }
2439 
2440 POST(sys_getitimer)
2441 {
2442    if (ARG2 != (Addr)NULL) {
2443       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2444       POST_timeval_WRITE( &(value->it_interval) );
2445       POST_timeval_WRITE( &(value->it_value) );
2446    }
2447 }
2448 
2449 PRE(sys_setitimer)
2450 {
2451    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
2452    PRE_REG_READ3(long, "setitimer",
2453                  int, which,
2454                  struct itimerval *, value, struct itimerval *, ovalue);
2455    if (ARG2 != (Addr)NULL) {
2456       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2457       PRE_timeval_READ( "setitimer(&value->it_interval)",
2458                          &(value->it_interval));
2459       PRE_timeval_READ( "setitimer(&value->it_value)",
2460                          &(value->it_value));
2461    }
2462    if (ARG3 != (Addr)NULL) {
2463       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2464       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2465                          &(ovalue->it_interval));
2466       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2467                          &(ovalue->it_value));
2468    }
2469 }
2470 
2471 POST(sys_setitimer)
2472 {
2473    if (ARG3 != (Addr)NULL) {
2474       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2475       POST_timeval_WRITE( &(ovalue->it_interval) );
2476       POST_timeval_WRITE( &(ovalue->it_value) );
2477    }
2478 }
2479 
2480 PRE(sys_chroot)
2481 {
2482    PRINT("sys_chroot ( %#lx )", ARG1);
2483    PRE_REG_READ1(long, "chroot", const char *, path);
2484    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
2485 }
2486 
2487 PRE(sys_madvise)
2488 {
2489    *flags |= SfMayBlock;
2490    PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2491    PRE_REG_READ3(long, "madvise",
2492                  unsigned long, start, vki_size_t, length, int, advice);
2493 }
2494 
2495 #if HAVE_MREMAP
2496 PRE(sys_mremap)
2497 {
2498    // Nb: this is different to the glibc version described in the man pages,
2499    // which lacks the fifth 'new_address' argument.
2500    if (ARG4 & VKI_MREMAP_FIXED) {
2501       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
2502             ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
2503       PRE_REG_READ5(unsigned long, "mremap",
2504                     unsigned long, old_addr, unsigned long, old_size,
2505                     unsigned long, new_size, unsigned long, flags,
2506                     unsigned long, new_addr);
2507    } else {
2508       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
2509             ARG1, (ULong)ARG2, ARG3, ARG4);
2510       PRE_REG_READ4(unsigned long, "mremap",
2511                     unsigned long, old_addr, unsigned long, old_size,
2512                     unsigned long, new_size, unsigned long, flags);
2513    }
2514    SET_STATUS_from_SysRes(
2515       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
2516    );
2517 }
2518 #endif /* HAVE_MREMAP */
2519 
2520 PRE(sys_nice)
2521 {
2522    PRINT("sys_nice ( %ld )", ARG1);
2523    PRE_REG_READ1(long, "nice", int, inc);
2524 }
2525 
2526 PRE(sys_mlock)
2527 {
2528    *flags |= SfMayBlock;
2529    PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2530    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
2531 }
2532 
2533 PRE(sys_munlock)
2534 {
2535    *flags |= SfMayBlock;
2536    PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2537    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
2538 }
2539 
2540 PRE(sys_mlockall)
2541 {
2542    *flags |= SfMayBlock;
2543    PRINT("sys_mlockall ( %lx )", ARG1);
2544    PRE_REG_READ1(long, "mlockall", int, flags);
2545 }
2546 
2547 PRE(sys_setpriority)
2548 {
2549    PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
2550    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
2551 }
2552 
2553 PRE(sys_getpriority)
2554 {
2555    PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
2556    PRE_REG_READ2(long, "getpriority", int, which, int, who);
2557 }
2558 
2559 PRE(sys_pwrite64)
2560 {
2561    *flags |= SfMayBlock;
2562 #if VG_WORDSIZE == 4
2563    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2564          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2565    PRE_REG_READ5(ssize_t, "pwrite64",
2566                  unsigned int, fd, const char *, buf, vki_size_t, count,
2567                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2568 #elif VG_WORDSIZE == 8
2569    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2570          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2571    PRE_REG_READ4(ssize_t, "pwrite64",
2572                  unsigned int, fd, const char *, buf, vki_size_t, count,
2573                  Word, offset);
2574 #else
2575 #  error Unexpected word size
2576 #endif
2577    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
2578 }
2579 
2580 PRE(sys_sync)
2581 {
2582    *flags |= SfMayBlock;
2583    PRINT("sys_sync ( )");
2584    PRE_REG_READ0(long, "sync");
2585 }
2586 
2587 PRE(sys_fstatfs)
2588 {
2589    FUSE_COMPATIBLE_MAY_BLOCK();
2590    PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
2591    PRE_REG_READ2(long, "fstatfs",
2592                  unsigned int, fd, struct statfs *, buf);
2593    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
2594 }
2595 
2596 POST(sys_fstatfs)
2597 {
2598    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
2599 }
2600 
2601 PRE(sys_fstatfs64)
2602 {
2603    FUSE_COMPATIBLE_MAY_BLOCK();
2604    PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
2605    PRE_REG_READ3(long, "fstatfs64",
2606                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
2607    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
2608 }
2609 POST(sys_fstatfs64)
2610 {
2611    POST_MEM_WRITE( ARG3, ARG2 );
2612 }
2613 
2614 PRE(sys_getsid)
2615 {
2616    PRINT("sys_getsid ( %ld )", ARG1);
2617    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
2618 }
2619 
2620 PRE(sys_pread64)
2621 {
2622    *flags |= SfMayBlock;
2623 #if VG_WORDSIZE == 4
2624    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2625          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2626    PRE_REG_READ5(ssize_t, "pread64",
2627                  unsigned int, fd, char *, buf, vki_size_t, count,
2628                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2629 #elif VG_WORDSIZE == 8
2630    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2631          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2632    PRE_REG_READ4(ssize_t, "pread64",
2633                  unsigned int, fd, char *, buf, vki_size_t, count,
2634                  Word, offset);
2635 #else
2636 #  error Unexpected word size
2637 #endif
2638    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
2639 }
2640 POST(sys_pread64)
2641 {
2642    vg_assert(SUCCESS);
2643    if (RES > 0) {
2644       POST_MEM_WRITE( ARG2, RES );
2645    }
2646 }
2647 
2648 PRE(sys_mknod)
2649 {
2650    FUSE_COMPATIBLE_MAY_BLOCK();
2651    PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
2652    PRE_REG_READ3(long, "mknod",
2653                  const char *, pathname, int, mode, unsigned, dev);
2654    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
2655 }
2656 
2657 PRE(sys_flock)
2658 {
2659    *flags |= SfMayBlock;
2660    PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
2661    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
2662 }
2663 
2664 // Pre_read a char** argument.
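// Illustrative layout of what the loop below walks (hypothetical data,
// not taken from a real client):
//
//    a -> [ &"ls" ][ &"-l" ][ &"/tmp" ][ NULL ]
//
// Every pointer slot (including the terminating NULL) is checked with
// PRE_MEM_READ, and the NUL-terminated string behind each non-NULL slot
// is checked with PRE_MEM_RASCIIZ; the walk stops at the NULL.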
2665 static void pre_argv_envp(Addr a, ThreadId tid, const HChar* s1, const HChar* s2)
2666 {
2667    while (True) {
2668       Addr a_deref;
2669       Addr* a_p = (Addr*)a;
2670       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2671       a_deref = *a_p;
2672       if (0 == a_deref)
2673          break;
2674       PRE_MEM_RASCIIZ( s2, a_deref );
2675       a += sizeof(char*);
2676    }
2677 }
2678 
2679 static Bool i_am_the_only_thread ( void )
2680 {
2681    Int c = VG_(count_living_threads)();
2682    vg_assert(c >= 1); /* stay sane */
2683    return c == 1;
2684 }
2685 
2686 /* Wait until all other threads disappear. */
2687 void VG_(reap_threads)(ThreadId self)
2688 {
2689    while (!i_am_the_only_thread()) {
2690       /* Let other thread(s) run */
2691       VG_(vg_yield)();
2692       VG_(poll_signals)(self);
2693    }
2694    vg_assert(i_am_the_only_thread());
2695 }
2696 
2697 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2698 // but it seems to work nonetheless...
2699 PRE(sys_execve)
2700 {
2701    HChar*       path = NULL;       /* path to executable */
2702    HChar**      envp = NULL;
2703    HChar**      argv = NULL;
2704    HChar**      arg2copy;
2705    HChar*       launcher_basename = NULL;
2706    ThreadState* tst;
2707    Int          i, j, tot_args;
2708    SysRes       res;
2709    Bool         setuid_allowed, trace_this_child;
2710 
2711    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
2712    PRE_REG_READ3(vki_off_t, "execve",
2713                  char *, filename, char **, argv, char **, envp);
2714    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
2715    if (ARG2 != 0)
2716       pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2717    if (ARG3 != 0)
2718       pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2719 
2720    vg_assert(VG_(is_valid_tid)(tid));
2721    tst = VG_(get_ThreadState)(tid);
2722 
2723    /* Erk.  If the exec fails, then the following will have made a
2724       mess of things which makes it hard for us to continue.  The
2725       right thing to do is piece everything together again in
2726       POST(execve), but that's close to impossible.  Instead, we make
2727       an effort to check that the execve will work before actually
2728       doing it. */
2729 
2730    /* Check that the name at least begins in client-accessible storage. */
2731    if (ARG1 == 0 /* obviously bogus */
2732        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2733       SET_STATUS_Failure( VKI_EFAULT );
2734       return;
2735    }
2736 
2737    // debug-only printing
2738    if (0) {
2739       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
2740       if (ARG2) {
2741          VG_(printf)("ARG2 = ");
2742          Int q;
2743          HChar** vec = (HChar**)ARG2;
2744          for (q = 0; vec[q]; q++)
2745             VG_(printf)("%p(%s) ", vec[q], vec[q]);
2746          VG_(printf)("\n");
2747       } else {
2748          VG_(printf)("ARG2 = null\n");
2749       }
2750    }
2751 
2752    // Decide whether or not we want to follow along
2753    { // Make 'child_argv' be a pointer to the child's arg vector
2754      // (skipping the exe name)
2755      const HChar** child_argv = (const HChar**)ARG2;
2756      if (child_argv && child_argv[0] == NULL)
2757         child_argv = NULL;
2758      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
2759    }
2760 
2761    // Do the important checks:  it is a file, is executable, permissions are
2762    // ok, etc.  We allow setuid executables to run only in the case when
2763    // we are not simulating them, that is, when they are to be run natively.
2764    setuid_allowed = trace_this_child  ? False  : True;
2765    res = VG_(pre_exec_check)((const HChar *)ARG1, NULL, setuid_allowed);
2766    if (sr_isError(res)) {
2767       SET_STATUS_Failure( sr_Err(res) );
2768       return;
2769    }
2770 
2771    /* If we're tracing the child, and the launcher name looks bogus
2772       (possibly because launcher.c couldn't figure it out, see
2773       comments therein) then we have no option but to fail. */
2774    if (trace_this_child
2775        && (VG_(name_of_launcher) == NULL
2776            || VG_(name_of_launcher)[0] != '/')) {
2777       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2778       return;
2779    }
2780 
2781    /* After this point, we can't recover if the execve fails. */
2782    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG1);
2783 
2784 
2785    // Terminate gdbserver if it is active.
2786    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
2787       // If the child will not be traced, we need to terminate gdbserver
2788       // to clean up the gdbserver resources (e.g. the FIFO files).
2789       // If child will be traced, we also terminate gdbserver: the new
2790       // Valgrind will start a fresh gdbserver after exec.
2791       VG_(gdbserver) (0);
2792    }
2793 
2794    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
2795       this. (Really, nuke them all, since the new process will make
2796       its own new thread.) */
2797    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2798    VG_(reap_threads)(tid);
2799 
2800    // Set up the child's exe path.
2801    //
2802    if (trace_this_child) {
2803 
2804       // We want to exec the launcher.  Get its pre-remembered path.
2805       path = VG_(name_of_launcher);
2806       // VG_(name_of_launcher) should have been acquired by m_main at
2807       // startup.
2808       vg_assert(path);
2809 
2810       launcher_basename = VG_(strrchr)(path, '/');
2811       if (launcher_basename == NULL || launcher_basename[1] == 0) {
2812          launcher_basename = path;  // hmm, tres dubious
2813       } else {
2814          launcher_basename++;
2815       }
2816 
2817    } else {
2818       path = (HChar*)ARG1;
2819    }
2820 
2821    // Set up the child's environment.
2822    //
2823    // Remove the valgrind-specific stuff from the environment so the
2824    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2825    // This is done unconditionally, since if we are tracing the child,
2826    // the child valgrind will set up the appropriate client environment.
2827    // Nb: we make a copy of the environment before trying to mangle it
2828    // as it might be in read-only memory (this was bug #101881).
2829    //
2830    // Then, if tracing the child, set VALGRIND_LIB for it.
2831    //
2832    if (ARG3 == 0) {
2833       envp = NULL;
2834    } else {
2835       envp = VG_(env_clone)( (HChar**)ARG3 );
2836       if (envp == NULL) goto hosed;
2837       VG_(env_remove_valgrind_env_stuff)( envp );
2838    }
2839 
2840    if (trace_this_child) {
2841       // Set VALGRIND_LIB in ARG3 (the environment)
2842       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2843    }
2844 
2845    // Set up the child's args.  If not tracing it, they are
2846    // simply ARG2.  Otherwise, they are
2847    //
2848    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2849    //
2850    // except that the first VG_(args_for_valgrind_noexecpass) args
2851    // are omitted.
2852    //
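   // For example (hypothetical invocation): if the client exec's
   // "/bin/ls" with argv ["ls", "-l"] while being traced by
   // "valgrind --tool=memcheck", the rebuilt vector is roughly
   //
   //    ["valgrind", "--tool=memcheck", "/bin/ls", "-l", NULL]
   //
   // i.e. the launcher basename, then Valgrind's own args (minus the
   // no-exec-pass ones), then the client exe path, then the client's
   // args from index 1 onwards.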
2853    if (!trace_this_child) {
2854       argv = (HChar**)ARG2;
2855    } else {
2856       vg_assert( VG_(args_for_valgrind) );
2857       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
2858       vg_assert( VG_(args_for_valgrind_noexecpass)
2859                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
2860       /* how many args in total will there be? */
2861       // launcher basename
2862       tot_args = 1;
2863       // V's args
2864       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
2865       tot_args -= VG_(args_for_valgrind_noexecpass);
2866       // name of client exe
2867       tot_args++;
2868       // args for client exe, skipping [0]
2869       arg2copy = (HChar**)ARG2;
2870       if (arg2copy && arg2copy[0]) {
2871          for (i = 1; arg2copy[i]; i++)
2872             tot_args++;
2873       }
2874       // allocate
2875       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
2876                           (tot_args+1) * sizeof(HChar*) );
2877       // copy
2878       j = 0;
2879       argv[j++] = launcher_basename;
2880       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
2881          if (i < VG_(args_for_valgrind_noexecpass))
2882             continue;
2883          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
2884       }
2885       argv[j++] = (HChar*)ARG1;
2886       if (arg2copy && arg2copy[0])
2887          for (i = 1; arg2copy[i]; i++)
2888             argv[j++] = arg2copy[i];
2889       argv[j++] = NULL;
2890       // check
2891       vg_assert(j == tot_args+1);
2892    }
2893 
2894    /* restore the DATA rlimit for the child */
2895    VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
2896 
2897    /*
2898       Set the signal state up for exec.
2899 
2900       We need to set the real signal state to make sure the exec'd
2901       process gets SIG_IGN properly.
2902 
2903       Also set our real sigmask to match the client's sigmask so that
2904       the exec'd child will get the right mask.  First we need to
2905       clear out any pending signals so they don't get delivered,
2906       which would confuse things.
2907 
2908       XXX This is a bug - the signals should remain pending, and be
2909       delivered to the new process after exec.  There's also a
2910       race-condition, since if someone delivers us a signal between
2911       the sigprocmask and the execve, we'll still get the signal. Oh
2912       well.
2913    */
2914    {
2915       vki_sigset_t allsigs;
2916       vki_siginfo_t info;
2917 
2918       /* What this loop does: it queries SCSS (the signal state that
2919          the client _thinks_ the kernel is in) by calling
2920          VG_(do_sys_sigaction), and modifies the real kernel signal
2921          state accordingly. */
2922       for (i = 1; i < VG_(max_signal); i++) {
2923          vki_sigaction_fromK_t sa_f;
2924          vki_sigaction_toK_t   sa_t;
2925          VG_(do_sys_sigaction)(i, NULL, &sa_f);
2926          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
2927          if (sa_t.ksa_handler == VKI_SIG_IGN)
2928             VG_(sigaction)(i, &sa_t, NULL);
2929          else {
2930             sa_t.ksa_handler = VKI_SIG_DFL;
2931             VG_(sigaction)(i, &sa_t, NULL);
2932          }
2933       }
2934 
2935       VG_(sigfillset)(&allsigs);
2936       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
2937          ;
2938 
2939       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
2940    }
2941 
2942    if (0) {
2943       HChar **cpp;
2944       VG_(printf)("exec: %s\n", path);
2945       for (cpp = argv; cpp && *cpp; cpp++)
2946          VG_(printf)("argv: %s\n", *cpp);
2947       if (0)
2948          for (cpp = envp; cpp && *cpp; cpp++)
2949             VG_(printf)("env: %s\n", *cpp);
2950    }
2951 
2952    SET_STATUS_from_SysRes(
2953       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
2954    );
2955 
2956    /* If we got here, then the execve failed.  We've already made way
2957       too much of a mess to continue, so we have to abort. */
2958   hosed:
2959    vg_assert(FAILURE);
2960    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
2961                 ARG1, (char*)ARG1, ARG2, ARG3, ERR);
2962    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
2963                             "execve() failing, so I'm dying.\n");
2964    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
2965                             "or work out how to recover.\n");
2966    VG_(exit)(101);
2967 }
2968 
2969 PRE(sys_access)
2970 {
2971    PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2972    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
2973    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
2974 }
2975 
2976 PRE(sys_alarm)
2977 {
2978    PRINT("sys_alarm ( %ld )", ARG1);
2979    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
2980 }
2981 
2982 PRE(sys_brk)
2983 {
2984    Addr brk_limit = VG_(brk_limit);
2985    Addr brk_new;
2986 
2987    /* libc   says: int   brk(void *end_data_segment);
2988       kernel says: void* brk(void* end_data_segment);  (more or less)
2989 
2990       libc returns 0 on success, and -1 (and sets errno) on failure.
2991       Nb: if you ask to shrink the dataseg end below what it
2992       currently is, that always succeeds, even if the dataseg end
2993       doesn't actually change (eg. brk(0)).  Unless it seg faults.
2994 
2995       Kernel returns the new dataseg end.  If the brk() failed, this
2996       will be unchanged from the old one.  That's why calling (kernel)
2997       brk(0) gives the current dataseg end (libc brk() just returns
2998       zero in that case).
2999 
3000       Both will seg fault if you shrink it back into a text segment.
3001    */
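   /* Illustrative only: if the current break is, say, 0x804c000, a
      successful brk(0x8050000) makes do_brk return 0x8050000 (equal to
      the request), whereas a failed request leaves the returned value
      at the old break; the code below keys success off
      "returned value == requested value". */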
3002    PRINT("sys_brk ( %#lx )", ARG1);
3003    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
3004 
3005    brk_new = do_brk(ARG1, tid);
3006    SET_STATUS_Success( brk_new );
3007 
3008    if (brk_new == ARG1) {
3009       /* brk() succeeded */
3010       if (brk_new < brk_limit) {
3011          /* successfully shrunk the data segment. */
3012          VG_TRACK( die_mem_brk, (Addr)ARG1,
3013 		   brk_limit-ARG1 );
3014       } else
3015       if (brk_new > brk_limit) {
3016          /* successfully grew the data segment */
3017          VG_TRACK( new_mem_brk, brk_limit,
3018                    ARG1-brk_limit, tid );
3019       }
3020    } else {
3021       /* brk() failed */
3022       vg_assert(brk_limit == brk_new);
3023    }
3024 }
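/* Illustrative sketch (not part of the build): a user-space demo of the
   libc-vs-kernel brk() difference described in the comment above.  Assumes
   Linux with glibc, where brk/sbrk are declared in <unistd.h> under
   _DEFAULT_SOURCE and the raw syscall is reachable via syscall(SYS_brk, ...). */
#if 0
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
   /* Raw kernel call: brk(0) changes nothing, but the return value is the
      (unchanged) current data segment end. */
   void* cur = (void*)syscall(SYS_brk, 0);
   printf("kernel brk(0) -> current break %p\n", cur);

   /* libc wrapper: returns 0 on success and -1 on failure, hiding the
      kernel's "new break" return value. */
   int rc = brk((char*)cur + 4096);
   printf("libc brk(cur+4096) -> %d\n", rc);
   return 0;
}
#endif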
3025 
3026 PRE(sys_chdir)
3027 {
3028    FUSE_COMPATIBLE_MAY_BLOCK();
3029    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
3030    PRE_REG_READ1(long, "chdir", const char *, path);
3031    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
3032 }
3033 
3034 PRE(sys_chmod)
3035 {
3036    FUSE_COMPATIBLE_MAY_BLOCK();
3037    PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3038    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
3039    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
3040 }
3041 
3042 PRE(sys_chown)
3043 {
3044    FUSE_COMPATIBLE_MAY_BLOCK();
3045    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
3046    PRE_REG_READ3(long, "chown",
3047                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
3048    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
3049 }
3050 
3051 PRE(sys_lchown)
3052 {
3053    FUSE_COMPATIBLE_MAY_BLOCK();
3054    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
3055    PRE_REG_READ3(long, "lchown",
3056                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
3057    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
3058 }
3059 
3060 PRE(sys_close)
3061 {
3062    FUSE_COMPATIBLE_MAY_BLOCK();
3063    PRINT("sys_close ( %ld )", ARG1);
3064    PRE_REG_READ1(long, "close", unsigned int, fd);
3065 
3066    /* Detect and negate attempts by the client to close Valgrind's log fd */
3067    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
3068         /* If doing -d style logging (which is to fd=2), don't
3069            allow that to be closed either. */
3070         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
3071       SET_STATUS_Failure( VKI_EBADF );
3072 }
3073 
3074 POST(sys_close)
3075 {
3076    if (VG_(clo_track_fds)) record_fd_close(ARG1);
3077 }
3078 
3079 PRE(sys_dup)
3080 {
3081    PRINT("sys_dup ( %ld )", ARG1);
3082    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
3083 }
3084 
3085 POST(sys_dup)
3086 {
3087    vg_assert(SUCCESS);
3088    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
3089       VG_(close)(RES);
3090       SET_STATUS_Failure( VKI_EMFILE );
3091    } else {
3092       if (VG_(clo_track_fds))
3093          ML_(record_fd_open_named)(tid, RES);
3094    }
3095 }
3096 
3097 PRE(sys_dup2)
3098 {
3099    PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
3100    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
3101    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
3102       SET_STATUS_Failure( VKI_EBADF );
3103 }
3104 
3105 POST(sys_dup2)
3106 {
3107    vg_assert(SUCCESS);
3108    if (VG_(clo_track_fds))
3109       ML_(record_fd_open_named)(tid, RES);
3110 }
3111 
3112 PRE(sys_fchdir)
3113 {
3114    FUSE_COMPATIBLE_MAY_BLOCK();
3115    PRINT("sys_fchdir ( %ld )", ARG1);
3116    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
3117 }
3118 
3119 PRE(sys_fchown)
3120 {
3121    FUSE_COMPATIBLE_MAY_BLOCK();
3122    PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
3123    PRE_REG_READ3(long, "fchown",
3124                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
3125 }
3126 
3127 PRE(sys_fchmod)
3128 {
3129    FUSE_COMPATIBLE_MAY_BLOCK();
3130    PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
3131    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
3132 }
3133 
3134 PRE(sys_newfstat)
3135 {
3136    FUSE_COMPATIBLE_MAY_BLOCK();
3137    PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
3138    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
3139    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
3140 }
3141 
3142 POST(sys_newfstat)
3143 {
3144    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3145 }
3146 
3147 static vki_sigset_t fork_saved_mask;
3148 
3149 // In Linux, the sys_fork() function varies across architectures, but we
3150 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
3151 PRE(sys_fork)
3152 {
3153    Bool is_child;
3154    Int child_pid;
3155    vki_sigset_t mask;
3156 
3157    PRINT("sys_fork ( )");
3158    PRE_REG_READ0(long, "fork");
3159 
3160    /* Block all signals during fork, so that we can fix things up in
3161       the child without being interrupted. */
3162    VG_(sigfillset)(&mask);
3163    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
3164 
3165    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
3166 
3167    if (!SUCCESS) return;
3168 
3169 #if defined(VGO_linux)
3170    // RES is 0 for child, non-0 (the child's PID) for parent.
3171    is_child = ( RES == 0 ? True : False );
3172    child_pid = ( is_child ? -1 : RES );
3173 #elif defined(VGO_darwin)
3174    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
3175    is_child = RESHI;
3176    child_pid = RES;
3177 #else
3178 #  error Unknown OS
3179 #endif
3180 
3181    VG_(do_atfork_pre)(tid);
3182 
3183    if (is_child) {
3184       VG_(do_atfork_child)(tid);
3185 
3186       /* restore signal mask */
3187       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3188 
3189       /* If --child-silent-after-fork=yes was specified, set the
3190          output file descriptors to 'impossible' values.  This is
3191          noticed by send_bytes_to_logging_sink in m_libcprint.c, which
3192          duly stops writing any further output. */
3193       if (VG_(clo_child_silent_after_fork)) {
3194          if (!VG_(log_output_sink).is_socket)
3195             VG_(log_output_sink).fd = -1;
3196          if (!VG_(xml_output_sink).is_socket)
3197             VG_(xml_output_sink).fd = -1;
3198       }
3199 
3200    } else {
3201       VG_(do_atfork_parent)(tid);
3202 
3203       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
3204 
3205       /* restore signal mask */
3206       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3207    }
3208 }
3209 
3210 PRE(sys_ftruncate)
3211 {
3212    *flags |= SfMayBlock;
3213    PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
3214    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
3215 }
3216 
3217 PRE(sys_truncate)
3218 {
3219    *flags |= SfMayBlock;
3220    PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3221    PRE_REG_READ2(long, "truncate",
3222                  const char *, path, unsigned long, length);
3223    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
3224 }
3225 
3226 PRE(sys_ftruncate64)
3227 {
3228    *flags |= SfMayBlock;
3229 #if VG_WORDSIZE == 4
3230    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
3231    PRE_REG_READ3(long, "ftruncate64",
3232                  unsigned int, fd,
3233                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3234 #else
3235    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
3236    PRE_REG_READ2(long, "ftruncate64",
3237                  unsigned int,fd, UWord,length);
3238 #endif
3239 }
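/* Illustrative sketch (not part of the build): on 32-bit targets the 64-bit
   length above arrives split across two word-sized syscall arguments and is
   reassembled by MERGE64.  The helper below shows the general idea only;
   which argument carries the low half is architecture-dependent, so the
   ordering here is an assumption, not MERGE64's actual definition. */
#if 0
#include <stdint.h>

static uint64_t merge64_sketch(uint32_t lo_word, uint32_t hi_word)
{
   /* Recombine two 32-bit register words into one 64-bit length. */
   return ((uint64_t)hi_word << 32) | (uint64_t)lo_word;
}
#endif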
3240 
3241 PRE(sys_truncate64)
3242 {
3243    *flags |= SfMayBlock;
3244 #if VG_WORDSIZE == 4
3245    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
3246    PRE_REG_READ3(long, "truncate64",
3247                  const char *, path,
3248                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3249 #else
3250    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
3251    PRE_REG_READ2(long, "truncate64",
3252                  const char *,path, UWord,length);
3253 #endif
3254    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
3255 }
3256 
3257 PRE(sys_getdents)
3258 {
3259    *flags |= SfMayBlock;
3260    PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
3261    PRE_REG_READ3(long, "getdents",
3262                  unsigned int, fd, struct vki_dirent *, dirp,
3263                  unsigned int, count);
3264    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
3265 }
3266 
3267 POST(sys_getdents)
3268 {
3269    vg_assert(SUCCESS);
3270    if (RES > 0)
3271       POST_MEM_WRITE( ARG2, RES );
3272 }
3273 
3274 PRE(sys_getdents64)
3275 {
3276    *flags |= SfMayBlock;
3277    PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
3278    PRE_REG_READ3(long, "getdents64",
3279                  unsigned int, fd, struct vki_dirent64 *, dirp,
3280                  unsigned int, count);
3281    PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
3282 }
3283 
3284 POST(sys_getdents64)
3285 {
3286    vg_assert(SUCCESS);
3287    if (RES > 0)
3288       POST_MEM_WRITE( ARG2, RES );
3289 }
3290 
3291 PRE(sys_getgroups)
3292 {
3293    PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
3294    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3295    if (ARG1 > 0)
3296       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3297 }
3298 
3299 POST(sys_getgroups)
3300 {
3301    vg_assert(SUCCESS);
3302    if (ARG1 > 0 && RES > 0)
3303       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
3304 }
3305 
3306 PRE(sys_getcwd)
3307 {
3308    // Comment from linux/fs/dcache.c:
3309    //   NOTE! The user-level library version returns a character pointer.
3310    //   The kernel system call just returns the length of the buffer filled
3311    //   (which includes the ending '\0' character), or a negative error
3312    //   value.
3313    // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
3314    PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
3315    PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
3316    PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
3317 }
3318 
3319 POST(sys_getcwd)
3320 {
3321    vg_assert(SUCCESS);
3322    if (RES != (Addr)NULL)
3323       POST_MEM_WRITE( ARG1, RES );
3324 }
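/* Illustrative sketch (not part of the build): the kernel/libc getcwd
   difference noted in the comment above.  Assumes Linux with glibc, where
   the raw syscall is reachable via syscall(SYS_getcwd, ...). */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
   char buf[4096];

   /* Raw syscall: on success returns the number of bytes written into buf,
      including the terminating '\0'; on error, a negative value. */
   long len = syscall(SYS_getcwd, buf, sizeof(buf));
   printf("kernel getcwd wrote %ld bytes: %s\n", len, buf);

   /* libc wrapper: returns a pointer to buf, or NULL on error. */
   char* p = getcwd(buf, sizeof(buf));
   printf("libc getcwd returned %p\n", (void*)p);
   return 0;
}
#endif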
3325 
3326 PRE(sys_geteuid)
3327 {
3328    PRINT("sys_geteuid ( )");
3329    PRE_REG_READ0(long, "geteuid");
3330 }
3331 
3332 PRE(sys_getegid)
3333 {
3334    PRINT("sys_getegid ( )");
3335    PRE_REG_READ0(long, "getegid");
3336 }
3337 
3338 PRE(sys_getgid)
3339 {
3340    PRINT("sys_getgid ( )");
3341    PRE_REG_READ0(long, "getgid");
3342 }
3343 
3344 PRE(sys_getpid)
3345 {
3346    PRINT("sys_getpid ()");
3347    PRE_REG_READ0(long, "getpid");
3348 }
3349 
3350 PRE(sys_getpgid)
3351 {
3352    PRINT("sys_getpgid ( %ld )", ARG1);
3353    PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
3354 }
3355 
3356 PRE(sys_getpgrp)
3357 {
3358    PRINT("sys_getpgrp ()");
3359    PRE_REG_READ0(long, "getpgrp");
3360 }
3361 
3362 PRE(sys_getppid)
3363 {
3364    PRINT("sys_getppid ()");
3365    PRE_REG_READ0(long, "getppid");
3366 }
3367 
3368 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
3369 {
3370    POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
3371 
3372 #ifdef _RLIMIT_POSIX_FLAG
3373    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
3374    // Unset it here to make the switch case below work correctly.
3375    a1 &= ~_RLIMIT_POSIX_FLAG;
3376 #endif
3377 
3378    switch (a1) {
3379    case VKI_RLIMIT_NOFILE:
3380       ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
3381       ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
3382       break;
3383 
3384    case VKI_RLIMIT_DATA:
3385       *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
3386       break;
3387 
3388    case VKI_RLIMIT_STACK:
3389       *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
3390       break;
3391    }
3392 }
3393 
3394 PRE(sys_old_getrlimit)
3395 {
3396    PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3397    PRE_REG_READ2(long, "old_getrlimit",
3398                  unsigned int, resource, struct rlimit *, rlim);
3399    PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3400 }
3401 
3402 POST(sys_old_getrlimit)
3403 {
3404    common_post_getrlimit(tid, ARG1, ARG2);
3405 }
3406 
3407 PRE(sys_getrlimit)
3408 {
3409    PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3410    PRE_REG_READ2(long, "getrlimit",
3411                  unsigned int, resource, struct rlimit *, rlim);
3412    PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3413 }
3414 
3415 POST(sys_getrlimit)
3416 {
3417    common_post_getrlimit(tid, ARG1, ARG2);
3418 }
3419 
3420 PRE(sys_getrusage)
3421 {
3422    PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
3423    PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
3424    PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
3425 }
3426 
3427 POST(sys_getrusage)
3428 {
3429    vg_assert(SUCCESS);
3430    if (RES == 0)
3431       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
3432 }
3433 
3434 PRE(sys_gettimeofday)
3435 {
3436    PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
3437    PRE_REG_READ2(long, "gettimeofday",
3438                  struct timeval *, tv, struct timezone *, tz);
3439    // GrP fixme does darwin write to *tz anymore?
3440    if (ARG1 != 0)
3441       PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
3442    if (ARG2 != 0)
3443       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3444 }
3445 
3446 POST(sys_gettimeofday)
3447 {
3448    vg_assert(SUCCESS);
3449    if (RES == 0) {
3450       if (ARG1 != 0)
3451          POST_timeval_WRITE( ARG1 );
3452       if (ARG2 != 0)
3453 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
3454    }
3455 }
3456 
3457 PRE(sys_settimeofday)
3458 {
3459    PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
3460    PRE_REG_READ2(long, "settimeofday",
3461                  struct timeval *, tv, struct timezone *, tz);
3462    if (ARG1 != 0)
3463       PRE_timeval_READ( "settimeofday(tv)", ARG1 );
3464    if (ARG2 != 0) {
3465       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3466       /* maybe should warn if tz->tz_dsttime is non-zero? */
3467    }
3468 }
3469 
3470 PRE(sys_getuid)
3471 {
3472    PRINT("sys_getuid ( )");
3473    PRE_REG_READ0(long, "getuid");
3474 }
3475 
3476 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3477 {
3478    /* We don't have any specific information on it, so
3479       try to do something reasonable based on direction and
3480       size bits.  The encoding scheme is described in
3481       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3482 
3483       According to Simon Hausmann, _IOC_READ means the kernel
3484       writes a value to the ioctl value passed from the user
3485       space and the other way around with _IOC_WRITE. */
3486 
3487    UInt dir  = _VKI_IOC_DIR(request);
3488    UInt size = _VKI_IOC_SIZE(request);
3489    if (SimHintiS(SimHint_lax_ioctls, VG_(clo_sim_hints))) {
3490       /*
3491        * Be very lax about ioctl handling; the only
3492        * assumption is that the size is correct. Doesn't
3493        * require the full buffer to be initialized when
3494        * writing.  Without this, using some device
3495        * drivers with a large number of strange ioctl
3496        * commands becomes very tiresome.
3497        */
3498    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
3499       static UWord unknown_ioctl[10];
3500       static Int moans = sizeof(unknown_ioctl) / sizeof(unknown_ioctl[0]);
3501 
3502       if (moans > 0 && !VG_(clo_xml)) {
3503          /* Check that we have not already moaned about this request. */
3504          UInt i;
3505          for (i = 0; i < sizeof(unknown_ioctl)/sizeof(unknown_ioctl[0]); i++) {
3506             if (unknown_ioctl[i] == request)
3507                break;
3508             if (unknown_ioctl[i] == 0) {
3509                unknown_ioctl[i] = request;
3510                moans--;
3511                VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
3512                          " with no size/direction hints.\n", request);
3513                VG_(umsg)("   This could cause spurious value errors to appear.\n");
3514                VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
3515                          "guidance on writing a proper wrapper.\n" );
3516                //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3517                return;
3518             }
3519          }
3520       }
3521    } else {
3522       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3523       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3524       if ((dir & _VKI_IOC_WRITE) && size > 0)
3525          PRE_MEM_READ( "ioctl(generic)", arg, size);
3526       if ((dir & _VKI_IOC_READ) && size > 0)
3527          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
3528    }
3529 }
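/* Illustrative sketch (not part of the build): how the direction and size
   bits inspected above are packed into an ioctl request number.  Uses the
   standard Linux _IOR/_IOW and _IOC_* macros from <linux/ioctl.h>; the 'x'
   magic and 0x42/0x43 command numbers are made up for illustration. */
#if 0
#include <stdio.h>
#include <linux/ioctl.h>

struct example_arg { int a; int b; };

int main(void)
{
   unsigned long rd = _IOR('x', 0x42, struct example_arg); /* kernel writes arg */
   unsigned long wr = _IOW('x', 0x43, struct example_arg); /* kernel reads arg  */

   /* The same bitfields that ML_(PRE_unknown_ioctl) extracts with
      _VKI_IOC_DIR and _VKI_IOC_SIZE. */
   printf("rd: dir=%u size=%u\n", (unsigned)_IOC_DIR(rd), (unsigned)_IOC_SIZE(rd));
   printf("wr: dir=%u size=%u\n", (unsigned)_IOC_DIR(wr), (unsigned)_IOC_SIZE(wr));
   return 0;
}
#endif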
3530 
3531 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3532 {
3533    /* We don't have any specific information on it, so
3534       try to do something reasonable based on direction and
3535       size bits.  The encoding scheme is described in
3536       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3537 
3538       According to Simon Hausmann, _IOC_READ means the kernel
3539       writes a value to the ioctl value passed from the user
3540       space and the other way around with _IOC_WRITE. */
3541 
3542    UInt dir  = _VKI_IOC_DIR(request);
3543    UInt size = _VKI_IOC_SIZE(request);
3544    if (size > 0 && (dir & _VKI_IOC_READ)
3545        && res == 0
3546        && arg != (Addr)NULL)
3547    {
3548       POST_MEM_WRITE(arg, size);
3549    }
3550 }
3551 
3552 /*
3553    If we're sending a SIGKILL to one of our own threads, then simulate
3554    it rather than really sending the signal, so that the target thread
3555    gets a chance to clean up.  Returns True if we did the killing (or
3556    no killing is necessary), and False if the caller should use the
3557    normal kill syscall.
3558 
3559    "pid" is any pid argument which can be passed to kill; group kills
3560    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3561    they'll most likely hit all the threads and we won't need to worry
3562    about cleanup.  In truth, we can't fully emulate these multicast
3563    kills.
3564 
3565    "tgid" is a thread group id.  If it is not -1, then the target
3566    thread must be in that thread group.
3567  */
3568 Bool ML_(do_sigkill)(Int pid, Int tgid)
3569 {
3570    ThreadState *tst;
3571    ThreadId tid;
3572 
3573    if (pid <= 0)
3574       return False;
3575 
3576    tid = VG_(lwpid_to_vgtid)(pid);
3577    if (tid == VG_INVALID_THREADID)
3578       return False;		/* none of our threads */
3579 
3580    tst = VG_(get_ThreadState)(tid);
3581    if (tst == NULL || tst->status == VgTs_Empty)
3582       return False;		/* hm, shouldn't happen */
3583 
3584    if (tgid != -1 && tst->os_state.threadgroup != tgid)
3585       return False;		/* not the right thread group */
3586 
3587    /* Check to see that the target isn't already exiting. */
3588    if (!VG_(is_exiting)(tid)) {
3589       if (VG_(clo_trace_signals))
3590 	 VG_(message)(Vg_DebugMsg,
3591                       "Thread %d being killed with SIGKILL\n",
3592                       tst->tid);
3593 
3594       tst->exitreason = VgSrc_FatalSig;
3595       tst->os_state.fatalsig = VKI_SIGKILL;
3596 
3597       if (!VG_(is_running_thread)(tid))
3598 	 VG_(get_thread_out_of_syscall)(tid);
3599    }
3600 
3601    return True;
3602 }
3603 
3604 PRE(sys_kill)
3605 {
3606    PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
3607    PRE_REG_READ2(long, "kill", int, pid, int, sig);
3608    if (!ML_(client_signal_OK)(ARG2)) {
3609       SET_STATUS_Failure( VKI_EINVAL );
3610       return;
3611    }
3612 
3613    /* If we're sending SIGKILL, check to see if the target is one of
3614       our threads and handle it specially. */
3615    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
3616       SET_STATUS_Success(0);
3617    else
3618       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
3619          affecting how posix-compliant the call is.  I guess it is
3620          harmless to pass the 3rd arg on other platforms; hence pass
3621          it on all. */
3622       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
3623 
3624    if (VG_(clo_trace_signals))
3625       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
3626 		   ARG2, ARG1);
3627 
3628    /* This kill might have given us a pending signal.  Ask for a check once
3629       the syscall is done. */
3630    *flags |= SfPollAfter;
3631 }
3632 
3633 PRE(sys_link)
3634 {
3635    *flags |= SfMayBlock;
3636    PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3637    PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
3638    PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
3639    PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
3640 }
3641 
3642 PRE(sys_newlstat)
3643 {
3644    PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3645    PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
3646    PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
3647    PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
3648 }
3649 
3650 POST(sys_newlstat)
3651 {
3652    vg_assert(SUCCESS);
3653    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3654 }
3655 
3656 PRE(sys_mkdir)
3657 {
3658    *flags |= SfMayBlock;
3659    PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3660    PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
3661    PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
3662 }
3663 
3664 PRE(sys_mprotect)
3665 {
3666    PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
3667    PRE_REG_READ3(long, "mprotect",
3668                  unsigned long, addr, vki_size_t, len, unsigned long, prot);
3669 
3670    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
3671       SET_STATUS_Failure( VKI_ENOMEM );
3672    }
3673 #if defined(VKI_PROT_GROWSDOWN)
3674    else
3675    if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
3676       /* Deal with mprotects on growable stack areas.
3677 
3678          The critical files to understand all this are mm/mprotect.c
3679          in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
3680          glibc.
3681 
3682          The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
3683          round the start/end address of mprotect to the start/end of
3684          the underlying vma and glibc uses that as an easy way to
3685          change the protection of the stack by calling mprotect on the
3686          last page of the stack with PROT_GROWSDOWN set.
3687 
3688          The sanity check provided by the kernel is that the vma must
3689          have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
3690       UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
3691       NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
3692       NSegment const *rseg;
3693 
3694       vg_assert(aseg);
3695 
3696       if (grows == VKI_PROT_GROWSDOWN) {
3697          rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
3698          if (rseg &&
3699              rseg->kind == SkResvn &&
3700              rseg->smode == SmUpper &&
3701              rseg->end+1 == aseg->start) {
3702             Addr end = ARG1 + ARG2;
3703             ARG1 = aseg->start;
3704             ARG2 = end - aseg->start;
3705             ARG3 &= ~VKI_PROT_GROWSDOWN;
3706          } else {
3707             SET_STATUS_Failure( VKI_EINVAL );
3708          }
3709       } else if (grows == VKI_PROT_GROWSUP) {
3710          rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
3711          if (rseg &&
3712              rseg->kind == SkResvn &&
3713              rseg->smode == SmLower &&
3714              aseg->end+1 == rseg->start) {
3715             ARG2 = aseg->end - ARG1 + 1;
3716             ARG3 &= ~VKI_PROT_GROWSUP;
3717          } else {
3718             SET_STATUS_Failure( VKI_EINVAL );
3719          }
3720       } else {
3721          /* both GROWSUP and GROWSDOWN */
3722          SET_STATUS_Failure( VKI_EINVAL );
3723       }
3724    }
3725 #endif   // defined(VKI_PROT_GROWSDOWN)
3726 }
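/* Illustrative sketch (not part of the build): the glibc-style PROT_GROWSDOWN
   usage handled above, roughly what dl-execstack.c does to change the whole
   stack's protection by touching only its last page.  Assumes Linux/glibc, a
   downwards-growing stack and _GNU_SOURCE for PROT_GROWSDOWN; error handling
   is omitted. */
#if 0
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/mman.h>

static int make_stack_executable_sketch(void* addr_on_stack)
{
   long page = sysconf(_SC_PAGESIZE);
   void* page_start = (void*)((unsigned long)addr_on_stack
                              & ~((unsigned long)page - 1));

   /* One page plus PROT_GROWSDOWN: the kernel extends the change back to the
      start of the underlying stack vma, which is what the wrapper above
      emulates by rewriting ARG1/ARG2 before the real mprotect runs. */
   return mprotect(page_start, (size_t)page,
                   PROT_READ | PROT_WRITE | PROT_EXEC | PROT_GROWSDOWN);
}
#endif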
3727 
3728 POST(sys_mprotect)
3729 {
3730    Addr a    = ARG1;
3731    SizeT len = ARG2;
3732    Int  prot = ARG3;
3733 
3734    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
3735 }
3736 
3737 PRE(sys_munmap)
3738 {
3739    if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
3740    PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
3741    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
3742 
3743    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
3744       SET_STATUS_Failure( VKI_EINVAL );
3745 }
3746 
3747 POST(sys_munmap)
3748 {
3749    Addr  a   = ARG1;
3750    SizeT len = ARG2;
3751 
3752    ML_(notify_core_and_tool_of_munmap)( a, len );
3753 }
3754 
3755 PRE(sys_mincore)
3756 {
3757    PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
3758    PRE_REG_READ3(long, "mincore",
3759                  unsigned long, start, vki_size_t, length,
3760                  unsigned char *, vec);
3761    PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3762 }
3763 POST(sys_mincore)
3764 {
3765    POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3766 }
3767 
3768 PRE(sys_nanosleep)
3769 {
3770    *flags |= SfMayBlock|SfPostOnFail;
3771    PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
3772    PRE_REG_READ2(long, "nanosleep",
3773                  struct timespec *, req, struct timespec *, rem);
3774    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
3775    if (ARG2 != 0)
3776       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
3777 }
3778 
3779 POST(sys_nanosleep)
3780 {
3781    vg_assert(SUCCESS || FAILURE);
3782    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
3783       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
3784 }
3785 
3786 PRE(sys_open)
3787 {
3788    if (ARG2 & VKI_O_CREAT) {
3789       // 3-arg version
3790       PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
3791       PRE_REG_READ3(long, "open",
3792                     const char *, filename, int, flags, int, mode);
3793    } else {
3794       // 2-arg version
3795       PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
3796       PRE_REG_READ2(long, "open",
3797                     const char *, filename, int, flags);
3798    }
3799    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
3800 
3801 #if defined(VGO_linux)
3802    /* Handle the case where the open is of /proc/self/cmdline or
3803       /proc/<pid>/cmdline, and just give it a copy of the fd for the
3804       fake file we cooked up at startup (in m_main).  Also, seek the
3805       cloned fd back to the start. */
3806    {
3807       HChar  name[30];   // large enough
3808       HChar* arg1s = (HChar*) ARG1;
3809       SysRes sres;
3810 
3811       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
3812       if (ML_(safe_to_deref)( arg1s, 1 ) &&
3813           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
3814          )
3815       {
3816          sres = VG_(dup)( VG_(cl_cmdline_fd) );
3817          SET_STATUS_from_SysRes( sres );
3818          if (!sr_isError(sres)) {
3819             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3820             if (off < 0)
3821                SET_STATUS_Failure( VKI_EMFILE );
3822          }
3823          return;
3824       }
3825    }
3826 
3827    /* Handle the case where the open is of /proc/self/auxv or
3828       /proc/<pid>/auxv, and just give it a copy of the fd for the
3829       fake file we cooked up at startup (in m_main).  Also, seek the
3830       cloned fd back to the start. */
3831    {
3832       HChar  name[30];   // large enough
3833       HChar* arg1s = (HChar*) ARG1;
3834       SysRes sres;
3835 
3836       VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
3837       if (ML_(safe_to_deref)( arg1s, 1 ) &&
3838           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/auxv"))
3839          )
3840       {
3841          sres = VG_(dup)( VG_(cl_auxv_fd) );
3842          SET_STATUS_from_SysRes( sres );
3843          if (!sr_isError(sres)) {
3844             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3845             if (off < 0)
3846                SET_STATUS_Failure( VKI_EMFILE );
3847          }
3848          return;
3849       }
3850    }
3851 #endif // defined(VGO_linux)
3852 
3853    /* Otherwise handle normally */
3854    *flags |= SfMayBlock;
3855 }
3856 
3857 POST(sys_open)
3858 {
3859    vg_assert(SUCCESS);
3860    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
3861       VG_(close)(RES);
3862       SET_STATUS_Failure( VKI_EMFILE );
3863    } else {
3864       if (VG_(clo_track_fds))
3865          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
3866    }
3867 }
3868 
3869 PRE(sys_read)
3870 {
3871    *flags |= SfMayBlock;
3872    PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3873    PRE_REG_READ3(ssize_t, "read",
3874                  unsigned int, fd, char *, buf, vki_size_t, count);
3875 
3876    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
3877       SET_STATUS_Failure( VKI_EBADF );
3878    else
3879       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
3880 }
3881 
3882 POST(sys_read)
3883 {
3884    vg_assert(SUCCESS);
3885    POST_MEM_WRITE( ARG2, RES );
3886 }
3887 
3888 PRE(sys_write)
3889 {
3890    Bool ok;
3891    *flags |= SfMayBlock;
3892    PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3893    PRE_REG_READ3(ssize_t, "write",
3894                  unsigned int, fd, const char *, buf, vki_size_t, count);
3895    /* check to see if it is allowed.  If not, try for an exemption from
3896       --sim-hints=enable-outer (used for self hosting). */
3897    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
3898    if (!ok && ARG1 == 2/*stderr*/
3899            && SimHintiS(SimHint_enable_outer, VG_(clo_sim_hints)))
3900       ok = True;
3901    if (!ok)
3902       SET_STATUS_Failure( VKI_EBADF );
3903    else
3904       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
3905 }
3906 
3907 PRE(sys_creat)
3908 {
3909    *flags |= SfMayBlock;
3910    PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3911    PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
3912    PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
3913 }
3914 
3915 POST(sys_creat)
3916 {
3917    vg_assert(SUCCESS);
3918    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
3919       VG_(close)(RES);
3920       SET_STATUS_Failure( VKI_EMFILE );
3921    } else {
3922       if (VG_(clo_track_fds))
3923          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
3924    }
3925 }
3926 
3927 PRE(sys_poll)
3928 {
3929    /* struct pollfd {
3930         int fd;           -- file descriptor
3931         short events;     -- requested events
3932         short revents;    -- returned events
3933       };
3934       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
3935    */
3936    UInt i;
3937    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3938    *flags |= SfMayBlock;
3939    PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
3940    PRE_REG_READ3(long, "poll",
3941                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
3942 
3943    for (i = 0; i < ARG2; i++) {
3944       PRE_MEM_READ( "poll(ufds.fd)",
3945                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
3946       PRE_MEM_READ( "poll(ufds.events)",
3947                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
3948       PRE_MEM_WRITE( "poll(ufds.revents)",
3949                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3950    }
3951 }
3952 
3953 POST(sys_poll)
3954 {
3955    if (RES >= 0) {
3956       UInt i;
3957       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3958       for (i = 0; i < ARG2; i++)
3959 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3960    }
3961 }
3962 
3963 PRE(sys_readlink)
3964 {
3965    FUSE_COMPATIBLE_MAY_BLOCK();
3966    Word saved = SYSNO;
3967 
3968    PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
3969    PRE_REG_READ3(long, "readlink",
3970                  const char *, path, char *, buf, int, bufsiz);
3971    PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
3972    PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
3973 
3974    {
3975 #if defined(VGO_linux)
3976       /*
3977        * Handle the case where readlink is looking at /proc/self/exe or
3978        * /proc/<pid>/exe.
3979        */
3980       HChar  name[30];   // large enough
3981       HChar* arg1s = (HChar*) ARG1;
3982       VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
3983       if (ML_(safe_to_deref)(arg1s, 1) &&
3984           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
3985          )
3986       {
3987          VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
3988          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
3989                                                          ARG2, ARG3));
3990       } else
3991 #endif // defined(VGO_linux)
3992       {
3993          /* Normal case */
3994          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
3995       }
3996    }
3997 
3998    if (SUCCESS && RES > 0)
3999       POST_MEM_WRITE( ARG2, RES );
4000 }
4001 
4002 PRE(sys_readv)
4003 {
4004    Int i;
4005    struct vki_iovec * vec;
4006    *flags |= SfMayBlock;
4007    PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
4008    PRE_REG_READ3(ssize_t, "readv",
4009                  unsigned long, fd, const struct iovec *, vector,
4010                  unsigned long, count);
4011    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
4012       SET_STATUS_Failure( VKI_EBADF );
4013    } else {
4014       if ((Int)ARG3 >= 0)
4015          PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
4016 
4017       if (ARG2 != 0) {
4018          /* ToDo: don't do any of the following if the vector is invalid */
4019          vec = (struct vki_iovec *)ARG2;
4020          for (i = 0; i < (Int)ARG3; i++)
4021             PRE_MEM_WRITE( "readv(vector[...])",
4022                            (Addr)vec[i].iov_base, vec[i].iov_len );
4023       }
4024    }
4025 }
4026 
4027 POST(sys_readv)
4028 {
4029    vg_assert(SUCCESS);
4030    if (RES > 0) {
4031       Int i;
4032       struct vki_iovec * vec = (struct vki_iovec *)ARG2;
4033       Int remains = RES;
4034 
4035       /* RES holds the number of bytes read. */
4036       for (i = 0; i < (Int)ARG3; i++) {
4037 	 Int nReadThisBuf = vec[i].iov_len;
4038 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
4039 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
4040 	 remains -= nReadThisBuf;
4041 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
4042       }
4043    }
4044 }
4045 
4046 PRE(sys_rename)
4047 {
4048    FUSE_COMPATIBLE_MAY_BLOCK();
4049    PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4050    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
4051    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
4052    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
4053 }
4054 
4055 PRE(sys_rmdir)
4056 {
4057    *flags |= SfMayBlock;
4058    PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
4059    PRE_REG_READ1(long, "rmdir", const char *, pathname);
4060    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
4061 }
4062 
4063 PRE(sys_select)
4064 {
4065    *flags |= SfMayBlock;
4066    PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
4067    PRE_REG_READ5(long, "select",
4068                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
4069                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
4070    // XXX: this possibly understates how much memory is read.
4071    if (ARG2 != 0)
4072       PRE_MEM_READ( "select(readfds)",
4073 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
4074    if (ARG3 != 0)
4075       PRE_MEM_READ( "select(writefds)",
4076 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
4077    if (ARG4 != 0)
4078       PRE_MEM_READ( "select(exceptfds)",
4079 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
4080    if (ARG5 != 0)
4081       PRE_timeval_READ( "select(timeout)", ARG5 );
4082 }
4083 
4084 PRE(sys_setgid)
4085 {
4086    PRINT("sys_setgid ( %ld )", ARG1);
4087    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
4088 }
4089 
4090 PRE(sys_setsid)
4091 {
4092    PRINT("sys_setsid ( )");
4093    PRE_REG_READ0(long, "setsid");
4094 }
4095 
4096 PRE(sys_setgroups)
4097 {
4098    PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
4099    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
4100    if (ARG1 > 0)
4101       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
4102 }
4103 
4104 PRE(sys_setpgid)
4105 {
4106    PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
4107    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
4108 }
4109 
4110 PRE(sys_setregid)
4111 {
4112    PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
4113    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
4114 }
4115 
4116 PRE(sys_setreuid)
4117 {
4118    PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
4119    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
4120 }
4121 
4122 PRE(sys_setrlimit)
4123 {
4124    UWord arg1 = ARG1;
4125    PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
4126    PRE_REG_READ2(long, "setrlimit",
4127                  unsigned int, resource, struct rlimit *, rlim);
4128    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
4129 
4130 #ifdef _RLIMIT_POSIX_FLAG
4131    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
4132    // Unset it here to make the if statements below work correctly.
4133    arg1 &= ~_RLIMIT_POSIX_FLAG;
4134 #endif
4135 
4136    if (!VG_(am_is_valid_for_client)(ARG2, sizeof(struct vki_rlimit),
4137                                     VKI_PROT_READ)) {
4138       SET_STATUS_Failure( VKI_EFAULT );
4139    }
4140    else if (((struct vki_rlimit *)ARG2)->rlim_cur
4141             > ((struct vki_rlimit *)ARG2)->rlim_max) {
4142       SET_STATUS_Failure( VKI_EINVAL );
4143    }
4144    else if (arg1 == VKI_RLIMIT_NOFILE) {
4145       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
4146           ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
4147          SET_STATUS_Failure( VKI_EPERM );
4148       }
4149       else {
4150          VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
4151          SET_STATUS_Success( 0 );
4152       }
4153    }
4154    else if (arg1 == VKI_RLIMIT_DATA) {
4155       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
4156           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
4157          SET_STATUS_Failure( VKI_EPERM );
4158       }
4159       else {
4160          VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
4161          SET_STATUS_Success( 0 );
4162       }
4163    }
4164    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
4165       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
4166           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
4167          SET_STATUS_Failure( VKI_EPERM );
4168       }
4169       else {
4170          VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
4171          VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
4172          SET_STATUS_Success( 0 );
4173       }
4174    }
4175 }
4176 
4177 PRE(sys_setuid)
4178 {
4179    PRINT("sys_setuid ( %ld )", ARG1);
4180    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
4181 }
4182 
4183 PRE(sys_newstat)
4184 {
4185    FUSE_COMPATIBLE_MAY_BLOCK();
4186    PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4187    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
4188    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
4189    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
4190 }
4191 
4192 POST(sys_newstat)
4193 {
4194    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
4195 }
4196 
4197 PRE(sys_statfs)
4198 {
4199    FUSE_COMPATIBLE_MAY_BLOCK();
4200    PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
4201    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
4202    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
4203    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
4204 }
4205 POST(sys_statfs)
4206 {
4207    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
4208 }
4209 
4210 PRE(sys_statfs64)
4211 {
4212    PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
4213    PRE_REG_READ3(long, "statfs64",
4214                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
4215    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
4216    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
4217 }
4218 POST(sys_statfs64)
4219 {
4220    POST_MEM_WRITE( ARG3, ARG2 );
4221 }
4222 
4223 PRE(sys_symlink)
4224 {
4225    *flags |= SfMayBlock;
4226    PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4227    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
4228    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
4229    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
4230 }
4231 
4232 PRE(sys_time)
4233 {
4234    /* time_t time(time_t *t); */
4235    PRINT("sys_time ( %#lx )",ARG1);
4236    PRE_REG_READ1(long, "time", int *, t);
4237    if (ARG1 != 0) {
4238       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
4239    }
4240 }
4241 
4242 POST(sys_time)
4243 {
4244    if (ARG1 != 0) {
4245       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
4246    }
4247 }
4248 
4249 PRE(sys_times)
4250 {
4251    PRINT("sys_times ( %#lx )", ARG1);
4252    PRE_REG_READ1(long, "times", struct tms *, buf);
4253    if (ARG1 != 0) {
4254       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
4255    }
4256 }
4257 
4258 POST(sys_times)
4259 {
4260    if (ARG1 != 0) {
4261       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
4262    }
4263 }
4264 
4265 PRE(sys_umask)
4266 {
4267    PRINT("sys_umask ( %ld )", ARG1);
4268    PRE_REG_READ1(long, "umask", int, mask);
4269 }
4270 
4271 PRE(sys_unlink)
4272 {
4273    *flags |= SfMayBlock;
4274    PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
4275    PRE_REG_READ1(long, "unlink", const char *, pathname);
4276    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
4277 }
4278 
4279 PRE(sys_newuname)
4280 {
4281    PRINT("sys_newuname ( %#lx )", ARG1);
4282    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4283    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4284 }
4285 
4286 POST(sys_newuname)
4287 {
4288    if (ARG1 != 0) {
4289       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
4290    }
4291 }
4292 
4293 PRE(sys_waitpid)
4294 {
4295    *flags |= SfMayBlock;
4296    PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
4297    PRE_REG_READ3(long, "waitpid",
4298                  vki_pid_t, pid, unsigned int *, status, int, options);
4299 
4300    if (ARG2 != (Addr)NULL)
4301       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4302 }
4303 
4304 POST(sys_waitpid)
4305 {
4306    if (ARG2 != (Addr)NULL)
4307       POST_MEM_WRITE( ARG2, sizeof(int) );
4308 }
4309 
4310 PRE(sys_wait4)
4311 {
4312    *flags |= SfMayBlock;
4313    PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);
4314 
4315    PRE_REG_READ4(long, "wait4",
4316                  vki_pid_t, pid, unsigned int *, status, int, options,
4317                  struct rusage *, rusage);
4318    if (ARG2 != (Addr)NULL)
4319       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4320    if (ARG4 != (Addr)NULL)
4321       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4322 }
4323 
4324 POST(sys_wait4)
4325 {
4326    if (ARG2 != (Addr)NULL)
4327       POST_MEM_WRITE( ARG2, sizeof(int) );
4328    if (ARG4 != (Addr)NULL)
4329       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
4330 }
4331 
4332 PRE(sys_writev)
4333 {
4334    Int i;
4335    struct vki_iovec * vec;
4336    *flags |= SfMayBlock;
4337    PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
4338    PRE_REG_READ3(ssize_t, "writev",
4339                  unsigned long, fd, const struct iovec *, vector,
4340                  unsigned long, count);
4341    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4342       SET_STATUS_Failure( VKI_EBADF );
4343    } else {
4344       if ((Int)ARG3 >= 0)
4345          PRE_MEM_READ( "writev(vector)",
4346                        ARG2, ARG3 * sizeof(struct vki_iovec) );
4347       if (ARG2 != 0) {
4348          /* ToDo: don't do any of the following if the vector is invalid */
4349          vec = (struct vki_iovec *)ARG2;
4350          for (i = 0; i < (Int)ARG3; i++)
4351             PRE_MEM_READ( "writev(vector[...])",
4352                            (Addr)vec[i].iov_base, vec[i].iov_len );
4353       }
4354    }
4355 }
4356 
4357 PRE(sys_utimes)
4358 {
4359    FUSE_COMPATIBLE_MAY_BLOCK();
4360    PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4361    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
4362    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
4363    if (ARG2 != 0) {
4364       PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
4365       PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
4366    }
4367 }
4368 
4369 PRE(sys_acct)
4370 {
4371    PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
4372    PRE_REG_READ1(long, "acct", const char *, filename);
4373    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
4374 }
4375 
4376 PRE(sys_pause)
4377 {
4378    *flags |= SfMayBlock;
4379    PRINT("sys_pause ( )");
4380    PRE_REG_READ0(long, "pause");
4381 }
4382 
4383 PRE(sys_sigaltstack)
4384 {
4385    PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
4386    PRE_REG_READ2(int, "sigaltstack",
4387                  const vki_stack_t *, ss, vki_stack_t *, oss);
4388    if (ARG1 != 0) {
4389       const vki_stack_t *ss = (vki_stack_t *)ARG1;
4390       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
4391       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
4392       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
4393    }
4394    if (ARG2 != 0) {
4395       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
4396    }
4397 
4398    SET_STATUS_from_SysRes(
4399       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
4400                               (vki_stack_t*)ARG2)
4401    );
4402 }
4403 POST(sys_sigaltstack)
4404 {
4405    vg_assert(SUCCESS);
4406    if (RES == 0 && ARG2 != 0)
4407       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
4408 }
4409 
4410 PRE(sys_sethostname)
4411 {
4412    PRINT("sys_sethostname ( %#lx, %ld )", ARG1,ARG2);
4413    PRE_REG_READ2(long, "sethostname", char *, name, int, len);
4414    PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
4415 }
4416 
4417 #undef PRE
4418 #undef POST
4419 
4420 #endif // defined(VGO_linux) || defined(VGO_darwin)
4421 
4422 /*--------------------------------------------------------------------*/
4423 /*--- end                                                          ---*/
4424 /*--------------------------------------------------------------------*/
4425