
/*--------------------------------------------------------------------*/
/*--- Handle system calls.                          syswrap-main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_guest_offsets.h"
#include "libvex_trc_values.h"
#include "pub_core_basics.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"      // For VG_(getpid)()
#include "pub_core_libcsignal.h"
#include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
                                    //   and VG_(vg_yield)
#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
#include "pub_core_tooliface.h"
#include "pub_core_options.h"
#include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
#include "pub_core_syscall.h"
#include "pub_core_machine.h"
#include "pub_core_mallocfree.h"
#include "pub_core_syswrap.h"

#include "priv_types_n_macros.h"
#include "priv_syswrap-main.h"

#if defined(VGO_darwin)
#include "priv_syswrap-darwin.h"
#endif

/* Useful info which needs to be recorded somewhere:
   Use of registers in syscalls is:

          NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
   LINUX:
   x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
   amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
   ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
   ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
   arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
   mips32 v0    a0   a1   a2   a3 stack stack n/a  n/a  v0        (== NUM)
   mips64 v0    a0   a1   a2   a3   a4   a5   a6   a7   v0        (== NUM)
   arm64  x8    x0   x1   x2   x3   x4   x5   n/a  n/a  x0 ??     (== ARG1??)

   On s390x the svc instruction is used for system calls. The system call
   number is encoded in the instruction (8 bit immediate field). Since Linux
   2.6 it is also allowed to use svc 0 with the system call number in r1.
   This was introduced for system calls >255, but works for all. It is
   also possible to see the svc 0 together with an EXecute instruction, that
   fills in the immediate field.
   s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)

          NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
   DARWIN:
   x86    eax   +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
   amd64  rax   rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c

   For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
   amd64-darwin.  Apparently 0(%esp) is some kind of return address
   (perhaps for syscalls done with "sysenter"?)  I don't think it is
   relevant for syscalls done with "int $0x80/1/2".

   SOLARIS:
   x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
   amd64  rax rdi  rsi  rdx  r10  r8   r9   +8   +16  rdx:rax, rflags.c

   "+N" denotes "in memory at N(%esp)". Solaris also supports fasttrap
   syscalls. Fasttraps do not take any parameters (except for the sysno in eax)
   and never fail (if the sysno is valid).
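
   Worked example (amd64-linux, per the LINUX table above): a client
   call write(fd, buf, count) arrives here with

      rax = __NR_write, rdi = fd, rsi = buf, rdx = count

   and the kernel's result comes back in rax, either a byte count or a
   negated errno value.  The per-platform marshalling implied by these
   tables is implemented by getSyscallArgsFromGuestState and friends
   below; this paragraph is illustration only.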
*/

/* This is the top level of the system-call handler module.  All
   system calls are channelled through here, doing two things:

   * notify the tool of the events (mem/reg reads, writes) happening

   * perform the syscall, usually by passing it along to the kernel
     unmodified.

   A magical piece of assembly code, do_syscall_for_client_WRK, in
   syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
   kernel, whilst having the simulator retain control.
*/

/* The main function is VG_(client_syscall).  The simulation calls it
   whenever a client thread wants to do a syscall.  The following is a
   sketch of what it does.

   * Ensures the root thread's stack is suitably mapped.  Tedious and
     arcane.  See big big comment in VG_(client_syscall).

   * First, it rounds up the syscall number and args (which is a
     platform dependent activity) and puts them in a struct ("args")
     and also a copy in "orig_args".

     The pre/post wrappers refer to these structs and so no longer
     need magic macros to access any specific registers.  This struct
     is stored in thread-specific storage.


   * The pre-wrapper is called, passing it a pointer to struct
     "args".


   * The pre-wrapper examines the args and pokes the tool
     appropriately.  It may modify the args; this is why "orig_args"
     is also stored.

     The pre-wrapper may choose to 'do' the syscall itself, and
     concludes one of three outcomes:

       Success(N)    -- syscall is already complete, with success;
                        result is N

       Fail(N)       -- syscall is already complete, with failure;
                        error code is N

       HandToKernel  -- (the usual case): this needs to be given to
                        the kernel to be done, using the values in
                        the possibly-modified "args" struct.

     In addition, the pre-wrapper may set some flags:

       MayBlock   -- only applicable when outcome==HandToKernel

       PostOnFail -- only applicable when outcome==HandToKernel or Fail


   * If the pre-outcome is HandToKernel, the syscall is duly handed
     off to the kernel (perhaps involving some thread switchery, but
     that's not important).  This reduces the possible set of outcomes
     to either Success(N) or Fail(N).


   * The outcome (Success(N) or Fail(N)) is written back to the guest
     register(s).  This is platform specific:

     x86:    Success(N) ==>  eax = N
             Fail(N)    ==>  eax = -N

     ditto amd64

     ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
             Fail(N) ==>     r3 = N, CR0.SO = 1

     Darwin:
     x86:    Success(N) ==>  edx:eax = N, cc = 0
             Fail(N)    ==>  edx:eax = N, cc = 1

     s390x:  Success(N) ==>  r2 = N
             Fail(N)    ==>  r2 = -N

     Solaris:
     x86:    Success(N) ==>  edx:eax = N, cc = 0
             Fail(N)    ==>      eax = N, cc = 1
     Same applies for fasttraps except they never fail.

   * The post wrapper is called if:

     - it exists, and
     - outcome==Success or (outcome==Fail and PostOnFail is set)

     The post wrapper is passed the adulterated syscall args (struct
     "args"), and the syscall outcome (viz, Success(N) or Fail(N)).

   There are several other complications, primarily to do with
   syscalls getting interrupted, explained in comments in the code.
*/
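
/* In outline, and ignoring the interruption/restart complications, the
   sequence described above boils down to roughly this (an illustrative
   sketch with simplified local names -- not the literal code):

      getSyscallArgsFromGuestState ( &args, &tst->arch.vex, trc );
      orig_args = args;
      ent->before ( tid, &layout, &args, &status, &flags );      // PRE
      if (status.what == SsHandToKernel) {
         putSyscallArgsIntoGuestState ( &args, &tst->arch.vex );
         do_syscall_for_client ( sysno, tst, &mask );
         getSyscallStatusFromGuestState ( &status, &tst->arch.vex );
      }
      putSyscallStatusIntoGuestState ( tid, &status, &tst->arch.vex );
      if (ent->after
          && (!sr_isError(status.sres) || (flags & SfPostOnFail)))
         ent->after ( tid, &args, &status );                     // POST
*/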

/* CAVEATS for writing wrappers.  It is important to follow these!

   The macros defined in priv_types_n_macros.h are designed to help
   decouple the wrapper logic from the actual representation of
   syscall args/results, since these wrappers are designed to work on
   multiple platforms.

   Sometimes a PRE wrapper will complete the syscall itself, without
   handing it to the kernel.  It will use one of SET_STATUS_Success,
   SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
   value.  It is critical to appreciate that use of the macro does not
   immediately cause the underlying guest state to be updated -- that
   is done by the driver logic in this file, when the wrapper returns.

   As a result, PRE wrappers of the following form will malfunction:

   PRE(fooble)
   {
      ... do stuff ...
      SET_STATUS_Somehow(...)

      // do something that assumes guest state is up to date
   }

   In particular, direct or indirect calls to VG_(poll_signals) after
   setting STATUS can cause the guest state to be read (in order to
   build signal frames).  Do not do this.  If you want a signal poll
   after the syscall goes through, do "*flags |= SfPollAfter" and the
   driver logic will do it for you.
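
   A correct variant of the example above is therefore (sketch):

   PRE(fooble)
   {
      ... do stuff ...
      SET_STATUS_Somehow(...)

      // don't touch the guest state here; instead ask the driver
      // to do the poll once the state really has been updated:
      *flags |= SfPollAfter;
   }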

   -----------

   Another critical requirement following introduction of new address
   space manager (JRS, 20050923):

   In a situation where the mappedness of memory has changed, aspacem
   should be notified BEFORE the tool.  Hence the following is
   correct:

      Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
      VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
      if (d)
         VG_(discard_translations)(s->start, s->end+1 - s->start);

   whilst this is wrong:

      VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
      Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
      if (d)
         VG_(discard_translations)(s->start, s->end+1 - s->start);

   The reason is that the tool may itself ask aspacem for more shadow
   memory as a result of the VG_TRACK call.  In such a situation it is
   critical that aspacem's segment array is up to date -- hence the
   need to notify aspacem first.

   -----------

   Also .. take care to call VG_(discard_translations) whenever
   memory with execute permissions is unmapped.
*/


/* ---------------------------------------------------------------------
   Do potentially blocking syscall for the client, and mess with
   signal masks at the same time.
   ------------------------------------------------------------------ */

/* Perform a syscall on behalf of a client thread, using a specific
   signal mask.  On completion, the signal mask is set to restore_mask
   (which presumably blocks almost everything).  If a signal happens
   during the syscall, the handler should call
   VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
   thread's context to do the right thing.

   The _WRK function is handwritten assembly, implemented per-platform
   in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
   properties.  See comments at the top of
   VG_(fixup_guest_state_after_syscall_interrupted) below for details.

   These functions are required to return zero in case of success
   (even if the syscall itself failed), and nonzero if the
   sigprocmask-swizzling calls failed.  We don't actually care about
   the failure values from sigprocmask, although most of the assembly
   implementations do attempt to return that, using the convention
   0 for success, or 0x8000 | error-code for failure.
*/
#if defined(VGO_linux)
extern
UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
                                      void* guest_state,
                                      const vki_sigset_t *syscall_mask,
                                      const vki_sigset_t *restore_mask,
                                      Word sigsetSzB );
#elif defined(VGO_darwin)
extern
UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
#elif defined(VGO_solaris)
extern
UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
                                      void* guest_state,
                                      const vki_sigset_t *syscall_mask,
                                      const vki_sigset_t *restore_mask,
                                      UChar *cflag);
UWord ML_(do_syscall_for_client_dret_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           UChar *cflag);
#else
#  error "Unknown OS"
#endif


static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  elif defined(VGO_solaris)
   UChar cflag;

   /* Fasttraps or anything else cannot go through this path. */
   vg_assert(VG_SOLARIS_SYSNO_CLASS(syscallno)
             == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);

   /* If the syscall is a door_return call then it has to be handled very
      differently. */
   if (tst->os_state.in_door_return)
      err = ML_(do_syscall_for_client_dret_WRK)(
                syscallno, &tst->arch.vex,
                syscall_mask, &saved, &cflag
            );
   else
      err = ML_(do_syscall_for_client_WRK)(
                syscallno, &tst->arch.vex,
                syscall_mask, &saved, &cflag
            );

   /* Save the carry flag. */
#  if defined(VGP_x86_solaris)
   LibVEX_GuestX86_put_eflag_c(cflag, &tst->arch.vex);
#  elif defined(VGP_amd64_solaris)
   LibVEX_GuestAMD64_put_rflag_c(cflag, &tst->arch.vex);
#  else
#    error "Unknown platform"
#  endif

#  else
#    error "Unknown OS"
#  endif
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %lu",
      err & 0xFFF
   );
}
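
/* For reference: the driver logic later in this file calls the above
   helper along these lines (an approximate sketch, not the literal
   code):

      vki_sigset_t mask = tst->sig_mask;
      VG_(sanitize_client_sigmask)(&mask);
      do_syscall_for_client(sysno, tst, &mask);

   that is, with a sanitized copy of the thread's own signal mask, so
   that the client cannot block signals Valgrind itself relies on. */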


/* ---------------------------------------------------------------------
   Impedance matchers and misc helpers
   ------------------------------------------------------------------ */

static
Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
{
   return a1->sysno == a2->sysno
          && a1->arg1 == a2->arg1
          && a1->arg2 == a2->arg2
          && a1->arg3 == a2->arg3
          && a1->arg4 == a2->arg4
          && a1->arg5 == a2->arg5
          && a1->arg6 == a2->arg6
          && a1->arg7 == a2->arg7
          && a1->arg8 == a2->arg8;
}

static
Bool eq_SyscallStatus ( UInt sysno, SyscallStatus* s1, SyscallStatus* s2 )
{
   /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
   if (s1->what == s2->what && sr_EQ( sysno, s1->sres, s2->sres ))
      return True;
#  if defined(VGO_darwin)
   /* Darwin-specific debugging guff */
   vg_assert(s1->what == s2->what);
   VG_(printf)("eq_SyscallStatus:\n");
   VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
   VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
   vg_assert(0);
#  endif
   return False;
}

/* Convert between SysRes and SyscallStatus, to the extent possible. */

static
SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
{
   SyscallStatus status;
   status.what = SsComplete;
   status.sres = res;
   return status;
}


/* Impedance matchers.  These convert syscall arg or result data from
   the platform-specific in-guest-state format to the canonical
   formats, and back. */

static
void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
                                    /*IN*/ VexGuestArchState* gst_vanilla,
                                    /*IN*/ UInt trc )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sysno = gst->guest_EAX;
   canonical->arg1  = gst->guest_EBX;
   canonical->arg2  = gst->guest_ECX;
   canonical->arg3  = gst->guest_EDX;
   canonical->arg4  = gst->guest_ESI;
   canonical->arg5  = gst->guest_EDI;
   canonical->arg6  = gst->guest_EBP;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sysno = gst->guest_RAX;
   canonical->arg1  = gst->guest_RDI;
   canonical->arg2  = gst->guest_RSI;
   canonical->arg3  = gst->guest_RDX;
   canonical->arg4  = gst->guest_R10;
   canonical->arg5  = gst->guest_R8;
   canonical->arg6  = gst->guest_R9;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sysno = gst->guest_R7;
   canonical->arg1  = gst->guest_R0;
   canonical->arg2  = gst->guest_R1;
   canonical->arg3  = gst->guest_R2;
   canonical->arg4  = gst->guest_R3;
   canonical->arg5  = gst->guest_R4;
   canonical->arg6  = gst->guest_R5;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   canonical->sysno = gst->guest_X8;
   canonical->arg1  = gst->guest_X0;
   canonical->arg2  = gst->guest_X1;
   canonical->arg3  = gst->guest_X2;
   canonical->arg4  = gst->guest_X3;
   canonical->arg5  = gst->guest_X4;
   canonical->arg6  = gst->guest_X5;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   canonical->sysno = gst->guest_r2;    // v0
   if (canonical->sysno == __NR_exit) {
      canonical->arg1 = gst->guest_r4;    // a0
      canonical->arg2 = 0;
      canonical->arg3 = 0;
      canonical->arg4 = 0;
      canonical->arg5 = 0;
      canonical->arg6 = 0;
      canonical->arg8 = 0;
   } else if (canonical->sysno != __NR_syscall) {
      canonical->arg1  = gst->guest_r4;    // a0
      canonical->arg2  = gst->guest_r5;    // a1
      canonical->arg3  = gst->guest_r6;    // a2
      canonical->arg4  = gst->guest_r7;    // a3
      canonical->arg5  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
      canonical->arg6  = *((UInt*) (gst->guest_r29 + 20));    // 20(sp)
      canonical->arg8 = 0;
   } else {
      // Fixme hack handle syscall()
      canonical->sysno = gst->guest_r4;    // a0
      canonical->arg1  = gst->guest_r5;    // a1
      canonical->arg2  = gst->guest_r6;    // a2
      canonical->arg3  = gst->guest_r7;    // a3
      canonical->arg4  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
      canonical->arg5  = *((UInt*) (gst->guest_r29 + 20));    // 20(guest_SP/sp)
      canonical->arg6  = *((UInt*) (gst->guest_r29 + 24));    // 24(guest_SP/sp)
      canonical->arg8 = __NR_syscall;
   }

#elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   canonical->sysno = gst->guest_r2;    // v0
   canonical->arg1  = gst->guest_r4;    // a0
   canonical->arg2  = gst->guest_r5;    // a1
   canonical->arg3  = gst->guest_r6;    // a2
   canonical->arg4  = gst->guest_r7;    // a3
   canonical->arg5  = gst->guest_r8;    // a4
   canonical->arg6  = gst->guest_r9;    // a5

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;
   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_EAX;
   if (canonical->sysno != 0) {
      // stack[0] is return address
      canonical->arg1  = stack[1];
      canonical->arg2  = stack[2];
      canonical->arg3  = stack[3];
      canonical->arg4  = stack[4];
      canonical->arg5  = stack[5];
      canonical->arg6  = stack[6];
      canonical->arg7  = stack[7];
      canonical->arg8  = stack[8];
   } else {
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      //      lead to incorrect checking, I think, because the PRRAn/PSARn
      //      macros will mention the pre-shifted args.
      canonical->sysno = stack[1];
      vg_assert(canonical->sysno != 0);
      canonical->arg1  = stack[2];
      canonical->arg2  = stack[3];
      canonical->arg3  = stack[4];
      canonical->arg4  = stack[5];
      canonical->arg5  = stack[6];
      canonical->arg6  = stack[7];
      canonical->arg7  = stack[8];
      canonical->arg8  = stack[9];

      PRINT("SYSCALL[%d,?](0) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(canonical->sysno));
   }

   // Here we determine what kind of syscall it was by looking at the
   // interrupt kind, and then encode the syscall number using the 64-bit
   // encoding for Valgrind's internal use.
   //
   // DDD: Would it be better to stash the JMP kind into the Darwin
   // thread state rather than passing in the trc?
   switch (trc) {
   case VEX_TRC_JMP_SYS_INT128:
      // int $0x80 = Unix, 64-bit result
      vg_assert(canonical->sysno >= 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
      break;
   case VEX_TRC_JMP_SYS_SYSENTER:
      // syscall = Unix, 32-bit result
      // OR        Mach, 32-bit result
      if (canonical->sysno >= 0) {
         // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
                                                             & 0xffff);
      } else {
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
      }
      break;
   case VEX_TRC_JMP_SYS_INT129:
      // int $0x81 = Mach, 32-bit result
      vg_assert(canonical->sysno < 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
      break;
   case VEX_TRC_JMP_SYS_INT130:
      // int $0x82 = mdep, 32-bit result
      vg_assert(canonical->sysno >= 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
      break;
   default:
      vg_assert(0);
      break;
   }

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);

   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_RAX;
   if (canonical->sysno != __NR_syscall) {
      // stack[0] is return address
      canonical->arg1  = gst->guest_RDI;
      canonical->arg2  = gst->guest_RSI;
      canonical->arg3  = gst->guest_RDX;
      canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg5  = gst->guest_R8;
      canonical->arg6  = gst->guest_R9;
      canonical->arg7  = stack[1];
      canonical->arg8  = stack[2];
   } else {
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      //      lead to incorrect checking, I think, because the PRRAn/PSARn
      //      macros will mention the pre-shifted args.
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
      vg_assert(canonical->sysno != __NR_syscall);
      canonical->arg1  = gst->guest_RSI;
      canonical->arg2  = gst->guest_RDX;
      canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg4  = gst->guest_R8;
      canonical->arg5  = gst->guest_R9;
      canonical->arg6  = stack[1];
      canonical->arg7  = stack[2];
      canonical->arg8  = stack[3];

      PRINT("SYSCALL[%d,?](0) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(canonical->sysno));
   }

   // no canonical->sysno adjustment needed

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   canonical->sysno = gst->guest_SYSNO;
   canonical->arg1  = gst->guest_r2;
   canonical->arg2  = gst->guest_r3;
   canonical->arg3  = gst->guest_r4;
   canonical->arg4  = gst->guest_r5;
   canonical->arg5  = gst->guest_r6;
   canonical->arg6  = gst->guest_r7;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_tilegx_linux)
   VexGuestTILEGXState* gst = (VexGuestTILEGXState*)gst_vanilla;
   canonical->sysno = gst->guest_r10;
   canonical->arg1  = gst->guest_r0;
   canonical->arg2  = gst->guest_r1;
   canonical->arg3  = gst->guest_r2;
   canonical->arg4  = gst->guest_r3;
   canonical->arg5  = gst->guest_r4;
   canonical->arg6  = gst->guest_r5;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_x86_solaris)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;
   canonical->sysno = gst->guest_EAX;
   /* stack[0] is a return address. */
   canonical->arg1  = stack[1];
   canonical->arg2  = stack[2];
   canonical->arg3  = stack[3];
   canonical->arg4  = stack[4];
   canonical->arg5  = stack[5];
   canonical->arg6  = stack[6];
   canonical->arg7  = stack[7];
   canonical->arg8  = stack[8];

   switch (trc) {
   case VEX_TRC_JMP_SYS_INT145:
   case VEX_TRC_JMP_SYS_SYSENTER:
   case VEX_TRC_JMP_SYS_SYSCALL:
   /* These three are not actually valid syscall instructions on Solaris.
      Pretend for now that we handle them as normal syscalls. */
   case VEX_TRC_JMP_SYS_INT128:
   case VEX_TRC_JMP_SYS_INT129:
   case VEX_TRC_JMP_SYS_INT130:
      /* int $0x91, sysenter, syscall = normal syscall */
      break;
   case VEX_TRC_JMP_SYS_INT210:
      /* int $0xD2 = fasttrap */
      canonical->sysno
         = VG_SOLARIS_SYSCALL_CONSTRUCT_FASTTRAP(canonical->sysno);
      break;
   default:
      vg_assert(0);
      break;
   }

#elif defined(VGP_amd64_solaris)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;
   canonical->sysno = gst->guest_RAX;
   /* stack[0] is a return address. */
   canonical->arg1 = gst->guest_RDI;
   canonical->arg2 = gst->guest_RSI;
   canonical->arg3 = gst->guest_RDX;
   canonical->arg4 = gst->guest_R10;  /* Not RCX with syscall. */
   canonical->arg5 = gst->guest_R8;
   canonical->arg6 = gst->guest_R9;
   canonical->arg7 = stack[1];
   canonical->arg8 = stack[2];

   switch (trc) {
   case VEX_TRC_JMP_SYS_SYSCALL:
      /* syscall = normal syscall */
      break;
   case VEX_TRC_JMP_SYS_INT210:
      /* int $0xD2 = fasttrap */
      canonical->sysno
         = VG_SOLARIS_SYSCALL_CONSTRUCT_FASTTRAP(canonical->sysno);
      break;
   default:
      vg_assert(0);
      break;
   }

#else
#  error "getSyscallArgsFromGuestState: unknown arch"
#endif
}

static
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   gst->guest_X8 = canonical->sysno;
   gst->guest_X0 = canonical->arg1;
   gst->guest_X1 = canonical->arg2;
   gst->guest_X2 = canonical->arg3;
   gst->guest_X3 = canonical->arg4;
   gst->guest_X4 = canonical->arg5;
   gst->guest_X5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1]       = canonical->arg7;
   stack[2]       = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO  = canonical->sysno;
   gst->guest_r2     = canonical->arg1;
   gst->guest_r3     = canonical->arg2;
   gst->guest_r4     = canonical->arg3;
   gst->guest_r5     = canonical->arg4;
   gst->guest_r6     = canonical->arg5;
   gst->guest_r7     = canonical->arg6;

#elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   if (canonical->arg8 != __NR_syscall) {
      gst->guest_r2 = canonical->sysno;
      gst->guest_r4 = canonical->arg1;
      gst->guest_r5 = canonical->arg2;
      gst->guest_r6 = canonical->arg3;
      gst->guest_r7 = canonical->arg4;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6; // 20(sp)
   } else {
      canonical->arg8 = 0;
      gst->guest_r2 = __NR_syscall;
      gst->guest_r4 = canonical->sysno;
      gst->guest_r5 = canonical->arg1;
      gst->guest_r6 = canonical->arg2;
      gst->guest_r7 = canonical->arg3;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5; // 20(sp)
      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6; // 24(sp)
   }

#elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   gst->guest_r2 = canonical->sysno;
   gst->guest_r4 = canonical->arg1;
   gst->guest_r5 = canonical->arg2;
   gst->guest_r6 = canonical->arg3;
   gst->guest_r7 = canonical->arg4;
   gst->guest_r8 = canonical->arg5;
   gst->guest_r9 = canonical->arg6;

#elif defined(VGP_tilegx_linux)
   VexGuestTILEGXState* gst = (VexGuestTILEGXState*)gst_vanilla;
   gst->guest_r10 = canonical->sysno;
   gst->guest_r0 = canonical->arg1;
   gst->guest_r1 = canonical->arg2;
   gst->guest_r2 = canonical->arg3;
   gst->guest_r3 = canonical->arg4;
   gst->guest_r4 = canonical->arg5;
   gst->guest_r5 = canonical->arg6;

#elif defined(VGP_x86_solaris)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   /* Fasttraps or anything else cannot go through this way. */
   vg_assert(VG_SOLARIS_SYSNO_CLASS(canonical->sysno)
             == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);
   gst->guest_EAX = canonical->sysno;
   /* stack[0] is a return address. */
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_solaris)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   /* Fasttraps or anything else cannot go through this way. */
   vg_assert(VG_SOLARIS_SYSNO_CLASS(canonical->sysno)
             == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);
   gst->guest_RAX = canonical->sysno;
   /* stack[0] is a return address. */
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1] = canonical->arg7;
   stack[2] = canonical->arg8;

#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}

static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm64_linux)( gst->guest_X0 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   UInt                v0 = gst->guest_r2;    // v0
   UInt                v1 = gst->guest_r3;    // v1
   UInt                a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   ULong                v0 = gst->guest_r2;    // v0
   ULong                v1 = gst->guest_r3;    // v1
   ULong                a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips64_linux)(v0, v1, a3);
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
   canonical->what = SsComplete;

#  elif defined(VGP_tilegx_linux)
   VexGuestTILEGXState* gst = (VexGuestTILEGXState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_tilegx_linux)( gst->guest_r0 );
   canonical->what = SsComplete;

#  elif defined(VGP_x86_solaris)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);

   canonical->sres = VG_(mk_SysRes_x86_solaris)(carry ? True : False,
                                                gst->guest_EAX,
                                                carry ? 0 : gst->guest_EDX);
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_solaris)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);

   canonical->sres = VG_(mk_SysRes_amd64_solaris)(carry ? True : False,
                                                  gst->guest_RAX,
                                                  carry ? 0 : gst->guest_RDX);
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}

static
void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
                                      /*IN*/ SyscallStatus*     canonical,
                                      /*OUT*/VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-x86 scheme.  Oh well. */
      gst->guest_EAX = - (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_EAX = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_x86_EAX, sizeof(UWord) );

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-amd64 scheme.  Oh well. */
      gst->guest_RAX = - (Long)sr_Err(canonical->sres);
   } else {
      gst->guest_RAX = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_amd64_RAX, sizeof(UWord) );

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* set CR0.SO */
      LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
      gst->guest_GPR3 = sr_Err(canonical->sres);
   } else {
      /* clear CR0.SO */
      LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
      gst->guest_GPR3 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc32_GPR3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc32_CR0_0, sizeof(UChar) );

#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* set CR0.SO */
      LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
      gst->guest_GPR3 = sr_Err(canonical->sres);
   } else {
      /* clear CR0.SO */
      LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
      gst->guest_GPR3 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc64_GPR3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc64_CR0_0, sizeof(UChar) );

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-arm scheme.  Oh well. */
      gst->guest_R0 = - (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_R0 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_arm_R0, sizeof(UWord) );

#  elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-arm64 scheme.  Oh well. */
      gst->guest_X0 = - (Long)sr_Err(canonical->sres);
   } else {
      gst->guest_X0 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_arm64_X0, sizeof(UWord) );

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   SysRes sres = canonical->sres;
   vg_assert(canonical->what == SsComplete);
   /* Unfortunately here we have to break abstraction and look
      directly inside 'res', in order to decide what to do. */
   switch (sres._mode) {
      case SysRes_MACH: // int $0x81 = Mach, 32-bit result
      case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
         gst->guest_EAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EAX, sizeof(UInt) );
         break;
      case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
      case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
         gst->guest_EAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EAX, sizeof(UInt) );
         gst->guest_EDX = sres._wHI;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EDX, sizeof(UInt) );
         LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
                                      gst );
         // GrP fixme sets defined for entire eflags, not just bit c
         // DDD: this breaks exp-ptrcheck.
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
         break;
      default:
         vg_assert(0);
         break;
   }

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   SysRes sres = canonical->sres;
   vg_assert(canonical->what == SsComplete);
   /* Unfortunately here we have to break abstraction and look
      directly inside 'res', in order to decide what to do. */
   switch (sres._mode) {
      case SysRes_MACH: // syscall = Mach, 64-bit result
      case SysRes_MDEP: // syscall = mdep, 64-bit result
         gst->guest_RAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RAX, sizeof(ULong) );
         break;
      case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
      case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
         gst->guest_RAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RAX, sizeof(ULong) );
         gst->guest_RDX = sres._wHI;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RDX, sizeof(ULong) );
         LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
                                        gst );
         // GrP fixme sets defined for entire rflags, not just bit c
         // DDD: this breaks exp-ptrcheck.
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
         break;
      default:
         vg_assert(0);
         break;
   }

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      gst->guest_r2 = - (Long)sr_Err(canonical->sres);
   } else {
      gst->guest_r2 = sr_Res(canonical->sres);
   }

#  elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      gst->guest_r2 = (Int)sr_Err(canonical->sres);
      gst->guest_r7 = (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_r2 = sr_Res(canonical->sres);
      gst->guest_r3 = sr_ResEx(canonical->sres);
      gst->guest_r7 = (Int)sr_Err(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips32_r2, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips32_r3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips32_r7, sizeof(UWord) );

#  elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      gst->guest_r2 = (Int)sr_Err(canonical->sres);
      gst->guest_r7 = (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_r2 = sr_Res(canonical->sres);
      gst->guest_r3 = sr_ResEx(canonical->sres);
      gst->guest_r7 = (Int)sr_Err(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips64_r2, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips64_r3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips64_r7, sizeof(UWord) );

#  elif defined(VGP_tilegx_linux)
   VexGuestTILEGXState* gst = (VexGuestTILEGXState*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      gst->guest_r0 = - (Long)sr_Err(canonical->sres);
      // r1 holds errno
1343       gst->guest_r1 = (Long)sr_Err(canonical->sres);
1344    } else {
1345       gst->guest_r0 = sr_Res(canonical->sres);
1346       gst->guest_r1 = 0;
1347    }
1348 
1349 #  elif defined(VGP_x86_solaris)
1350    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1351    SysRes sres = canonical->sres;
1352    vg_assert(canonical->what == SsComplete);
1353 
1354    if (sr_isError(sres)) {
1355       gst->guest_EAX = sr_Err(sres);
1356       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EAX,
1357                sizeof(UInt));
1358       LibVEX_GuestX86_put_eflag_c(1, gst);
1359    }
1360    else {
1361       gst->guest_EAX = sr_Res(sres);
1362       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EAX,
1363                sizeof(UInt));
1364       gst->guest_EDX = sr_ResHI(sres);
1365       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EDX,
1366                sizeof(UInt));
1367       LibVEX_GuestX86_put_eflag_c(0, gst);
1368    }
1369    /* Make CC_DEP1 and CC_DEP2 defined.  This is inaccurate because it makes
1370       other eflags defined too (see README.solaris). */
1371    VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestX86State,
1372             guest_CC_DEP1), sizeof(UInt));
1373    VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestX86State,
1374             guest_CC_DEP2), sizeof(UInt));
1375 
1376 #  elif defined(VGP_amd64_solaris)
1377    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1378    SysRes sres = canonical->sres;
1379    vg_assert(canonical->what == SsComplete);
1380 
1381    if (sr_isError(sres)) {
1382       gst->guest_RAX = sr_Err(sres);
1383       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RAX,
1384                sizeof(ULong));
1385       LibVEX_GuestAMD64_put_rflag_c(1, gst);
1386    }
1387    else {
1388       gst->guest_RAX = sr_Res(sres);
1389       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RAX,
1390                sizeof(ULong));
1391       gst->guest_RDX = sr_ResHI(sres);
1392       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RDX,
1393                sizeof(ULong));
1394       LibVEX_GuestAMD64_put_rflag_c(0, gst);
1395    }
1396    /* Make CC_DEP1 and CC_DEP2 defined.  This is inaccurate because it makes
1397       other eflags defined too (see README.solaris). */
1398    VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestAMD64State,
1399             guest_CC_DEP1), sizeof(ULong));
1400    VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestAMD64State,
1401             guest_CC_DEP2), sizeof(ULong));
1402 
1403 #  else
1404 #    error "putSyscallStatusIntoGuestState: unknown arch"
1405 #  endif
1406 }
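/* Example of the Linux convention handled above (a sketch only; the
   per-arch cases above are authoritative): a failing syscall is
   presented to the guest as -errno in the result register, so an error
   of ENOENT (2) lands in, eg, guest_r2 on s390x-linux as
   (ULong)-2 == 0xFFFFFFFFFFFFFFFE, whereas a successful result is
   written back verbatim. */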
1407 
1408 
1409 /* Tell me the offsets in the guest state of the syscall params, so
1410    that the scalar argument checkers don't have to have this info
1411    hardwired. */
1412 
1413 static
1414 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
1415 {
1416    VG_(bzero_inline)(layout, sizeof(*layout));
1417 
1418 #if defined(VGP_x86_linux)
1419    layout->o_sysno  = OFFSET_x86_EAX;
1420    layout->o_arg1   = OFFSET_x86_EBX;
1421    layout->o_arg2   = OFFSET_x86_ECX;
1422    layout->o_arg3   = OFFSET_x86_EDX;
1423    layout->o_arg4   = OFFSET_x86_ESI;
1424    layout->o_arg5   = OFFSET_x86_EDI;
1425    layout->o_arg6   = OFFSET_x86_EBP;
1426    layout->uu_arg7  = -1; /* impossible value */
1427    layout->uu_arg8  = -1; /* impossible value */
1428 
1429 #elif defined(VGP_amd64_linux)
1430    layout->o_sysno  = OFFSET_amd64_RAX;
1431    layout->o_arg1   = OFFSET_amd64_RDI;
1432    layout->o_arg2   = OFFSET_amd64_RSI;
1433    layout->o_arg3   = OFFSET_amd64_RDX;
1434    layout->o_arg4   = OFFSET_amd64_R10;
1435    layout->o_arg5   = OFFSET_amd64_R8;
1436    layout->o_arg6   = OFFSET_amd64_R9;
1437    layout->uu_arg7  = -1; /* impossible value */
1438    layout->uu_arg8  = -1; /* impossible value */
1439 
1440 #elif defined(VGP_ppc32_linux)
1441    layout->o_sysno  = OFFSET_ppc32_GPR0;
1442    layout->o_arg1   = OFFSET_ppc32_GPR3;
1443    layout->o_arg2   = OFFSET_ppc32_GPR4;
1444    layout->o_arg3   = OFFSET_ppc32_GPR5;
1445    layout->o_arg4   = OFFSET_ppc32_GPR6;
1446    layout->o_arg5   = OFFSET_ppc32_GPR7;
1447    layout->o_arg6   = OFFSET_ppc32_GPR8;
1448    layout->uu_arg7  = -1; /* impossible value */
1449    layout->uu_arg8  = -1; /* impossible value */
1450 
1451 #elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
1452    layout->o_sysno  = OFFSET_ppc64_GPR0;
1453    layout->o_arg1   = OFFSET_ppc64_GPR3;
1454    layout->o_arg2   = OFFSET_ppc64_GPR4;
1455    layout->o_arg3   = OFFSET_ppc64_GPR5;
1456    layout->o_arg4   = OFFSET_ppc64_GPR6;
1457    layout->o_arg5   = OFFSET_ppc64_GPR7;
1458    layout->o_arg6   = OFFSET_ppc64_GPR8;
1459    layout->uu_arg7  = -1; /* impossible value */
1460    layout->uu_arg8  = -1; /* impossible value */
1461 
1462 #elif defined(VGP_arm_linux)
1463    layout->o_sysno  = OFFSET_arm_R7;
1464    layout->o_arg1   = OFFSET_arm_R0;
1465    layout->o_arg2   = OFFSET_arm_R1;
1466    layout->o_arg3   = OFFSET_arm_R2;
1467    layout->o_arg4   = OFFSET_arm_R3;
1468    layout->o_arg5   = OFFSET_arm_R4;
1469    layout->o_arg6   = OFFSET_arm_R5;
1470    layout->uu_arg7  = -1; /* impossible value */
1471    layout->uu_arg8  = -1; /* impossible value */
1472 
1473 #elif defined(VGP_arm64_linux)
1474    layout->o_sysno  = OFFSET_arm64_X8;
1475    layout->o_arg1   = OFFSET_arm64_X0;
1476    layout->o_arg2   = OFFSET_arm64_X1;
1477    layout->o_arg3   = OFFSET_arm64_X2;
1478    layout->o_arg4   = OFFSET_arm64_X3;
1479    layout->o_arg5   = OFFSET_arm64_X4;
1480    layout->o_arg6   = OFFSET_arm64_X5;
1481    layout->uu_arg7  = -1; /* impossible value */
1482    layout->uu_arg8  = -1; /* impossible value */
1483 
1484 #elif defined(VGP_mips32_linux)
1485    layout->o_sysno  = OFFSET_mips32_r2;
1486    layout->o_arg1   = OFFSET_mips32_r4;
1487    layout->o_arg2   = OFFSET_mips32_r5;
1488    layout->o_arg3   = OFFSET_mips32_r6;
1489    layout->o_arg4   = OFFSET_mips32_r7;
1490    layout->s_arg5   = sizeof(UWord) * 4;
1491    layout->s_arg6   = sizeof(UWord) * 5;
1492    layout->uu_arg7  = -1; /* impossible value */
1493    layout->uu_arg8  = -1; /* impossible value */
1494 
1495 #elif defined(VGP_mips64_linux)
1496    layout->o_sysno  = OFFSET_mips64_r2;
1497    layout->o_arg1   = OFFSET_mips64_r4;
1498    layout->o_arg2   = OFFSET_mips64_r5;
1499    layout->o_arg3   = OFFSET_mips64_r6;
1500    layout->o_arg4   = OFFSET_mips64_r7;
1501    layout->o_arg5   = OFFSET_mips64_r8;
1502    layout->o_arg6   = OFFSET_mips64_r9;
1503    layout->uu_arg7  = -1; /* impossible value */
1504    layout->uu_arg8  = -1; /* impossible value */
1505 
1506 #elif defined(VGP_x86_darwin)
1507    layout->o_sysno  = OFFSET_x86_EAX;
1508    // syscall parameters are on stack in C convention
1509    layout->s_arg1   = sizeof(UWord) * 1;
1510    layout->s_arg2   = sizeof(UWord) * 2;
1511    layout->s_arg3   = sizeof(UWord) * 3;
1512    layout->s_arg4   = sizeof(UWord) * 4;
1513    layout->s_arg5   = sizeof(UWord) * 5;
1514    layout->s_arg6   = sizeof(UWord) * 6;
1515    layout->s_arg7   = sizeof(UWord) * 7;
1516    layout->s_arg8   = sizeof(UWord) * 8;
1517 
1518 #elif defined(VGP_amd64_darwin)
1519    layout->o_sysno  = OFFSET_amd64_RAX;
1520    layout->o_arg1   = OFFSET_amd64_RDI;
1521    layout->o_arg2   = OFFSET_amd64_RSI;
1522    layout->o_arg3   = OFFSET_amd64_RDX;
1523    layout->o_arg4   = OFFSET_amd64_RCX;
1524    layout->o_arg5   = OFFSET_amd64_R8;
1525    layout->o_arg6   = OFFSET_amd64_R9;
1526    layout->s_arg7   = sizeof(UWord) * 1;
1527    layout->s_arg8   = sizeof(UWord) * 2;
1528 
1529 #elif defined(VGP_s390x_linux)
1530    layout->o_sysno  = OFFSET_s390x_SYSNO;
1531    layout->o_arg1   = OFFSET_s390x_r2;
1532    layout->o_arg2   = OFFSET_s390x_r3;
1533    layout->o_arg3   = OFFSET_s390x_r4;
1534    layout->o_arg4   = OFFSET_s390x_r5;
1535    layout->o_arg5   = OFFSET_s390x_r6;
1536    layout->o_arg6   = OFFSET_s390x_r7;
1537    layout->uu_arg7  = -1; /* impossible value */
1538    layout->uu_arg8  = -1; /* impossible value */
1539 
1540 #elif defined(VGP_tilegx_linux)
1541    layout->o_sysno  = OFFSET_tilegx_r(10);
1542    layout->o_arg1   = OFFSET_tilegx_r(0);
1543    layout->o_arg2   = OFFSET_tilegx_r(1);
1544    layout->o_arg3   = OFFSET_tilegx_r(2);
1545    layout->o_arg4   = OFFSET_tilegx_r(3);
1546    layout->o_arg5   = OFFSET_tilegx_r(4);
1547    layout->o_arg6   = OFFSET_tilegx_r(5);
1548    layout->uu_arg7  = -1; /* impossible value */
1549    layout->uu_arg8  = -1; /* impossible value */
1550 
1551 #elif defined(VGP_x86_solaris)
1552    layout->o_sysno  = OFFSET_x86_EAX;
1553    /* Syscall parameters are on the stack. */
1554    layout->s_arg1   = sizeof(UWord) * 1;
1555    layout->s_arg2   = sizeof(UWord) * 2;
1556    layout->s_arg3   = sizeof(UWord) * 3;
1557    layout->s_arg4   = sizeof(UWord) * 4;
1558    layout->s_arg5   = sizeof(UWord) * 5;
1559    layout->s_arg6   = sizeof(UWord) * 6;
1560    layout->s_arg7   = sizeof(UWord) * 7;
1561    layout->s_arg8   = sizeof(UWord) * 8;
1562 
1563 #elif defined(VGP_amd64_solaris)
1564    layout->o_sysno  = OFFSET_amd64_RAX;
1565    layout->o_arg1   = OFFSET_amd64_RDI;
1566    layout->o_arg2   = OFFSET_amd64_RSI;
1567    layout->o_arg3   = OFFSET_amd64_RDX;
1568    layout->o_arg4   = OFFSET_amd64_R10;
1569    layout->o_arg5   = OFFSET_amd64_R8;
1570    layout->o_arg6   = OFFSET_amd64_R9;
1571    layout->s_arg7   = sizeof(UWord) * 1;
1572    layout->s_arg8   = sizeof(UWord) * 2;
1573 
1574 #else
1575 #  error "getSyscallArgLayout: unknown arch"
1576 #endif
1577 }
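/* Roughly how this layout gets used (a sketch; see
   priv_types_n_macros.h for the real expansions): a pre-handler's
   scalar argument check such as

      PRE_REG_READ2(long, "kill", int, pid, int, sig);

   turns, via the PRA* macros, into pre_reg_read events on
   layout->o_arg1 and layout->o_arg2, or into pre_mem_read events on
   the layout->s_argN stack offsets for platforms whose syscall args
   live on the stack. */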
1578 
1579 
1580 /* ---------------------------------------------------------------------
1581    The main driver logic
1582    ------------------------------------------------------------------ */
1583 
1584 /* Finding the handlers for a given syscall, or faking up one
1585    when no handler is found. */
1586 
1587 static
1588 void bad_before ( ThreadId              tid,
1589                   SyscallArgLayout*     layout,
1590                   /*MOD*/SyscallArgs*   args,
1591                   /*OUT*/SyscallStatus* status,
1592                   /*OUT*/UWord*         flags )
1593 {
1594    VG_(dmsg)("WARNING: unhandled %s syscall: %s\n",
1595       VG_PLATFORM, VG_SYSNUM_STRING(args->sysno));
1596    if (VG_(clo_verbosity) > 1) {
1597       VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1598    }
1599    VG_(dmsg)("You may be able to write your own handler.\n");
1600    VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
1601    VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
1602    VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");
1603 
1604    SET_STATUS_Failure(VKI_ENOSYS);
1605 
1606 #  if defined(VGO_solaris)
1607    VG_(exit)(1);
1608 #  endif
1609 }
1610 
1611 static SyscallTableEntry bad_sys =
1612    { bad_before, NULL };
1613 
1614 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
1615 {
1616    const SyscallTableEntry* sys = NULL;
1617 
1618 #  if defined(VGO_linux)
1619    sys = ML_(get_linux_syscall_entry)( syscallno );
1620 
1621 #  elif defined(VGO_darwin)
1622    Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
1623 
1624    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
1625    case VG_DARWIN_SYSCALL_CLASS_UNIX:
1626       if (idx >= 0 && idx < ML_(syscall_table_size) &&
1627           ML_(syscall_table)[idx].before != NULL)
1628          sys = &ML_(syscall_table)[idx];
1629       break;
1630    case VG_DARWIN_SYSCALL_CLASS_MACH:
1631       if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
1632           ML_(mach_trap_table)[idx].before != NULL)
1633          sys = &ML_(mach_trap_table)[idx];
1634       break;
1635    case VG_DARWIN_SYSCALL_CLASS_MDEP:
1636       if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
1637           ML_(mdep_trap_table)[idx].before != NULL)
1638          sys = &ML_(mdep_trap_table)[idx];
1639       break;
1640    default:
1641       vg_assert(0);
1642       break;
1643    }
1644 
1645 #  elif defined(VGO_solaris)
1646    sys = ML_(get_solaris_syscall_entry)(syscallno);
1647 
1648 #  else
1649 #    error Unknown OS
1650 #  endif
1651 
1652    return sys == NULL  ? &bad_sys  : sys;
1653 }
1654 
1655 
1656 /* Add and remove signals from mask so that we end up telling the
1657    kernel the state we actually want rather than what the client
1658    wants. */
1659 static void sanitize_client_sigmask(vki_sigset_t *mask)
1660 {
1661    VG_(sigdelset)(mask, VKI_SIGKILL);
1662    VG_(sigdelset)(mask, VKI_SIGSTOP);
1663    VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
1664 }
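/* For example: if the client has blocked SIGUSR1, SIGKILL, SIGSTOP and
   VG_SIGVGKILL, the mask actually handed to the kernel for the
   duration of the syscall blocks only SIGUSR1 -- the other three are
   stripped above so that the kernel, and Valgrind's own thread-kill
   mechanism, can always interrupt the thread.  (Sketch only; the
   stripping is exactly the three sigdelset calls above.) */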
1665 
1666 typedef
1667    struct {
1668       SyscallArgs   orig_args;
1669       SyscallArgs   args;
1670       SyscallStatus status;
1671       UWord         flags;
1672    }
1673    SyscallInfo;
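/* The .status.what field of one of these records follows a simple
   lifecycle (a rough sketch; the transitions are all in the code
   below):

      SsIdle --> SsHandToKernel --> SsComplete --> SsIdle

   VG_(client_syscall) performs the first two transitions (a pre-handler
   may jump straight to SsComplete), VG_(post_syscall) performs the
   last, and VG_(clear_syscallInfo) forces a record back to SsIdle,
   e.g. after a fork. */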
1674 
1675 SyscallInfo *syscallInfo;
1676 
1677 /* The scheduler needs to be able to zero out these records after a
1678    fork, hence this is exported from m_syswrap. */
1679 void VG_(clear_syscallInfo) ( Int tid )
1680 {
1681    vg_assert(syscallInfo);
1682    vg_assert(tid >= 0 && tid < VG_N_THREADS);
1683    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
1684    syscallInfo[tid].status.what = SsIdle;
1685 }
1686 
1687 Bool VG_(is_in_syscall) ( Int tid )
1688 {
1689    vg_assert(tid >= 0 && tid < VG_N_THREADS);
1690    return (syscallInfo[tid].status.what != SsIdle);
1691 }
1692 
1693 static void ensure_initialised ( void )
1694 {
1695    Int i;
1696    static Bool init_done = False;
1697    if (init_done)
1698       return;
1699    init_done = True;
1700 
1701    syscallInfo = VG_(malloc)("scinfo", VG_N_THREADS * sizeof syscallInfo[0]);
1702 
1703    for (i = 0; i < VG_N_THREADS; i++) {
1704       VG_(clear_syscallInfo)( i );
1705    }
1706 }
1707 
1708 /* --- This is the main function of this file. --- */
1709 
1710 void VG_(client_syscall) ( ThreadId tid, UInt trc )
1711 {
1712    Word                     sysno;
1713    ThreadState*             tst;
1714    const SyscallTableEntry* ent;
1715    SyscallArgLayout         layout;
1716    SyscallInfo*             sci;
1717 
1718    ensure_initialised();
1719 
1720    vg_assert(VG_(is_valid_tid)(tid));
1721    vg_assert(tid >= 1 && tid < VG_N_THREADS);
1722    vg_assert(VG_(is_running_thread)(tid));
1723 
1724 #  if !defined(VGO_darwin)
1725    // Resync filtering is meaningless on non-Darwin targets.
1726    vg_assert(VG_(clo_resync_filter) == 0);
1727 #  endif
1728 
1729    tst = VG_(get_ThreadState)(tid);
1730 
1731    /* BEGIN ensure root thread's stack is suitably mapped */
1732    /* In some rare circumstances, we may do the syscall without the
1733       bottom page of the stack being mapped, because the stack pointer
1734       was moved down just a few instructions before the syscall
1735       instruction, and there have been no memory references since
1736       then, that would cause a call to VG_(extend_stack) to have
1737       happened.
1738 
1739       In native execution that's OK: the kernel automagically extends
1740       the stack's mapped area down to cover the stack pointer (or sp -
1741       redzone, really).  In simulated normal execution that's OK too,
1742       since any signals we get from accessing below the mapped area of
1743       the (guest's) stack lead us to VG_(extend_stack), where we
1744       simulate the kernel's stack extension logic.  But that leaves
1745       the problem of entering a syscall with the SP unmapped.  Because
1746       the kernel doesn't know that the segment immediately above SP is
1747       supposed to be a grow-down segment, it causes the syscall to
1748       fail, and thereby causes a divergence between native behaviour
1749       (syscall succeeds) and simulated behaviour (syscall fails).
1750 
1751       This is quite a rare failure mode.  It has only been seen
1752       affecting calls to sys_readlink on amd64-linux, and even then it
1753       requires a certain code sequence around the syscall to trigger
1754       it.  Here is one:
1755 
1756       extern int my_readlink ( const char* path );
1757       asm(
1758       ".text\n"
1759       ".globl my_readlink\n"
1760       "my_readlink:\n"
1761       "\tsubq    $0x1008,%rsp\n"
1762       "\tmovq    %rdi,%rdi\n"              // path is in rdi
1763       "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
1764       "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
1765       "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
1766       "\tsyscall\n"
1767       "\taddq    $0x1008,%rsp\n"
1768       "\tret\n"
1769       ".previous\n"
1770       );
1771 
1772       For more details, see bug #156404
1773       (https://bugs.kde.org/show_bug.cgi?id=156404).
1774 
1775       The fix is actually very simple.  We simply need to call
1776       VG_(extend_stack) for this thread, handing it the lowest
1777       possible valid address for stack (sp - redzone), to ensure the
1778       pages all the way down to that address, are mapped.  Because
1779       this is a potentially expensive and frequent operation, we
1780       do the following:
1781 
1782       Only the main thread (tid=1) has a growdown stack.  So
1783       ignore all others.  It is conceivable, although highly unlikely,
1784       that the main thread exits, and later another thread is
1785       allocated tid=1, but that's harmless, I believe;
1786       VG_(extend_stack) will do nothing when applied to a non-root
1787       thread.
1788 
1789       All this guff is of course Linux-specific.  Hence the ifdef.
1790    */
1791 #  if defined(VGO_linux)
1792    if (tid == 1/*ROOT THREAD*/) {
1793       Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
1794 
1795       /* The precise thing to do here would be to extend the stack only
1796          if the system call can be proven to access unmapped user stack
1797          memory. That is an enormous amount of work even if a proper
1798          spec of system calls was available.
1799 
1800          In the case where the system call does not access user memory
1801          the stack pointer here can have any value. A legitimate testcase
1802          that exercises this is none/tests/s390x/stmg.c:
1803          The stack pointer happens to be in the reservation segment near
1804          the end of the addressable memory and there is no SkAnonC segment
1805          above.
1806 
1807          So the approximation we're taking here is to extend the stack only
1808          if the client stack pointer does not look bogus. */
1809       if (VG_(am_addr_is_in_extensible_client_stack)(stackMin))
1810          VG_(extend_stack)( tid, stackMin );
1811    }
1812 #  endif
1813    /* END ensure root thread's stack is suitably mapped */
1814 
1815    /* First off, get the syscall args and number.  This is a
1816       platform-dependent action. */
1817 
1818    sci = & syscallInfo[tid];
1819    vg_assert(sci->status.what == SsIdle);
1820 
1821    getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
1822 
1823    /* Copy .orig_args to .args.  The pre-handler may modify .args, but
1824       we want to keep the originals too, just in case. */
1825    sci->args = sci->orig_args;
1826 
1827    /* Note the syscall number; the pre-handler is not allowed to change
1828       it, and we need it several times below. */
1829    sysno = sci->orig_args.sysno;
1830 
1831    /* It's sometimes useful, as a crude debugging hack, to get a
1832       stack trace at each (or selected) syscalls. */
1833    if (0 && sysno == __NR_ioctl) {
1834       VG_(umsg)("\nioctl:\n");
1835       VG_(get_and_pp_StackTrace)(tid, 10);
1836       VG_(umsg)("\n");
1837    }
1838 
1839 #  if defined(VGO_darwin)
1840    /* Record syscall class.  But why?  Because the syscall might be
1841       interrupted by a signal, and in the signal handler (which will
1842       be m_signals.async_signalhandler) we will need to build a SysRes
1843       reflecting the syscall return result.  In order to do that we
1844       need to know the syscall class.  Hence stash it in the guest
1845       state of this thread.  This madness is not needed on Linux
1846       because it only has a single syscall return convention and so
1847       there is no ambiguity involved in converting the post-signal
1848       machine state into a SysRes. */
1849    tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
1850 #  endif
1851 
1852    /* The default what-to-do-next thing is hand the syscall to the
1853       kernel, so we pre-set that here.  Set .sres to something
1854       harmless looking (its value is irrelevant because .what is not
1855       SsComplete). */
1856    sci->status.what = SsHandToKernel;
1857    sci->status.sres = VG_(mk_SysRes_Error)(0);
1858    sci->flags       = 0;
1859 
1860    /* Fetch the syscall's handlers.  If no handlers exist for this
1861       syscall, we are given dummy handlers which force an immediate
1862       return with ENOSYS. */
1863    ent = get_syscall_entry(sysno);
1864 
1865    /* Fetch the layout information, which tells us where in the guest
1866       state the syscall args reside.  This is a platform-dependent
1867       action.  This info is needed so that the scalar syscall argument
1868       checks (PRE_REG_READ calls) know which bits of the guest state
1869       they need to inspect. */
1870    getSyscallArgLayout( &layout );
1871 
1872    /* Make sure the tmp signal mask matches the real signal mask;
1873       sigsuspend may change this. */
1874    vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
1875 
1876    /* Right, we're finally ready to Party.  Call the pre-handler and
1877       see what we get back.  At this point:
1878 
1879         sci->status.what  is SsHandToKernel (we don't know the outcome yet).
1880         sci->orig_args    contains the original args.
1881         sci->args         is the same as sci->orig_args.
1882         sci->flags        is zero.
1883    */
1884 
1885    PRINT("SYSCALL[%d,%u](%s) ",
1886       VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
1887 
1888    /* Do any pre-syscall actions */
1889    if (VG_(needs).syscall_wrapper) {
1890       UWord tmpv[8];
1891       tmpv[0] = sci->orig_args.arg1;
1892       tmpv[1] = sci->orig_args.arg2;
1893       tmpv[2] = sci->orig_args.arg3;
1894       tmpv[3] = sci->orig_args.arg4;
1895       tmpv[4] = sci->orig_args.arg5;
1896       tmpv[5] = sci->orig_args.arg6;
1897       tmpv[6] = sci->orig_args.arg7;
1898       tmpv[7] = sci->orig_args.arg8;
1899       VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
1900                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
1901    }
1902 
1903    vg_assert(ent);
1904    vg_assert(ent->before);
1905    (ent->before)( tid,
1906                   &layout,
1907                   &sci->args, &sci->status, &sci->flags );
1908 
1909    /* The pre-handler may have modified:
1910          sci->args
1911          sci->status
1912          sci->flags
1913       All else remains unchanged.
1914       Although the args may be modified, pre handlers are not allowed
1915       to change the syscall number.
1916    */
1917    /* Now we proceed according to what the pre-handler decided. */
1918    vg_assert(sci->status.what == SsHandToKernel
1919              || sci->status.what == SsComplete);
1920    vg_assert(sci->args.sysno == sci->orig_args.sysno);
1921 
1922    if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
1923       /* The pre-handler completed the syscall itself, declaring
1924          success. */
1925       if (sci->flags & SfNoWriteResult) {
1926          PRINT(" --> [pre-success] NoWriteResult");
1927       } else {
1928          PRINT(" --> [pre-success] %s", VG_(sr_as_string)(sci->status.sres));
1929       }
1930       /* In this case the allowable flags are to ask for a signal-poll
1931          and/or a yield after the call.  Changing the args isn't
1932          allowed. */
1933       vg_assert(0 == (sci->flags
1934                       & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
1935       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1936    }
1937 
1938    else
1939    if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
1940       /* The pre-handler decided to fail syscall itself. */
1941       PRINT(" --> [pre-fail] %s", VG_(sr_as_string)(sci->status.sres));
1942       /* In this case, the pre-handler is also allowed to ask for the
1943          post-handler to be run anyway.  Changing the args is not
1944          allowed. */
1945       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1946       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1947    }
1948 
1949    else
1950    if (sci->status.what != SsHandToKernel) {
1951       /* huh?! */
1952       vg_assert(0);
1953    }
1954 
1955    else /* (sci->status.what == SsHandToKernel) */ {
1956       /* Ok, this is the usual case -- and the complicated one.  There
1957          are two subcases: sync and async.  async is the general case
1958          and is to be used when there is any possibility that the
1959          syscall might block [a fact that the pre-handler must tell us
1960          via the sci->flags field.]  Because the tidying-away /
1961          context-switch overhead of the async case could be large, if
1962          we are sure that the syscall will not block, we fast-track it
1963          by doing it directly in this thread, which is a lot
1964          simpler. */
1965 
1966       /* Check that the given flags are allowable: MayBlock, PollAfter
1967          and PostOnFail are ok. */
1968       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1969 
1970       if (sci->flags & SfMayBlock) {
1971 
1972          /* Syscall may block, so run it asynchronously */
1973          vki_sigset_t mask;
1974 
1975          PRINT(" --> [async] ... \n");
1976 
1977          mask = tst->sig_mask;
1978          sanitize_client_sigmask(&mask);
1979 
1980          /* Gack.  More impedance matching.  Copy the possibly
1981             modified syscall args back into the guest state. */
1982          /* JRS 2009-Mar-16: if the syscall args are possibly modified,
1983             then this assertion is senseless:
1984               vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1985             The case that exposed it was sys_posix_spawn on Darwin,
1986             which heavily modifies its arguments but then lets the call
1987             go through anyway, with SfToBlock set, hence we end up here. */
1988          putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1989 
1990          /* The SfNoWriteResult flag is invalid for blocking syscalls because
1991             do_syscall_for_client() directly modifies the guest state. */
1992          vg_assert(!(sci->flags & SfNoWriteResult));
1993 
1994          /* Drop the bigLock */
1995          VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
1996          /* Urr.  We're now in a race against other threads trying to
1997             acquire the bigLock.  I guess that doesn't matter provided
1998             that do_syscall_for_client only touches thread-local
1999             state. */
2000 
2001          /* Do the call, which operates directly on the guest state,
2002             not on our abstracted copies of the args/result. */
2003          do_syscall_for_client(sysno, tst, &mask);
2004 
2005          /* do_syscall_for_client may not return if the syscall was
2006             interrupted by a signal.  In that case, flow of control is
2007             first to m_signals.async_sighandler, which calls
2008             VG_(fixup_guest_state_after_syscall_interrupted), which
2009             fixes up the guest state, and possibly calls
2010             VG_(post_syscall).  Once that's done, control drops back
2011             to the scheduler.  */
2012 
2013          /* Darwin: do_syscall_for_client may not return if the
2014             syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
2015             responded by starting the thread at wqthread_hijack(reuse=1)
2016             (to run another workqueue item). In that case, wqthread_hijack
2017             calls ML_(wqthread_continue), which is similar to
2018             VG_(fixup_guest_state_after_syscall_interrupted). */
2019 
2020          /* Reacquire the lock */
2021          VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
2022 
2023          /* Even more impedance matching.  Extract the syscall status
2024             from the guest state. */
2025          getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
2026          vg_assert(sci->status.what == SsComplete);
2027 
2028          /* Be decorative, if required. */
2029          if (VG_(clo_trace_syscalls)) {
2030             PRINT("SYSCALL[%d,%u](%s) ... [async] --> %s",
2031                   VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
2032                   VG_(sr_as_string)(sci->status.sres));
2033          }
2034 
2035       } else {
2036 
2037          /* run the syscall directly */
2038          /* The pre-handler may have modified the syscall args, but
2039             since we're passing values in ->args directly to the
2040             kernel, there's no point in flushing them back to the
2041             guest state.  Indeed doing so could be construed as
2042             incorrect. */
2043          SysRes sres
2044             = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
2045                                      sci->args.arg3, sci->args.arg4,
2046                                      sci->args.arg5, sci->args.arg6,
2047                                      sci->args.arg7, sci->args.arg8 );
2048          sci->status = convert_SysRes_to_SyscallStatus(sres);
2049 
2050          /* Be decorative, if required. */
2051          if (VG_(clo_trace_syscalls)) {
2052            PRINT("[sync] --> %s", VG_(sr_as_string)(sci->status.sres));
2053          }
2054       }
2055    }
2056 
2057    vg_assert(sci->status.what == SsComplete);
2058 
2059    vg_assert(VG_(is_running_thread)(tid));
2060 
2061    /* Dump the syscall result back in the guest state.  This is
2062       a platform-specific action. */
2063    if (!(sci->flags & SfNoWriteResult))
2064       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
2065 
2066    /* Situation now:
2067       - the guest state is now correctly modified following the syscall
2068       - modified args, original args and syscall status are still
2069         available in the syscallInfo[] entry for this syscall.
2070 
2071       Now go on to do the post-syscall actions (read on down ..)
2072    */
2073    PRINT(" ");
2074    VG_(post_syscall)(tid);
2075    PRINT("\n");
2076 }
2077 
2078 
2079 /* Perform post syscall actions.  The expected state on entry is
2080    precisely as at the end of VG_(client_syscall), that is:
2081 
2082    - guest state up to date following the syscall
2083    - modified args, original args and syscall status are still
2084      available in the syscallInfo[] entry for this syscall.
2085    - syscall status matches what's in the guest state.
2086 
2087    There are two ways to get here: the normal way -- being called by
2088    VG_(client_syscall), and the unusual way, from
2089    VG_(fixup_guest_state_after_syscall_interrupted).
2090    Darwin: there's a third way, ML_(wqthread_continue).
2091 */
2092 void VG_(post_syscall) (ThreadId tid)
2093 {
2094    SyscallInfo*             sci;
2095    const SyscallTableEntry* ent;
2096    SyscallStatus            test_status;
2097    ThreadState*             tst;
2098    Word sysno;
2099 
2100    /* Preliminaries */
2101    vg_assert(VG_(is_valid_tid)(tid));
2102    vg_assert(tid >= 1 && tid < VG_N_THREADS);
2103    vg_assert(VG_(is_running_thread)(tid));
2104 
2105    tst = VG_(get_ThreadState)(tid);
2106    sci = & syscallInfo[tid];
2107 
2108    /* m_signals.sigvgkill_handler might call here even when not in
2109       a syscall. */
2110    if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
2111       sci->status.what = SsIdle;
2112       return;
2113    }
2114 
2115    /* Validate current syscallInfo entry.  In particular we require
2116       that the current .status matches what's actually in the guest
2117       state.  At least in the normal case where we have actually
2118       previously written the result into the guest state. */
2119    vg_assert(sci->status.what == SsComplete);
2120 
2121    /* Get the system call number.  Because the pre-handler isn't
2122       allowed to mess with it, it should be the same for both the
2123       original and potentially-modified args. */
2124    vg_assert(sci->args.sysno == sci->orig_args.sysno);
2125    sysno = sci->args.sysno;
2126 
2127    getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
2128    if (!(sci->flags & SfNoWriteResult))
2129       vg_assert(eq_SyscallStatus( sysno, &sci->status, &test_status ));
2130    /* Failure of the above assertion on Darwin can indicate a problem
2131       in the syscall wrappers that pre-fail or pre-succeed the
2132       syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
2133       when they really should call SET_STATUS_from_SysRes.  The former
2134       create a UNIX-class syscall result on Darwin, which may not be
2135       correct for the syscall; if that's the case then this assertion
2136       fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
2137       non-Darwin platforms this assertion should never fail, and this
2138       comment is completely irrelevant. */
2139    /* Ok, looks sane */
2140 
2141    /* pre: status == Complete (asserted above) */
2142    /* Consider either success or failure.  Now run the post handler if:
2143       - it exists, and
2144       - Success or (Failure and PostOnFail is set)
2145    */
2146    ent = get_syscall_entry(sysno);
2147    if (ent->after
2148        && ((!sr_isError(sci->status.sres))
2149            || (sr_isError(sci->status.sres)
2150                && (sci->flags & SfPostOnFail) ))) {
2151 
2152       (ent->after)( tid, &sci->args, &sci->status );
2153    }
2154 
2155    /* Because the post handler might have changed the status (eg, the
2156       post-handler for sys_open can change the result from success to
2157       failure if the kernel supplied a fd that it doesn't like), once
2158       again dump the syscall result back in the guest state.*/
2159    if (!(sci->flags & SfNoWriteResult))
2160       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
2161 
2162    /* Do any post-syscall actions required by the tool. */
2163    if (VG_(needs).syscall_wrapper) {
2164       UWord tmpv[8];
2165       tmpv[0] = sci->orig_args.arg1;
2166       tmpv[1] = sci->orig_args.arg2;
2167       tmpv[2] = sci->orig_args.arg3;
2168       tmpv[3] = sci->orig_args.arg4;
2169       tmpv[4] = sci->orig_args.arg5;
2170       tmpv[5] = sci->orig_args.arg6;
2171       tmpv[6] = sci->orig_args.arg7;
2172       tmpv[7] = sci->orig_args.arg8;
2173       VG_TDICT_CALL(tool_post_syscall, tid,
2174                     sysno,
2175                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
2176                     sci->status.sres);
2177    }
2178 
2179    /* The syscall is done. */
2180    vg_assert(sci->status.what == SsComplete);
2181    sci->status.what = SsIdle;
2182 
2183    /* The pre/post wrappers may have concluded that pending signals
2184       might have been created, and will have set SfPollAfter to
2185       request a poll for them once the syscall is done. */
2186    if (sci->flags & SfPollAfter)
2187       VG_(poll_signals)(tid);
2188 
2189    /* Similarly, the wrappers might have asked for a yield
2190       afterwards. */
2191    if (sci->flags & SfYieldAfter)
2192       VG_(vg_yield)();
2193 }
2194 
2195 
2196 /* ---------------------------------------------------------------------
2197    Dealing with syscalls which get interrupted by a signal:
2198    VG_(fixup_guest_state_after_syscall_interrupted)
2199    ------------------------------------------------------------------ */
2200 
2201 /* Syscalls done on behalf of the client are finally handed off to the
2202    kernel in VG_(client_syscall) above, either by calling
2203    do_syscall_for_client (the async case), or by calling
2204    VG_(do_syscall6) (the sync case).
2205 
2206    If the syscall is not interrupted by a signal (it may block and
2207    later unblock, but that's irrelevant here) then those functions
2208    eventually return and so control is passed to VG_(post_syscall).
2209    NB: not sure if the sync case can actually get interrupted, as it
2210    operates with all signals masked.
2211 
2212    However, the syscall may get interrupted by an async-signal.  In
2213    that case do_syscall_for_client/VG_(do_syscall6) do not
2214    return.  Instead we wind up in m_signals.async_sighandler.  We need
2215    to fix up the guest state to make it look, to the guest, as if the
2216    syscall was interrupted.  So async_sighandler calls here, and this
2217    does the fixup.  Note that from here we wind up calling
2218    VG_(post_syscall) too.
2219 */
2220 
2221 
2222 /* These are addresses within ML_(do_syscall_for_client_WRK).  See
2223    syscall-$PLAT.S for details.
2224 */
2225 #if defined(VGO_linux)
2226   extern const Addr ML_(blksys_setup);
2227   extern const Addr ML_(blksys_restart);
2228   extern const Addr ML_(blksys_complete);
2229   extern const Addr ML_(blksys_committed);
2230   extern const Addr ML_(blksys_finished);
2231 #elif defined(VGO_darwin)
2232   /* Darwin requires extra ugliness */
2233   extern const Addr ML_(blksys_setup_MACH);
2234   extern const Addr ML_(blksys_restart_MACH);
2235   extern const Addr ML_(blksys_complete_MACH);
2236   extern const Addr ML_(blksys_committed_MACH);
2237   extern const Addr ML_(blksys_finished_MACH);
2238   extern const Addr ML_(blksys_setup_MDEP);
2239   extern const Addr ML_(blksys_restart_MDEP);
2240   extern const Addr ML_(blksys_complete_MDEP);
2241   extern const Addr ML_(blksys_committed_MDEP);
2242   extern const Addr ML_(blksys_finished_MDEP);
2243   extern const Addr ML_(blksys_setup_UNIX);
2244   extern const Addr ML_(blksys_restart_UNIX);
2245   extern const Addr ML_(blksys_complete_UNIX);
2246   extern const Addr ML_(blksys_committed_UNIX);
2247   extern const Addr ML_(blksys_finished_UNIX);
2248 #elif defined(VGO_solaris)
2249   extern const Addr ML_(blksys_setup);
2250   extern const Addr ML_(blksys_complete);
2251   extern const Addr ML_(blksys_committed);
2252   extern const Addr ML_(blksys_finished);
2253   extern const Addr ML_(blksys_setup_DRET);
2254   extern const Addr ML_(blksys_complete_DRET);
2255   extern const Addr ML_(blksys_committed_DRET);
2256   extern const Addr ML_(blksys_finished_DRET);
2257 #else
2258 # error "Unknown OS"
2259 #endif
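
/* Rough map of how these addresses bracket the phases of
   ML_(do_syscall_for_client_WRK) on Linux (a sketch only; the
   authoritative definitions are the labels in the per-platform
   syscall-*.S files):

      [blksys_setup, blksys_restart)       unblocking signals
      [blksys_restart]                     the syscall instruction itself
      [blksys_complete, blksys_committed)  result obtained, but not yet
                                           written to the guest state
      [blksys_committed, blksys_finished)  result written back,
                                           re-blocking signals
*/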
2260 
2261 
2262 /* Back up guest state to restart a system call. */
2263 
2264 void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
2265 {
2266 #if defined(VGP_x86_linux)
2267    arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)
2268 
2269    /* Make sure our caller is actually sane, and we're really backing
2270       back over a syscall.
2271 
2272       int $0x80 == CD 80
2273    */
2274    {
2275       UChar *p = (UChar *)arch->vex.guest_EIP;
2276 
2277       if (p[0] != 0xcd || p[1] != 0x80)
2278          VG_(message)(Vg_DebugMsg,
2279                       "?! restarting over syscall at %#x %02x %02x\n",
2280                       arch->vex.guest_EIP, p[0], p[1]);
2281 
2282       vg_assert(p[0] == 0xcd && p[1] == 0x80);
2283    }
2284 
2285 #elif defined(VGP_amd64_linux)
2286    arch->vex.guest_RIP -= 2;             // sizeof(syscall)
2287 
2288    /* Make sure our caller is actually sane, and we're really backing
2289       back over a syscall.
2290 
2291       syscall == 0F 05
2292    */
2293    {
2294       UChar *p = (UChar *)arch->vex.guest_RIP;
2295 
2296       if (p[0] != 0x0F || p[1] != 0x05)
2297          VG_(message)(Vg_DebugMsg,
2298                       "?! restarting over syscall at %#llx %02x %02x\n",
2299                       arch->vex.guest_RIP, p[0], p[1]);
2300 
2301       vg_assert(p[0] == 0x0F && p[1] == 0x05);
2302    }
2303 
2304 #elif defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux)
2305    arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)
2306 
2307    /* Make sure our caller is actually sane, and we're really backing
2308       back over a syscall.
2309 
2310       sc == 44 00 00 02
2311    */
2312    {
2313       UChar *p = (UChar *)arch->vex.guest_CIA;
2314 
2315       if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
2316          VG_(message)(Vg_DebugMsg,
2317                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2318                       (ULong)arch->vex.guest_CIA, p[0], p[1], p[2], p[3]);
2319 
2320       vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
2321    }
2322 
2323 #elif defined(VGP_ppc64le_linux)
2324    arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)
2325 
2326    /* Make sure our caller is actually sane, and we're really backing
2327       back over a syscall.
2328 
2329       sc == 44 00 00 02
2330    */
2331    {
2332       UChar *p = (UChar *)arch->vex.guest_CIA;
2333 
2334       if (p[3] != 0x44 || p[2] != 0x0 || p[1] != 0x0 || p[0] != 0x02)
2335          VG_(message)(Vg_DebugMsg,
2336                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2337                       arch->vex.guest_CIA, p[3], p[2], p[1], p[0]);
2338 
2339       vg_assert(p[3] == 0x44 && p[2] == 0x0 && p[1] == 0x0 && p[0] == 0x2);
2340    }
2341 
2342 #elif defined(VGP_arm_linux)
2343    if (arch->vex.guest_R15T & 1) {
2344       // Thumb mode.  SVC is encoded as
2345       //   1101 1111 imm8
2346       // where imm8 is the SVC number, and we only accept 0.
2347       arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
2348       UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
2349       Bool   valid = p[0] == 0 && p[1] == 0xDF;
2350       if (!valid) {
2351          VG_(message)(Vg_DebugMsg,
2352                       "?! restarting over (Thumb) syscall that is not a syscall "
2353                       "at %#x %02x %02x\n",
2354                       arch->vex.guest_R15T - 1, p[0], p[1]);
2355       }
2356       vg_assert(valid);
2357       // FIXME: NOTE, this really isn't right.  We need to back up
2358       // ITSTATE to what it was before the SVC instruction, but we
2359       // don't know what it was.  At least assert that it is now
2360       // zero, because if it is nonzero then it must also have
2361       // been nonzero for the SVC itself, which means it was
2362       // conditional.  Urk.
2363       vg_assert(arch->vex.guest_ITSTATE == 0);
2364    } else {
2365       // ARM mode.  SVC is encoded as
2366       //   cond 1111 imm24
2367       // where imm24 is the SVC number, and we only accept 0.
2368       arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
2369       UChar* p     = (UChar*)arch->vex.guest_R15T;
2370       Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
2371                      && (p[3] & 0xF) == 0xF;
2372       if (!valid) {
2373          VG_(message)(Vg_DebugMsg,
2374                       "?! restarting over (ARM) syscall that is not a syscall "
2375                       "at %#x %02x %02x %02x %02x\n",
2376                       arch->vex.guest_R15T, p[0], p[1], p[2], p[3]);
2377       }
2378       vg_assert(valid);
2379    }
2380 
2381 #elif defined(VGP_arm64_linux)
2382    arch->vex.guest_PC -= 4;             // sizeof(arm64 instr)
2383 
2384    /* Make sure our caller is actually sane, and we're really backing
2385       back over a syscall.
2386 
2387       svc #0 == d4 00 00 01
2388    */
2389    {
2390       UChar *p = (UChar *)arch->vex.guest_PC;
2391 
2392       if (p[0] != 0x01 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0xD4)
2393          VG_(message)(
2394             Vg_DebugMsg,
2395             "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2396             arch->vex.guest_PC, p[0], p[1], p[2], p[3]
2397           );
2398 
2399       vg_assert(p[0] == 0x01 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0xD4);
2400    }
2401 
2402 #elif defined(VGP_x86_darwin)
2403    arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;
2404 
2405    /* Make sure our caller is actually sane, and we're really backing
2406       back over a syscall.
2407 
2408       int $0x80 == CD 80  // Used to communicate with BSD syscalls
2409       int $0x81 == CD 81  // Used to communicate with Mach traps
2410       int $0x82 == CD 82  // Used to communicate with "thread" ?
2411       sysenter  == 0F 34  // Used to communicate with Unix syscalls
2412    */
2413    {
2414        UChar *p = (UChar *)arch->vex.guest_EIP;
2415        Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
2416                   || (p[0] == 0xCD && p[1] == 0x81)
2417                   || (p[0] == 0xCD && p[1] == 0x82)
2418                   || (p[0] == 0x0F && p[1] == 0x34);
2419        if (!ok)
2420            VG_(message)(Vg_DebugMsg,
2421                         "?! restarting over syscall at %#x %02x %02x\n",
2422                         arch->vex.guest_EIP, p[0], p[1]);
2423        vg_assert(ok);
2424    }
2425 
2426 #elif defined(VGP_amd64_darwin)
2427    arch->vex.guest_RIP = arch->vex.guest_IP_AT_SYSCALL;
2428 
2429    /* Make sure our caller is actually sane, and we're really backing
2430       back over a syscall.
2431 
2432       syscall   == 0F 05
2433    */
2434    {
2435        UChar *p = (UChar *)arch->vex.guest_RIP;
2436 
2437        Bool  ok = (p[0] == 0x0F && p[1] == 0x05);
2438        if (!ok)
2439            VG_(message)(Vg_DebugMsg,
2440                         "?! restarting over syscall at %#llx %02x %02x\n",
2441                         arch->vex.guest_RIP, p[0], p[1]);
2442        vg_assert(ok);
2443    }
2444 
2445 #elif defined(VGP_s390x_linux)
2446    arch->vex.guest_IA -= 2;             // sizeof(syscall)
2447 
2448    /* Make sure our caller is actually sane, and we're really backing
2449       back over a syscall.
2450 
2451       syscall == 0A <num>
2452    */
2453    {
2454       UChar *p = (UChar *)arch->vex.guest_IA;
2455       if (p[0] != 0x0A)
2456          VG_(message)(Vg_DebugMsg,
2457                       "?! restarting over syscall at %#llx %02x %02x\n",
2458                       arch->vex.guest_IA, p[0], p[1]);
2459 
2460       vg_assert(p[0] == 0x0A);
2461    }
2462 
2463 #elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
2464 
2465    arch->vex.guest_PC -= 4;             // sizeof(mips instr)
2466 
2467    /* Make sure our caller is actually sane, and we're really backing
2468       back over a syscall.
2469 
2470       byte order as seen in memory:
2471       little endian: syscall == 0C 00 00 00
2472       big endian:    syscall == 00 00 00 0C
2473    */
2474    {
2475       UChar *p = (UChar *)(arch->vex.guest_PC);
2476 #     if defined (VG_LITTLEENDIAN)
2477       if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
2478          VG_(message)(Vg_DebugMsg,
2479                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2480                       (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);
2481 
2482       vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
2483 #     elif defined (VG_BIGENDIAN)
2484       if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c)
2485          VG_(message)(Vg_DebugMsg,
2486                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2487                       (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);
2488 
2489       vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c);
2490 #     else
2491 #        error "Unknown endianness"
2492 #     endif
2493    }
2494 #elif defined(VGP_tilegx_linux)
2495    arch->vex.guest_pc -= 8;             // sizeof({ swint1 })
2496 
2497    /* Make sure our caller is actually sane, and we're really backing
2498       back over a syscall.  No other instruction is in the same bundle.
2499    */
2500    {
2501       unsigned long *p = (unsigned long *)arch->vex.guest_pc;
2502 
2503       if (p[0] != 0x286b180051485000ULL )  // "swint1", little endian only
2504          VG_(message)(Vg_DebugMsg,
2505                       "?! restarting over syscall at 0x%lx %lx\n",
2506                       arch->vex.guest_pc, p[0]);
2507       vg_assert(p[0] == 0x286b180051485000ULL);
2508    }
2509 
2510 #elif defined(VGP_x86_solaris)
2511    arch->vex.guest_EIP -= 2;   // sizeof(int $0x91) or sizeof(syscall)
2512 
2513    /* Make sure our caller is actually sane, and we're really backing
2514       back over a syscall.
2515 
2516       int $0x91 == CD 91
2517       syscall   == 0F 05
2518       sysenter  == 0F 34
2519 
2520       Handle also other syscall instructions because we also handle them in
2521       the scheduler.
2522       int $0x80 == CD 80
2523       int $0x81 == CD 81
2524       int $0x82 == CD 82
2525    */
2526    {
2527       UChar *p = (UChar *)arch->vex.guest_EIP;
2528 
2529       Bool  ok = (p[0] == 0xCD && p[1] == 0x91)
2530                   || (p[0] == 0x0F && p[1] == 0x05)
2531                   || (p[0] == 0x0F && p[1] == 0x34)
2532                   || (p[0] == 0xCD && p[1] == 0x80)
2533                   || (p[0] == 0xCD && p[1] == 0x81)
2534                   || (p[0] == 0xCD && p[1] == 0x82);
2535       if (!ok)
2536          VG_(message)(Vg_DebugMsg,
2537                       "?! restarting over syscall at %#x %02x %02x\n",
2538                       arch->vex.guest_EIP, p[0], p[1]);
2539       vg_assert(ok);
2540    }
2541 
2542 #elif defined(VGP_amd64_solaris)
2543    arch->vex.guest_RIP -= 2;   // sizeof(syscall)
2544 
2545    /* Make sure our caller is actually sane, and we're really backing
2546       back over a syscall.
2547 
2548       syscall   == 0F 05
2549    */
2550    {
2551       UChar *p = (UChar *)arch->vex.guest_RIP;
2552 
2553       Bool  ok = (p[0] == 0x0F && p[1] == 0x05);
2554       if (!ok)
2555          VG_(message)(Vg_DebugMsg,
2556                       "?! restarting over syscall at %#llx %02x %02x\n",
2557                       arch->vex.guest_RIP, p[0], p[1]);
2558       vg_assert(ok);
2559    }
2560 
2561 #else
2562 #  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
2563 #endif
2564 }
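
/* Worked example (amd64-linux): the fixup above is simply
   guest_RIP -= 2, so that when the signal handler returns, the
   two-byte 0F 05 "syscall" instruction executes again and the
   interrupted syscall is re-issued with its original arguments still
   held in the guest state. */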
2565 
2566 
2567 /*
2568    Fix up the guest state when a syscall is interrupted by a signal
2569    and so has been forced to return 'sysret'.
2570 
2571    To do this, we determine the precise state of the syscall by
2572    looking at the (real) IP at the time the signal happened.  The
2573    syscall sequence looks like:
2574 
2575      1. unblock signals
2576      2. perform syscall
2577      3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0)
2578      4. re-block signals
2579 
2580    If a signal
2581    happens at      Then     Why?
2582    [1-2)           restart  nothing has happened (restart syscall)
2583    [2]             restart  syscall hasn't started, or the kernel wants to restart it
2584    [2-3)           save     syscall complete, but results not saved
2585    [3-4)           nothing  syscall complete, results already saved
2586 
2587    Sometimes we never want to restart an interrupted syscall (because
2588    sigaction says not to), so we only restart if "restart" is True.
2589 
2590    This will also call VG_(post_syscall) if the syscall has actually
2591    completed (either because it was interrupted, or because it
2592    actually finished).  It will not call VG_(post_syscall) if the
2593    syscall is set up for restart, which means that the pre-wrapper may
2594    get called multiple times.
2595 */
2596 
2597 void
2598 VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
2599                                                   Addr     ip,
2600                                                   SysRes   sres,
2601                                                   Bool     restart,
2602                                                   struct vki_ucontext *uc)
2603 {
2604    /* Note that we don't know the syscall number here, since (1) in
2605       general there's no reliable way to get hold of it short of
2606       stashing it in the guest state before the syscall, and (2) in
2607       any case we don't need to know it for the actions done by this
2608       routine.
2609 
2610       Furthermore, 'sres' is only used in the case where the syscall
2611       is complete, but the result has not been committed to the guest
2612       state yet.  In any other situation it will be meaningless and
2613       therefore ignored. */
2614 
2615    ThreadState*     tst;
2616    SyscallStatus    canonical;
2617    ThreadArchState* th_regs;
2618    SyscallInfo*     sci;
2619 
2620    /* Compute some Booleans indicating which range we're in. */
2621    Bool outside_range,
2622         in_setup_to_restart,      // [1,2) in the .S files
2623         at_restart,               // [2]   in the .S files
2624         in_complete_to_committed, // [3,4) in the .S files
2625         in_committed_to_finished; // [4,5) in the .S files
2626 
2627    if (VG_(clo_trace_signals))
2628       VG_(message)( Vg_DebugMsg,
2629                     "interrupted_syscall: tid=%u, ip=%#lx, "
2630                     "restart=%s, sres.isErr=%s, sres.val=%lu\n",
2631                     tid,
2632                     ip,
2633                     restart ? "True" : "False",
2634                     sr_isError(sres) ? "True" : "False",
2635                     sr_isError(sres) ? sr_Err(sres) : sr_Res(sres));
2636 
2637    vg_assert(VG_(is_valid_tid)(tid));
2638    vg_assert(tid >= 1 && tid < VG_N_THREADS);
2639    vg_assert(VG_(is_running_thread)(tid));
2640 
2641    tst     = VG_(get_ThreadState)(tid);
2642    th_regs = &tst->arch;
2643    sci     = & syscallInfo[tid];
2644 
2645 #  if defined(VGO_linux)
2646    outside_range
2647       = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
2648    in_setup_to_restart
2649       = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
2650    at_restart
2651       = ip == ML_(blksys_restart);
2652    in_complete_to_committed
2653       = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
2654    in_committed_to_finished
2655       = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
2656 #  elif defined(VGO_darwin)
2657    outside_range
2658       =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
2659       && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
2660       && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
2661    in_setup_to_restart
2662       =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
2663       || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
2664       || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
2665    at_restart
2666       =  (ip == ML_(blksys_restart_MACH))
2667       || (ip == ML_(blksys_restart_MDEP))
2668       || (ip == ML_(blksys_restart_UNIX));
2669    in_complete_to_committed
2670       =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
2671       || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
2672       || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
2673    in_committed_to_finished
2674       =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
2675       || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
2676       || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
2677    /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
2678 #  elif defined(VGO_solaris)
2679    /* The Solaris port is never outside the range. */
2680    outside_range = False;
2681    /* The Solaris kernel never restarts syscalls directly! */
2682    at_restart = False;
   if (tst->os_state.in_door_return) {
      vg_assert(ip >= ML_(blksys_setup_DRET)
                && ip < ML_(blksys_finished_DRET));

      in_setup_to_restart
         = ip >= ML_(blksys_setup_DRET) && ip < ML_(blksys_complete_DRET);
      in_complete_to_committed
         = ip >= ML_(blksys_complete_DRET) && ip < ML_(blksys_committed_DRET);
      in_committed_to_finished
         = ip >= ML_(blksys_committed_DRET) && ip < ML_(blksys_finished_DRET);
   }
   else {
      vg_assert(ip >= ML_(blksys_setup) && ip < ML_(blksys_finished));

      in_setup_to_restart
         = ip >= ML_(blksys_setup) && ip < ML_(blksys_complete);
      in_complete_to_committed
         = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
      in_committed_to_finished
         = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
   }
#  else
#    error "Unknown OS"
#  endif

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm. */
      vg_assert(sci->status.what != SsIdle);
      return;
   }

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall).
      Hence: */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   }

   else
   if (at_restart) {
#     if defined(VGO_solaris)
      /* We should never hit this branch on Solaris, see the comment above. */
      vg_assert(0);
#     endif

      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         EINTR it. */
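      /* Note: 'restart' is supplied by our caller; it normally reflects
         whether the interrupting handler was installed with SA_RESTART.
         (Descriptive note only -- the decision is made upstream, in the
         signal machinery.) */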
      if (restart) {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
      } else {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
         canonical = convert_SysRes_to_SyscallStatus(
                        VG_(mk_SysRes_Error)( VKI_EINTR )
                     );
         if (!(sci->flags & SfNoWriteResult))
            putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
         sci->status = canonical;
         VG_(post_syscall)(tid);
      }
   }

   else
   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      vg_assert(!(sci->flags & SfNoWriteResult));
      putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
#     if defined(VGO_solaris)
      if (tst->os_state.in_door_return) {
#        if defined(VGP_x86_solaris)
         /* Registers %esp and %ebp were also modified by the syscall. */
         tst->arch.vex.guest_ESP = uc->uc_mcontext.gregs[VKI_UESP];
         tst->arch.vex.guest_EBP = uc->uc_mcontext.gregs[VKI_EBP];
#        elif defined(VGP_amd64_solaris)
         tst->arch.vex.guest_RSP = uc->uc_mcontext.gregs[VKI_REG_RSP];
         tst->arch.vex.guest_RBP = uc->uc_mcontext.gregs[VKI_REG_RBP];
#        endif
      }
#     endif
      sci->status = canonical;
      VG_(post_syscall)(tid);
   }

   else
   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed and committed: nothing to do\n");
#     if defined(VGP_x86_solaris)
      /* The %eax and %edx values are committed but the carry flag is still
         uncommitted.  Save it now. */
      LibVEX_GuestX86_put_eflag_c(sr_isError(sres), &th_regs->vex);
#     elif defined(VGP_amd64_solaris)
      LibVEX_GuestAMD64_put_rflag_c(sr_isError(sres), &th_regs->vex);
#     endif
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);
   }

   else
      VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
}
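
/* How this function is typically reached -- a minimal sketch, assuming the
   async signal handler in m_signals.c is the caller (illustrative only;
   'handler_wants_restart' is a placeholder, not a real variable):

      VG_(fixup_guest_state_after_syscall_interrupted)(
         tid,
         VG_UCONTEXT_INSTR_PTR(uc),   // real IP at the moment of the signal
         sres,                        // syscall result, if it got that far
         handler_wants_restart,       // e.g. derived from SA_RESTART
         uc );
*/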


#if defined(VGO_solaris)
/* Returns True if ip is inside fixable syscall code in syscall-*-*.S.  This
   function can be called by a 'non-running' thread! */
Bool VG_(is_ip_in_blocking_syscall)(ThreadId tid, Addr ip)
{
   ThreadState *tst = VG_(get_ThreadState)(tid);

   if (tst->os_state.in_door_return)
      return ip >= ML_(blksys_setup_DRET) && ip < ML_(blksys_finished_DRET);
   else
      return ip >= ML_(blksys_setup) && ip < ML_(blksys_finished);
}
#endif
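
/* Minimal usage sketch (illustrative only; the real callers live in the
   Solaris-specific syswrap/signal code):

      Addr ip = VG_(get_IP)(tid);   // tid need not be the running thread
      if (VG_(is_ip_in_blocking_syscall)(tid, ip)) {
         // tid is parked inside do_syscall_for_client and is a candidate
         // for the fixup/restart machinery above.
      }
*/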


#if defined(VGO_darwin)
// Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
// This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
// This longjmps back to the scheduler.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState*     tst;
   SyscallInfo*     sci;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%u](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   sci     = & syscallInfo[tid];
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread state.
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   ML_(sync_mappings)("in", "ML_(wqthread_continue_NORETURN)", 0);

   sci->status.what = SsIdle;

   vg_assert(tst->sched_jmpbuf_valid);
   VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);

   /* NOTREACHED */
   vg_assert(0);
}
#endif


/* ---------------------------------------------------------------------
   A place to store the where-to-call-when-really-done pointer
   ------------------------------------------------------------------ */

// When the final thread is done, where shall I call to shut down the
// system cleanly?  Is set once at startup (in m_main) and never
// changes after that.  Is basically a pointer to the exit
// continuation.  This is all just a nasty hack to avoid calling
// directly from m_syswrap to m_main at exit, since that would cause
// m_main to become part of a module cycle, which is silly.
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;
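
/* Illustrative wiring sketch (assumed shapes, not verbatim code from m_main
   or from the thread-exit path):

      // in m_main, once, at startup:
      //    VG_(address_of_m_main_shutdown_actions_NORETURN)
      //       = & shutdown_actions_NORETURN;

      // in m_syswrap, when the final thread is done:
      //    VG_(address_of_m_main_shutdown_actions_NORETURN)( tid, src );
*/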

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/