1
2 /*--------------------------------------------------------------------*/
3 /*--- Handle system calls. syswrap-main.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2013 Julian Seward
11 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #include "libvex_guest_offsets.h"
32 #include "libvex_trc_values.h"
33 #include "pub_core_basics.h"
34 #include "pub_core_aspacemgr.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_threadstate.h"
38 #include "pub_core_libcbase.h"
39 #include "pub_core_libcassert.h"
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcproc.h" // For VG_(getpid)()
42 #include "pub_core_libcsignal.h"
43 #include "pub_core_scheduler.h" // For VG_({acquire,release}_BigLock),
44 // and VG_(vg_yield)
45 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
46 #include "pub_core_tooliface.h"
47 #include "pub_core_options.h"
48 #include "pub_core_signals.h" // For VG_SIGVGKILL, VG_(poll_signals)
49 #include "pub_core_syscall.h"
50 #include "pub_core_machine.h"
51 #include "pub_core_mallocfree.h"
52 #include "pub_core_syswrap.h"
53
54 #include "priv_types_n_macros.h"
55 #include "priv_syswrap-main.h"
56
57 #if defined(VGO_darwin)
58 #include "priv_syswrap-darwin.h"
59 #endif
60
61 /* Useful info which needs to be recorded somewhere:
62 Use of registers in syscalls is:
63
64 NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
65 LINUX:
66 x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM)
67 amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM)
68 ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1)
69 ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1)
70 arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1)
71 mips32 v0 a0 a1 a2 a3 stack stack n/a n/a v0 (== NUM)
72 mips64 v0 a0 a1 a2 a3 a4 a5 a6 a7 v0 (== NUM)
73 arm64 x8 x0 x1 x2 x3 x4 x5 n/a n/a x0 ?? (== ARG1??)
74
75 On s390x the svc instruction is used for system calls. The system call
76 number is encoded in the instruction (8 bit immediate field). Since Linux
77 2.6 it is also allowed to use svc 0 with the system call number in r1.
78 This was introduced for system calls >255, but works for all. It is
79 also possible to see the svc 0 together with an EXecute instruction, that
80 fills in the immediate field.
81 s390x r1/SVC r2 r3 r4 r5 r6 r7 n/a n/a r2 (== ARG1)
82
83 NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
84 DARWIN:
85 x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c
86 amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c
87
88 For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
89 amd64-darwin. Apparently 0(%esp) is some kind of return address
90 (perhaps for syscalls done with "sysenter"?) I don't think it is
91 relevant for syscalls done with "int $0x80/1/2".
92 */
93
94 /* This is the top level of the system-call handler module. All
95 system calls are channelled through here, doing two things:
96
97 * notify the tool of the events (mem/reg reads, writes) happening
98
99 * perform the syscall, usually by passing it along to the kernel
100 unmodified.
101
102 A magical piece of assembly code, do_syscall_for_client_WRK, in
103 syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
104 kernel, whilst having the simulator retain control.
105 */
106
107 /* The main function is VG_(client_syscall). The simulation calls it
108 whenever a client thread wants to do a syscall. The following is a
109 sketch of what it does.
110
111 * Ensures the root thread's stack is suitably mapped. Tedious and
112 arcane. See big big comment in VG_(client_syscall).
113
114 * First, it rounds up the syscall number and args (which is a
115 platform dependent activity) and puts them in a struct ("args")
116 and also a copy in "orig_args".
117
118 The pre/post wrappers refer to these structs and so no longer
119 need magic macros to access any specific registers. This struct
120 is stored in thread-specific storage.
121
122
123 * The pre-wrapper is called, passing it a pointer to struct
124 "args".
125
126
127 * The pre-wrapper examines the args and pokes the tool
128 appropriately. It may modify the args; this is why "orig_args"
129 is also stored.
130
131 The pre-wrapper may choose to 'do' the syscall itself, and
132 concludes one of three outcomes:
133
134 Success(N) -- syscall is already complete, with success;
135 result is N
136
137 Fail(N) -- syscall is already complete, with failure;
138 error code is N
139
140 HandToKernel -- (the usual case): this needs to be given to
141 the kernel to be done, using the values in
142 the possibly-modified "args" struct.
143
144 In addition, the pre-wrapper may set some flags:
145
146 MayBlock -- only applicable when outcome==HandToKernel
147
148 PostOnFail -- only applicable when outcome==HandToKernel or Fail
149
150
151 * If the pre-outcome is HandToKernel, the syscall is duly handed
152 off to the kernel (perhaps involving some thread switchery, but
153 that's not important). This reduces the possible set of outcomes
154 to either Success(N) or Fail(N).
155
156
157 * The outcome (Success(N) or Fail(N)) is written back to the guest
158 register(s). This is platform specific:
159
160 x86: Success(N) ==> eax = N
161 Fail(N) ==> eax = -N
162
163 ditto amd64
164
165 ppc32: Success(N) ==> r3 = N, CR0.SO = 0
166 Fail(N) ==> r3 = N, CR0.SO = 1
167
168 Darwin:
169 x86: Success(N) ==> edx:eax = N, cc = 0
170 Fail(N) ==> edx:eax = N, cc = 1
171
172 s390x: Success(N) ==> r2 = N
173 Fail(N) ==> r2 = -N
174
175 * The post wrapper is called if:
176
177 - it exists, and
178 - outcome==Success or (outcome==Fail and PostOnFail is set)
179
180 The post wrapper is passed the adulterated syscall args (struct
181 "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
182
183 There are several other complications, primarily to do with
184 syscalls getting interrupted, explained in comments in the code.
185 */
186
187 /* CAVEATS for writing wrappers. It is important to follow these!
188
189 The macros defined in priv_types_n_macros.h are designed to help
190 decouple the wrapper logic from the actual representation of
191 syscall args/results, since these wrappers are designed to work on
192 multiple platforms.
193
194 Sometimes a PRE wrapper will complete the syscall itself, without
195 handing it to the kernel. It will use one of SET_STATUS_Success,
196 SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
197 value. It is critical to appreciate that use of the macro does not
198 immediately cause the underlying guest state to be updated -- that
199 is done by the driver logic in this file, when the wrapper returns.
200
201 As a result, PRE wrappers of the following form will malfunction:
202
203 PRE(fooble)
204 {
205 ... do stuff ...
206 SET_STATUS_Somehow(...)
207
208 // do something that assumes guest state is up to date
209 }
210
211 In particular, direct or indirect calls to VG_(poll_signals) after
212 setting STATUS can cause the guest state to be read (in order to
213 build signal frames). Do not do this. If you want a signal poll
214 after the syscall goes through, do "*flags |= SfPollAfter" and the
215 driver logic will do it for you.
216
217 -----------
218
219 Another critical requirement following introduction of new address
220 space manager (JRS, 20050923):
221
222 In a situation where the mappedness of memory has changed, aspacem
223 should be notified BEFORE the tool. Hence the following is
224 correct:
225
226 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
227 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
228 if (d)
229 VG_(discard_translations)(s->start, s->end+1 - s->start);
230
231 whilst this is wrong:
232
233 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
234 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
235 if (d)
236 VG_(discard_translations)(s->start, s->end+1 - s->start);
237
238 The reason is that the tool may itself ask aspacem for more shadow
239 memory as a result of the VG_TRACK call. In such a situation it is
240 critical that aspacem's segment array is up to date -- hence the
241 need to notify aspacem first.
242
243 -----------
244
245 Also .. take care to call VG_(discard_translations) whenever
246 memory with execute permissions is unmapped.
247 */
248
249
250 /* ---------------------------------------------------------------------
251 Do potentially blocking syscall for the client, and mess with
252 signal masks at the same time.
253 ------------------------------------------------------------------ */
254
255 /* Perform a syscall on behalf of a client thread, using a specific
256 signal mask. On completion, the signal mask is set to restore_mask
257 (which presumably blocks almost everything). If a signal happens
258 during the syscall, the handler should call
259 VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
260 thread's context to do the right thing.
261
262 The _WRK function is handwritten assembly, implemented per-platform
263 in coregrind/m_syswrap/syscall-$PLAT.S. It has some very magic
264 properties. See comments at the top of
265 VG_(fixup_guest_state_after_syscall_interrupted) below for details.
266
267 This function (these functions) are required to return zero in case
268 of success (even if the syscall itself failed), and nonzero if the
269 sigprocmask-swizzling calls failed. We don't actually care about
270 the failure values from sigprocmask, although most of the assembly
271 implementations do attempt to return that, using the convention
272 0 for success, or 0x8000 | error-code for failure.
273 */
274 #if defined(VGO_linux)
275 extern
276 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
277 void* guest_state,
278 const vki_sigset_t *syscall_mask,
279 const vki_sigset_t *restore_mask,
280 Word sigsetSzB );
281 #elif defined(VGO_darwin)
282 extern
283 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
284 void* guest_state,
285 const vki_sigset_t *syscall_mask,
286 const vki_sigset_t *restore_mask,
287 Word sigsetSzB ); /* unused */
288 extern
289 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
290 void* guest_state,
291 const vki_sigset_t *syscall_mask,
292 const vki_sigset_t *restore_mask,
293 Word sigsetSzB ); /* unused */
294 extern
295 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
296 void* guest_state,
297 const vki_sigset_t *syscall_mask,
298 const vki_sigset_t *restore_mask,
299 Word sigsetSzB ); /* unused */
300 #else
301 # error "Unknown OS"
302 #endif
303
304
static
/* Perform SYSCALLNO on behalf of the client thread TST, with
   SYSCALL_MASK installed as the signal mask for the duration of the
   kernel call.  The real work is done by the per-platform assembly
   helpers declared above (ML_(do_syscall_for_client*_WRK)); see the
   long comment preceding those declarations for the mask-swizzling
   contract.  The syscall's own result is left in the guest state,
   not returned here. */
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;  /* receives the pre-call mask; helper restores from it */
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   /* Darwin encodes a syscall class (Unix/Mach/mdep) into the number;
      each class has its own trap instruction and hence its own
      assembly helper. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* err != 0 means the sigprocmask swizzling inside the helper
      failed (convention: 0x8000 | errno), which is fatal; a failure
      of the client's syscall itself does NOT land here. */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}
351
352
353 /* ---------------------------------------------------------------------
354 Impedance matchers and misc helpers
355 ------------------------------------------------------------------ */
356
357 static
eq_SyscallArgs(SyscallArgs * a1,SyscallArgs * a2)358 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
359 {
360 return a1->sysno == a2->sysno
361 && a1->arg1 == a2->arg1
362 && a1->arg2 == a2->arg2
363 && a1->arg3 == a2->arg3
364 && a1->arg4 == a2->arg4
365 && a1->arg5 == a2->arg5
366 && a1->arg6 == a2->arg6
367 && a1->arg7 == a2->arg7
368 && a1->arg8 == a2->arg8;
369 }
370
371 static
eq_SyscallStatus(SyscallStatus * s1,SyscallStatus * s2)372 Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
373 {
374 /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
375 if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
376 return True;
377 # if defined(VGO_darwin)
378 /* Darwin-specific debugging guff */
379 vg_assert(s1->what == s2->what);
380 VG_(printf)("eq_SyscallStatus:\n");
381 VG_(printf)(" {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
382 VG_(printf)(" {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
383 vg_assert(0);
384 # endif
385 return False;
386 }
387
388 /* Convert between SysRes and SyscallStatus, to the extent possible. */
389
390 static
convert_SysRes_to_SyscallStatus(SysRes res)391 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
392 {
393 SyscallStatus status;
394 status.what = SsComplete;
395 status.sres = res;
396 return status;
397 }
398
399
400 /* Impedance matchers. These convert syscall arg or result data from
401 the platform-specific in-guest-state format to the canonical
402 formats, and back. */
403
404 static
getSyscallArgsFromGuestState(SyscallArgs * canonical,VexGuestArchState * gst_vanilla,UInt trc)405 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical,
406 /*IN*/ VexGuestArchState* gst_vanilla,
407 /*IN*/ UInt trc )
408 {
409 #if defined(VGP_x86_linux)
410 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
411 canonical->sysno = gst->guest_EAX;
412 canonical->arg1 = gst->guest_EBX;
413 canonical->arg2 = gst->guest_ECX;
414 canonical->arg3 = gst->guest_EDX;
415 canonical->arg4 = gst->guest_ESI;
416 canonical->arg5 = gst->guest_EDI;
417 canonical->arg6 = gst->guest_EBP;
418 canonical->arg7 = 0;
419 canonical->arg8 = 0;
420
421 #elif defined(VGP_amd64_linux)
422 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
423 canonical->sysno = gst->guest_RAX;
424 canonical->arg1 = gst->guest_RDI;
425 canonical->arg2 = gst->guest_RSI;
426 canonical->arg3 = gst->guest_RDX;
427 canonical->arg4 = gst->guest_R10;
428 canonical->arg5 = gst->guest_R8;
429 canonical->arg6 = gst->guest_R9;
430 canonical->arg7 = 0;
431 canonical->arg8 = 0;
432
433 #elif defined(VGP_ppc32_linux)
434 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
435 canonical->sysno = gst->guest_GPR0;
436 canonical->arg1 = gst->guest_GPR3;
437 canonical->arg2 = gst->guest_GPR4;
438 canonical->arg3 = gst->guest_GPR5;
439 canonical->arg4 = gst->guest_GPR6;
440 canonical->arg5 = gst->guest_GPR7;
441 canonical->arg6 = gst->guest_GPR8;
442 canonical->arg7 = 0;
443 canonical->arg8 = 0;
444
445 #elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
446 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
447 canonical->sysno = gst->guest_GPR0;
448 canonical->arg1 = gst->guest_GPR3;
449 canonical->arg2 = gst->guest_GPR4;
450 canonical->arg3 = gst->guest_GPR5;
451 canonical->arg4 = gst->guest_GPR6;
452 canonical->arg5 = gst->guest_GPR7;
453 canonical->arg6 = gst->guest_GPR8;
454 canonical->arg7 = 0;
455 canonical->arg8 = 0;
456
457 #elif defined(VGP_arm_linux)
458 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
459 canonical->sysno = gst->guest_R7;
460 canonical->arg1 = gst->guest_R0;
461 canonical->arg2 = gst->guest_R1;
462 canonical->arg3 = gst->guest_R2;
463 canonical->arg4 = gst->guest_R3;
464 canonical->arg5 = gst->guest_R4;
465 canonical->arg6 = gst->guest_R5;
466 canonical->arg7 = 0;
467 canonical->arg8 = 0;
468
469 #elif defined(VGP_arm64_linux)
470 VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
471 canonical->sysno = gst->guest_X8;
472 canonical->arg1 = gst->guest_X0;
473 canonical->arg2 = gst->guest_X1;
474 canonical->arg3 = gst->guest_X2;
475 canonical->arg4 = gst->guest_X3;
476 canonical->arg5 = gst->guest_X4;
477 canonical->arg6 = gst->guest_X5;
478 canonical->arg7 = 0;
479 canonical->arg8 = 0;
480
481 #elif defined(VGP_mips32_linux)
482 VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
483 canonical->sysno = gst->guest_r2; // v0
484 if (canonical->sysno == __NR_exit) {
485 canonical->arg1 = gst->guest_r4; // a0
486 canonical->arg2 = 0;
487 canonical->arg3 = 0;
488 canonical->arg4 = 0;
489 canonical->arg5 = 0;
490 canonical->arg6 = 0;
491 canonical->arg8 = 0;
492 } else if (canonical->sysno != __NR_syscall) {
493 canonical->arg1 = gst->guest_r4; // a0
494 canonical->arg2 = gst->guest_r5; // a1
495 canonical->arg3 = gst->guest_r6; // a2
496 canonical->arg4 = gst->guest_r7; // a3
497 canonical->arg5 = *((UInt*) (gst->guest_r29 + 16)); // 16(guest_SP/sp)
498 canonical->arg6 = *((UInt*) (gst->guest_r29 + 20)); // 20(sp)
499 canonical->arg8 = 0;
500 } else {
501 // Fixme hack handle syscall()
502 canonical->sysno = gst->guest_r4; // a0
503 canonical->arg1 = gst->guest_r5; // a1
504 canonical->arg2 = gst->guest_r6; // a2
505 canonical->arg3 = gst->guest_r7; // a3
506 canonical->arg4 = *((UInt*) (gst->guest_r29 + 16)); // 16(guest_SP/sp)
507 canonical->arg5 = *((UInt*) (gst->guest_r29 + 20)); // 20(guest_SP/sp)
508 canonical->arg6 = *((UInt*) (gst->guest_r29 + 24)); // 24(guest_SP/sp)
509 canonical->arg8 = __NR_syscall;
510 }
511
512 #elif defined(VGP_mips64_linux)
513 VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
514 canonical->sysno = gst->guest_r2; // v0
515 canonical->arg1 = gst->guest_r4; // a0
516 canonical->arg2 = gst->guest_r5; // a1
517 canonical->arg3 = gst->guest_r6; // a2
518 canonical->arg4 = gst->guest_r7; // a3
519 canonical->arg5 = gst->guest_r8; // a4
520 canonical->arg6 = gst->guest_r9; // a5
521
522 #elif defined(VGP_x86_darwin)
523 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
524 UWord *stack = (UWord *)gst->guest_ESP;
525 // GrP fixme hope syscalls aren't called with really shallow stacks...
526 canonical->sysno = gst->guest_EAX;
527 if (canonical->sysno != 0) {
528 // stack[0] is return address
529 canonical->arg1 = stack[1];
530 canonical->arg2 = stack[2];
531 canonical->arg3 = stack[3];
532 canonical->arg4 = stack[4];
533 canonical->arg5 = stack[5];
534 canonical->arg6 = stack[6];
535 canonical->arg7 = stack[7];
536 canonical->arg8 = stack[8];
537 } else {
538 // GrP fixme hack handle syscall()
539 // GrP fixme what about __syscall() ?
540 // stack[0] is return address
541 // DDD: the tool can't see that the params have been shifted! Can
542 // lead to incorrect checking, I think, because the PRRAn/PSARn
543 // macros will mention the pre-shifted args.
544 canonical->sysno = stack[1];
545 vg_assert(canonical->sysno != 0);
546 canonical->arg1 = stack[2];
547 canonical->arg2 = stack[3];
548 canonical->arg3 = stack[4];
549 canonical->arg4 = stack[5];
550 canonical->arg5 = stack[6];
551 canonical->arg6 = stack[7];
552 canonical->arg7 = stack[8];
553 canonical->arg8 = stack[9];
554
555 PRINT("SYSCALL[%d,?](0) syscall(%s, ...); please stand by...\n",
556 VG_(getpid)(), /*tid,*/
557 VG_SYSNUM_STRING(canonical->sysno));
558 }
559
560 // Here we determine what kind of syscall it was by looking at the
561 // interrupt kind, and then encode the syscall number using the 64-bit
562 // encoding for Valgrind's internal use.
563 //
564 // DDD: Would it be better to stash the JMP kind into the Darwin
565 // thread state rather than passing in the trc?
566 switch (trc) {
567 case VEX_TRC_JMP_SYS_INT128:
568 // int $0x80 = Unix, 64-bit result
569 vg_assert(canonical->sysno >= 0);
570 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
571 break;
572 case VEX_TRC_JMP_SYS_SYSENTER:
573 // syscall = Unix, 32-bit result
574 // OR Mach, 32-bit result
575 if (canonical->sysno >= 0) {
576 // GrP fixme hack: 0xffff == I386_SYSCALL_NUMBER_MASK
577 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
578 & 0xffff);
579 } else {
580 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
581 }
582 break;
583 case VEX_TRC_JMP_SYS_INT129:
584 // int $0x81 = Mach, 32-bit result
585 vg_assert(canonical->sysno < 0);
586 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
587 break;
588 case VEX_TRC_JMP_SYS_INT130:
589 // int $0x82 = mdep, 32-bit result
590 vg_assert(canonical->sysno >= 0);
591 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
592 break;
593 default:
594 vg_assert(0);
595 break;
596 }
597
598 #elif defined(VGP_amd64_darwin)
599 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
600 UWord *stack = (UWord *)gst->guest_RSP;
601
602 vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);
603
604 // GrP fixme hope syscalls aren't called with really shallow stacks...
605 canonical->sysno = gst->guest_RAX;
606 if (canonical->sysno != __NR_syscall) {
607 // stack[0] is return address
608 canonical->arg1 = gst->guest_RDI;
609 canonical->arg2 = gst->guest_RSI;
610 canonical->arg3 = gst->guest_RDX;
611 canonical->arg4 = gst->guest_R10; // not rcx with syscall insn
612 canonical->arg5 = gst->guest_R8;
613 canonical->arg6 = gst->guest_R9;
614 canonical->arg7 = stack[1];
615 canonical->arg8 = stack[2];
616 } else {
617 // GrP fixme hack handle syscall()
618 // GrP fixme what about __syscall() ?
619 // stack[0] is return address
620 // DDD: the tool can't see that the params have been shifted! Can
621 // lead to incorrect checking, I think, because the PRRAn/PSARn
622 // macros will mention the pre-shifted args.
623 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
624 vg_assert(canonical->sysno != __NR_syscall);
625 canonical->arg1 = gst->guest_RSI;
626 canonical->arg2 = gst->guest_RDX;
627 canonical->arg3 = gst->guest_R10; // not rcx with syscall insn
628 canonical->arg4 = gst->guest_R8;
629 canonical->arg5 = gst->guest_R9;
630 canonical->arg6 = stack[1];
631 canonical->arg7 = stack[2];
632 canonical->arg8 = stack[3];
633
634 PRINT("SYSCALL[%d,?](0) syscall(%s, ...); please stand by...\n",
635 VG_(getpid)(), /*tid,*/
636 VG_SYSNUM_STRING(canonical->sysno));
637 }
638
639 // no canonical->sysno adjustment needed
640
641 #elif defined(VGP_s390x_linux)
642 VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
643 canonical->sysno = gst->guest_SYSNO;
644 canonical->arg1 = gst->guest_r2;
645 canonical->arg2 = gst->guest_r3;
646 canonical->arg3 = gst->guest_r4;
647 canonical->arg4 = gst->guest_r5;
648 canonical->arg5 = gst->guest_r6;
649 canonical->arg6 = gst->guest_r7;
650 canonical->arg7 = 0;
651 canonical->arg8 = 0;
652
653 #elif defined(VGP_tilegx_linux)
654 VexGuestTILEGXState* gst = (VexGuestTILEGXState*)gst_vanilla;
655 canonical->sysno = gst->guest_r10;
656 canonical->arg1 = gst->guest_r0;
657 canonical->arg2 = gst->guest_r1;
658 canonical->arg3 = gst->guest_r2;
659 canonical->arg4 = gst->guest_r3;
660 canonical->arg5 = gst->guest_r4;
661 canonical->arg6 = gst->guest_r5;
662 canonical->arg7 = 0;
663 canonical->arg8 = 0;
664
665 #else
666 # error "getSyscallArgsFromGuestState: unknown arch"
667 #endif
668 }
669
static
/* Write the (possibly wrapper-modified) canonical syscall number and
   args back into the platform-specific guest state, so the assembly
   helper can hand them to the kernel.  Mirror of
   getSyscallArgsFromGuestState. */
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   gst->guest_X8 = canonical->sysno;
   gst->guest_X0 = canonical->arg1;
   gst->guest_X1 = canonical->arg2;
   gst->guest_X2 = canonical->arg3;
   gst->guest_X3 = canonical->arg4;
   gst->guest_X4 = canonical->arg5;
   gst->guest_X5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   /* Strip the 64-bit class encoding before giving the number back
      to the (32-bit) kernel interface. */
   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1] = canonical->arg7;
   stack[2] = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO  = canonical->sysno;
   gst->guest_r2     = canonical->arg1;
   gst->guest_r3     = canonical->arg2;
   gst->guest_r4     = canonical->arg3;
   gst->guest_r5     = canonical->arg4;
   gst->guest_r6     = canonical->arg5;
   gst->guest_r7     = canonical->arg6;

#elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   /* arg8 == __NR_syscall marks an indirect syscall(2) whose args were
      un-shifted by getSyscallArgsFromGuestState; re-shift them here. */
   if (canonical->arg8 != __NR_syscall) {
      gst->guest_r2 = canonical->sysno;
      gst->guest_r4 = canonical->arg1;
      gst->guest_r5 = canonical->arg2;
      gst->guest_r6 = canonical->arg3;
      gst->guest_r7 = canonical->arg4;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6; // 20(sp)
   } else {
      /* NOTE(review): clears the marker in the caller-owned /*IN*/
         struct -- presumably so a re-entry doesn't re-shift; confirm. */
      canonical->arg8 = 0;
      gst->guest_r2 = __NR_syscall;
      gst->guest_r4 = canonical->sysno;
      gst->guest_r5 = canonical->arg1;
      gst->guest_r6 = canonical->arg2;
      gst->guest_r7 = canonical->arg3;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5; // 20(sp)
      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6; // 24(sp)
   }

#elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   /* NOTE(review): arg7/arg8 are not written back here (r10/r11 keep
      their original values) -- wrapper modifications of args 7/8 would
      be lost; verify against the mips64 wrappers. */
   gst->guest_r2 = canonical->sysno;
   gst->guest_r4 = canonical->arg1;
   gst->guest_r5 = canonical->arg2;
   gst->guest_r6 = canonical->arg3;
   gst->guest_r7 = canonical->arg4;
   gst->guest_r8 = canonical->arg5;
   gst->guest_r9 = canonical->arg6;

#elif defined(VGP_tilegx_linux)
   VexGuestTILEGXState* gst = (VexGuestTILEGXState*)gst_vanilla;
   gst->guest_r10 = canonical->sysno;
   gst->guest_r0  = canonical->arg1;
   gst->guest_r1  = canonical->arg2;
   gst->guest_r2  = canonical->arg3;
   gst->guest_r3  = canonical->arg4;
   gst->guest_r4  = canonical->arg5;
   gst->guest_r5  = canonical->arg6;

#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}
824
/* Extract the syscall completion status from the guest register
   state, after the kernel has been given the syscall.  Each platform
   encodes the outcome differently (plain register, register pair,
   carry flag, CR0.SO bit, a3 register, ...); the VG_(mk_SysRes_*)
   functions fold that platform encoding into a platform-independent
   SysRes.  On return, canonical->what is always SsComplete. */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus* canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   /* Result (or negated errno) is in EAX; mk_SysRes decodes it. */
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   /* On ppc, the error/success indication is the CR0.SO bit, not the
      value register itself. */
   UInt cr = LibVEX_GuestPPC32_get_CR( gst );
   UInt cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   /* Same CR0.SO convention as ppc32. */
   UInt cr = LibVEX_GuestPPC64_get_CR( gst );
   UInt cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm64_linux)( gst->guest_X0 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips32_linux)
   /* mips returns the value in v0/v1 and an error flag in a3. */
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   UInt v0 = gst->guest_r2;    // v0
   UInt v1 = gst->guest_r3;    // v1
   UInt a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   ULong v0 = gst->guest_r2;    // v0
   ULong v1 = gst->guest_r3;    // v1
   ULong a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips64_linux)(v0, v1, a3);
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   /* The decoding depends on which of Darwin's three syscall classes
      was invoked; the class was stashed in guest_SC_CLASS at syscall
      entry (see VG_(client_syscall)). */
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
   canonical->what = SsComplete;

#  elif defined(VGP_tilegx_linux)
   VexGuestTILEGXState* gst = (VexGuestTILEGXState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_tilegx_linux)( gst->guest_r0 );
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}
957
958 static
putSyscallStatusIntoGuestState(ThreadId tid,SyscallStatus * canonical,VexGuestArchState * gst_vanilla)959 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
960 /*IN*/ SyscallStatus* canonical,
961 /*OUT*/VexGuestArchState* gst_vanilla )
962 {
963 # if defined(VGP_x86_linux)
964 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
965 vg_assert(canonical->what == SsComplete);
966 if (sr_isError(canonical->sres)) {
967 /* This isn't exactly right, in that really a Failure with res
968 not in the range 1 .. 4095 is unrepresentable in the
969 Linux-x86 scheme. Oh well. */
970 gst->guest_EAX = - (Int)sr_Err(canonical->sres);
971 } else {
972 gst->guest_EAX = sr_Res(canonical->sres);
973 }
974 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
975 OFFSET_x86_EAX, sizeof(UWord) );
976
977 # elif defined(VGP_amd64_linux)
978 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
979 vg_assert(canonical->what == SsComplete);
980 if (sr_isError(canonical->sres)) {
981 /* This isn't exactly right, in that really a Failure with res
982 not in the range 1 .. 4095 is unrepresentable in the
983 Linux-amd64 scheme. Oh well. */
984 gst->guest_RAX = - (Long)sr_Err(canonical->sres);
985 } else {
986 gst->guest_RAX = sr_Res(canonical->sres);
987 }
988 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
989 OFFSET_amd64_RAX, sizeof(UWord) );
990
991 # elif defined(VGP_ppc32_linux)
992 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
993 UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
994 vg_assert(canonical->what == SsComplete);
995 if (sr_isError(canonical->sres)) {
996 /* set CR0.SO */
997 LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
998 gst->guest_GPR3 = sr_Err(canonical->sres);
999 } else {
1000 /* clear CR0.SO */
1001 LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
1002 gst->guest_GPR3 = sr_Res(canonical->sres);
1003 }
1004 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1005 OFFSET_ppc32_GPR3, sizeof(UWord) );
1006 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1007 OFFSET_ppc32_CR0_0, sizeof(UChar) );
1008
1009 # elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
1010 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
1011 UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
1012 vg_assert(canonical->what == SsComplete);
1013 if (sr_isError(canonical->sres)) {
1014 /* set CR0.SO */
1015 LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
1016 gst->guest_GPR3 = sr_Err(canonical->sres);
1017 } else {
1018 /* clear CR0.SO */
1019 LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
1020 gst->guest_GPR3 = sr_Res(canonical->sres);
1021 }
1022 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1023 OFFSET_ppc64_GPR3, sizeof(UWord) );
1024 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1025 OFFSET_ppc64_CR0_0, sizeof(UChar) );
1026
1027 # elif defined(VGP_arm_linux)
1028 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
1029 vg_assert(canonical->what == SsComplete);
1030 if (sr_isError(canonical->sres)) {
1031 /* This isn't exactly right, in that really a Failure with res
1032 not in the range 1 .. 4095 is unrepresentable in the
1033 Linux-arm scheme. Oh well. */
1034 gst->guest_R0 = - (Int)sr_Err(canonical->sres);
1035 } else {
1036 gst->guest_R0 = sr_Res(canonical->sres);
1037 }
1038 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1039 OFFSET_arm_R0, sizeof(UWord) );
1040
1041 # elif defined(VGP_arm64_linux)
1042 VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
1043 vg_assert(canonical->what == SsComplete);
1044 if (sr_isError(canonical->sres)) {
1045 /* This isn't exactly right, in that really a Failure with res
1046 not in the range 1 .. 4095 is unrepresentable in the
1047 Linux-arm64 scheme. Oh well. */
1048 gst->guest_X0 = - (Long)sr_Err(canonical->sres);
1049 } else {
1050 gst->guest_X0 = sr_Res(canonical->sres);
1051 }
1052 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1053 OFFSET_arm64_X0, sizeof(UWord) );
1054
1055 #elif defined(VGP_x86_darwin)
1056 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1057 SysRes sres = canonical->sres;
1058 vg_assert(canonical->what == SsComplete);
1059 /* Unfortunately here we have to break abstraction and look
1060 directly inside 'res', in order to decide what to do. */
1061 switch (sres._mode) {
1062 case SysRes_MACH: // int $0x81 = Mach, 32-bit result
1063 case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
1064 gst->guest_EAX = sres._wLO;
1065 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1066 OFFSET_x86_EAX, sizeof(UInt) );
1067 break;
1068 case SysRes_UNIX_OK: // int $0x80 = Unix, 64-bit result
1069 case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
1070 gst->guest_EAX = sres._wLO;
1071 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1072 OFFSET_x86_EAX, sizeof(UInt) );
1073 gst->guest_EDX = sres._wHI;
1074 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1075 OFFSET_x86_EDX, sizeof(UInt) );
1076 LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1077 gst );
1078 // GrP fixme sets defined for entire eflags, not just bit c
1079 // DDD: this breaks exp-ptrcheck.
1080 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1081 offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
1082 break;
1083 default:
1084 vg_assert(0);
1085 break;
1086 }
1087
1088 #elif defined(VGP_amd64_darwin)
1089 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1090 SysRes sres = canonical->sres;
1091 vg_assert(canonical->what == SsComplete);
1092 /* Unfortunately here we have to break abstraction and look
1093 directly inside 'res', in order to decide what to do. */
1094 switch (sres._mode) {
1095 case SysRes_MACH: // syscall = Mach, 64-bit result
1096 case SysRes_MDEP: // syscall = mdep, 64-bit result
1097 gst->guest_RAX = sres._wLO;
1098 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1099 OFFSET_amd64_RAX, sizeof(ULong) );
1100 break;
1101 case SysRes_UNIX_OK: // syscall = Unix, 128-bit result
1102 case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
1103 gst->guest_RAX = sres._wLO;
1104 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1105 OFFSET_amd64_RAX, sizeof(ULong) );
1106 gst->guest_RDX = sres._wHI;
1107 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1108 OFFSET_amd64_RDX, sizeof(ULong) );
1109 LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1110 gst );
1111 // GrP fixme sets defined for entire rflags, not just bit c
1112 // DDD: this breaks exp-ptrcheck.
1113 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1114 offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
1115 break;
1116 default:
1117 vg_assert(0);
1118 break;
1119 }
1120
1121 # elif defined(VGP_s390x_linux)
1122 VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
1123 vg_assert(canonical->what == SsComplete);
1124 if (sr_isError(canonical->sres)) {
1125 gst->guest_r2 = - (Long)sr_Err(canonical->sres);
1126 } else {
1127 gst->guest_r2 = sr_Res(canonical->sres);
1128 }
1129
1130 # elif defined(VGP_mips32_linux)
1131 VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
1132 vg_assert(canonical->what == SsComplete);
1133 if (sr_isError(canonical->sres)) {
1134 gst->guest_r2 = (Int)sr_Err(canonical->sres);
1135 gst->guest_r7 = (Int)sr_Err(canonical->sres);
1136 } else {
1137 gst->guest_r2 = sr_Res(canonical->sres);
1138 gst->guest_r3 = sr_ResEx(canonical->sres);
1139 gst->guest_r7 = (Int)sr_Err(canonical->sres);
1140 }
1141 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1142 OFFSET_mips32_r2, sizeof(UWord) );
1143 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1144 OFFSET_mips32_r3, sizeof(UWord) );
1145 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1146 OFFSET_mips32_r7, sizeof(UWord) );
1147
1148 # elif defined(VGP_mips64_linux)
1149 VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
1150 vg_assert(canonical->what == SsComplete);
1151 if (sr_isError(canonical->sres)) {
1152 gst->guest_r2 = (Int)sr_Err(canonical->sres);
1153 gst->guest_r7 = (Int)sr_Err(canonical->sres);
1154 } else {
1155 gst->guest_r2 = sr_Res(canonical->sres);
1156 gst->guest_r3 = sr_ResEx(canonical->sres);
1157 gst->guest_r7 = (Int)sr_Err(canonical->sres);
1158 }
1159 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1160 OFFSET_mips64_r2, sizeof(UWord) );
1161 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1162 OFFSET_mips64_r3, sizeof(UWord) );
1163 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1164 OFFSET_mips64_r7, sizeof(UWord) );
1165
1166 # elif defined(VGP_tilegx_linux)
1167 VexGuestTILEGXState* gst = (VexGuestTILEGXState*)gst_vanilla;
1168 vg_assert(canonical->what == SsComplete);
1169 if (sr_isError(canonical->sres)) {
1170 gst->guest_r0 = - (Long)sr_Err(canonical->sres);
1171 // r1 hold errno
1172 gst->guest_r1 = (Long)sr_Err(canonical->sres);
1173 } else {
1174 gst->guest_r0 = sr_Res(canonical->sres);
1175 gst->guest_r1 = 0;
1176 }
1177
1178 # else
1179 # error "putSyscallStatusIntoGuestState: unknown arch"
1180 # endif
1181 }
1182
1183
1184 /* Tell me the offsets in the guest state of the syscall params, so
1185 that the scalar argument checkers don't have to have this info
1186 hardwired. */
1187
static
void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
{
   /* Zero everything first, so fields not assigned below (e.g. the
      s_arg* stack offsets on register-only ABIs) have a well-defined
      value. */
   VG_(bzero_inline)(layout, sizeof(*layout));

#if defined(VGP_x86_linux)
   layout->o_sysno  = OFFSET_x86_EAX;
   layout->o_arg1   = OFFSET_x86_EBX;
   layout->o_arg2   = OFFSET_x86_ECX;
   layout->o_arg3   = OFFSET_x86_EDX;
   layout->o_arg4   = OFFSET_x86_ESI;
   layout->o_arg5   = OFFSET_x86_EDI;
   layout->o_arg6   = OFFSET_x86_EBP;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_amd64_linux)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_R10;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc32_linux)
   layout->o_sysno  = OFFSET_ppc32_GPR0;
   layout->o_arg1   = OFFSET_ppc32_GPR3;
   layout->o_arg2   = OFFSET_ppc32_GPR4;
   layout->o_arg3   = OFFSET_ppc32_GPR5;
   layout->o_arg4   = OFFSET_ppc32_GPR6;
   layout->o_arg5   = OFFSET_ppc32_GPR7;
   layout->o_arg6   = OFFSET_ppc32_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   layout->o_sysno  = OFFSET_ppc64_GPR0;
   layout->o_arg1   = OFFSET_ppc64_GPR3;
   layout->o_arg2   = OFFSET_ppc64_GPR4;
   layout->o_arg3   = OFFSET_ppc64_GPR5;
   layout->o_arg4   = OFFSET_ppc64_GPR6;
   layout->o_arg5   = OFFSET_ppc64_GPR7;
   layout->o_arg6   = OFFSET_ppc64_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm_linux)
   layout->o_sysno  = OFFSET_arm_R7;
   layout->o_arg1   = OFFSET_arm_R0;
   layout->o_arg2   = OFFSET_arm_R1;
   layout->o_arg3   = OFFSET_arm_R2;
   layout->o_arg4   = OFFSET_arm_R3;
   layout->o_arg5   = OFFSET_arm_R4;
   layout->o_arg6   = OFFSET_arm_R5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm64_linux)
   layout->o_sysno  = OFFSET_arm64_X8;
   layout->o_arg1   = OFFSET_arm64_X0;
   layout->o_arg2   = OFFSET_arm64_X1;
   layout->o_arg3   = OFFSET_arm64_X2;
   layout->o_arg4   = OFFSET_arm64_X3;
   layout->o_arg5   = OFFSET_arm64_X4;
   layout->o_arg6   = OFFSET_arm64_X5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips32_linux)
   layout->o_sysno  = OFFSET_mips32_r2;
   layout->o_arg1   = OFFSET_mips32_r4;
   layout->o_arg2   = OFFSET_mips32_r5;
   layout->o_arg3   = OFFSET_mips32_r6;
   layout->o_arg4   = OFFSET_mips32_r7;
   /* Args 5 and 6 are passed on the stack, hence s_arg* (stack
      offsets) rather than o_arg* (guest-state offsets). */
   layout->s_arg5   = sizeof(UWord) * 4;
   layout->s_arg6   = sizeof(UWord) * 5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips64_linux)
   layout->o_sysno  = OFFSET_mips64_r2;
   layout->o_arg1   = OFFSET_mips64_r4;
   layout->o_arg2   = OFFSET_mips64_r5;
   layout->o_arg3   = OFFSET_mips64_r6;
   layout->o_arg4   = OFFSET_mips64_r7;
   layout->o_arg5   = OFFSET_mips64_r8;
   layout->o_arg6   = OFFSET_mips64_r9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_x86_darwin)
   layout->o_sysno  = OFFSET_x86_EAX;
   // syscall parameters are on stack in C convention
   layout->s_arg1   = sizeof(UWord) * 1;
   layout->s_arg2   = sizeof(UWord) * 2;
   layout->s_arg3   = sizeof(UWord) * 3;
   layout->s_arg4   = sizeof(UWord) * 4;
   layout->s_arg5   = sizeof(UWord) * 5;
   layout->s_arg6   = sizeof(UWord) * 6;
   layout->s_arg7   = sizeof(UWord) * 7;
   layout->s_arg8   = sizeof(UWord) * 8;
   
#elif defined(VGP_amd64_darwin)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_RCX;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   /* Args 7 and 8 overflow onto the stack. */
   layout->s_arg7   = sizeof(UWord) * 1;
   layout->s_arg8   = sizeof(UWord) * 2;

#elif defined(VGP_s390x_linux)
   layout->o_sysno  = OFFSET_s390x_SYSNO;
   layout->o_arg1   = OFFSET_s390x_r2;
   layout->o_arg2   = OFFSET_s390x_r3;
   layout->o_arg3   = OFFSET_s390x_r4;
   layout->o_arg4   = OFFSET_s390x_r5;
   layout->o_arg5   = OFFSET_s390x_r6;
   layout->o_arg6   = OFFSET_s390x_r7;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */
#elif defined(VGP_tilegx_linux)
   layout->o_sysno  = OFFSET_tilegx_r(10);
   layout->o_arg1   = OFFSET_tilegx_r(0);
   layout->o_arg2   = OFFSET_tilegx_r(1);
   layout->o_arg3   = OFFSET_tilegx_r(2);
   layout->o_arg4   = OFFSET_tilegx_r(3);
   layout->o_arg5   = OFFSET_tilegx_r(4);
   layout->o_arg6   = OFFSET_tilegx_r(5);
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#else
#error "getSyscallLayout: unknown arch"
#endif
}
1329
1330
1331 /* ---------------------------------------------------------------------
1332 The main driver logic
1333 ------------------------------------------------------------------ */
1334
1335 /* Finding the handlers for a given syscall, or faking up one
1336 when no handler is found. */
1337
/* Pre-handler used when no real handler exists for a syscall: warn
   the user (with a backtrace if verbosity > 1) and fail the syscall
   with ENOSYS without handing it to the kernel. */
static
void bad_before ( ThreadId tid,
                  SyscallArgLayout* layout,
                  /*MOD*/SyscallArgs* args,
                  /*OUT*/SyscallStatus* status,
                  /*OUT*/UWord* flags )
{
   VG_(dmsg)("WARNING: unhandled %s syscall: %s\n",
      VG_PLATFORM, VG_SYSNUM_STRING(args->sysno));
   if (VG_(clo_verbosity) > 1) {
      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   }
   VG_(dmsg)("You may be able to write your own handler.\n");
   VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
   VG_(dmsg)("Nevertheless we consider this a bug. Please report\n");
   VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");

   /* Complete the syscall locally, with failure. */
   SET_STATUS_Failure(VKI_ENOSYS);
}

/* Fallback table entry: bad_before as pre-handler, no post-handler.
   Returned by get_syscall_entry for unknown syscall numbers. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };
1360
/* Look up the pre/post handler pair for 'syscallno'.  Never returns
   NULL: if no handler is known, the dummy 'bad_sys' entry (which
   fails the call with ENOSYS) is returned instead. */
static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
{
   const SyscallTableEntry* sys = NULL;

#  if defined(VGO_linux)
   sys = ML_(get_linux_syscall_entry)( syscallno );

#  elif defined(VGO_darwin)
   /* Darwin multiplexes three syscall classes (Unix, Mach, mdep)
      into one number space: split the number into class + index and
      consult the matching table.  An entry only counts if it has a
      pre-handler. */
   Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);

   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
   case VG_DARWIN_SYSCALL_CLASS_UNIX:
      if (idx >= 0 && idx < ML_(syscall_table_size) &&
          ML_(syscall_table)[idx].before != NULL)
         sys = &ML_(syscall_table)[idx];
         break;
   case VG_DARWIN_SYSCALL_CLASS_MACH:
      if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
          ML_(mach_trap_table)[idx].before != NULL)
         sys = &ML_(mach_trap_table)[idx];
         break;
   case VG_DARWIN_SYSCALL_CLASS_MDEP:
      if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
          ML_(mdep_trap_table)[idx].before != NULL)
         sys = &ML_(mdep_trap_table)[idx];
         break;
   default: 
      vg_assert(0);
      break;
   }

#  else
#    error Unknown OS
#  endif

   return sys == NULL  ? &bad_sys  : sys;
}
1398
1399
1400 /* Add and remove signals from mask so that we end up telling the
1401 kernel the state we actually want rather than what the client
1402 wants. */
static void sanitize_client_sigmask(vki_sigset_t *mask)
{
   VG_(sigdelset)(mask, VKI_SIGKILL);  /* unblockable anyway */
   VG_(sigdelset)(mask, VKI_SIGSTOP);  /* unblockable anyway */
   VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
}
1409
/* Per-thread record of the syscall currently in flight (if any). */
typedef
   struct {
      SyscallArgs   orig_args; /* args as fetched from the guest state */
      SyscallArgs   args;      /* args, possibly modified by the pre-handler */
      SyscallStatus status;    /* where the syscall currently stands */
      UWord         flags;     /* Sf* flags set by the pre-handler */
   }
   SyscallInfo;

/* One record per thread; allocated lazily by ensure_initialised(). */
SyscallInfo *syscallInfo;
1420
1421 /* The scheduler needs to be able to zero out these records after a
1422 fork, hence this is exported from m_syswrap. */
VG_(clear_syscallInfo)1423 void VG_(clear_syscallInfo) ( Int tid )
1424 {
1425 vg_assert(syscallInfo);
1426 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1427 VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
1428 syscallInfo[tid].status.what = SsIdle;
1429 }
1430
VG_(is_in_syscall)1431 Bool VG_(is_in_syscall) ( Int tid )
1432 {
1433 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1434 return (syscallInfo[tid].status.what != SsIdle);
1435 }
1436
ensure_initialised(void)1437 static void ensure_initialised ( void )
1438 {
1439 Int i;
1440 static Bool init_done = False;
1441 if (init_done)
1442 return;
1443 init_done = True;
1444
1445 syscallInfo = VG_(malloc)("scinfo", VG_N_THREADS * sizeof syscallInfo[0]);
1446
1447 for (i = 0; i < VG_N_THREADS; i++) {
1448 VG_(clear_syscallInfo)( i );
1449 }
1450 }
1451
1452 /* --- This is the main function of this file. --- */
1453
VG_(client_syscall)1454 void VG_(client_syscall) ( ThreadId tid, UInt trc )
1455 {
1456 Word sysno;
1457 ThreadState* tst;
1458 const SyscallTableEntry* ent;
1459 SyscallArgLayout layout;
1460 SyscallInfo* sci;
1461
1462 ensure_initialised();
1463
1464 vg_assert(VG_(is_valid_tid)(tid));
1465 vg_assert(tid >= 1 && tid < VG_N_THREADS);
1466 vg_assert(VG_(is_running_thread)(tid));
1467
1468 # if !defined(VGO_darwin)
1469 // Resync filtering is meaningless on non-Darwin targets.
1470 vg_assert(VG_(clo_resync_filter) == 0);
1471 # endif
1472
1473 tst = VG_(get_ThreadState)(tid);
1474
1475 /* BEGIN ensure root thread's stack is suitably mapped */
1476 /* In some rare circumstances, we may do the syscall without the
1477 bottom page of the stack being mapped, because the stack pointer
1478 was moved down just a few instructions before the syscall
1479 instruction, and there have been no memory references since
1480 then, that would cause a call to VG_(extend_stack) to have
1481 happened.
1482
1483 In native execution that's OK: the kernel automagically extends
1484 the stack's mapped area down to cover the stack pointer (or sp -
1485 redzone, really). In simulated normal execution that's OK too,
1486 since any signals we get from accessing below the mapped area of
1487 the (guest's) stack lead us to VG_(extend_stack), where we
1488 simulate the kernel's stack extension logic. But that leaves
1489 the problem of entering a syscall with the SP unmapped. Because
1490 the kernel doesn't know that the segment immediately above SP is
1491 supposed to be a grow-down segment, it causes the syscall to
1492 fail, and thereby causes a divergence between native behaviour
1493 (syscall succeeds) and simulated behaviour (syscall fails).
1494
1495 This is quite a rare failure mode. It has only been seen
1496 affecting calls to sys_readlink on amd64-linux, and even then it
1497 requires a certain code sequence around the syscall to trigger
1498 it. Here is one:
1499
1500 extern int my_readlink ( const char* path );
1501 asm(
1502 ".text\n"
1503 ".globl my_readlink\n"
1504 "my_readlink:\n"
1505 "\tsubq $0x1008,%rsp\n"
1506 "\tmovq %rdi,%rdi\n" // path is in rdi
1507 "\tmovq %rsp,%rsi\n" // &buf[0] -> rsi
1508 "\tmovl $0x1000,%edx\n" // sizeof(buf) in rdx
1509 "\tmovl $"__NR_READLINK",%eax\n" // syscall number
1510 "\tsyscall\n"
1511 "\taddq $0x1008,%rsp\n"
1512 "\tret\n"
1513 ".previous\n"
1514 );
1515
1516 For more details, see bug #156404
1517 (https://bugs.kde.org/show_bug.cgi?id=156404).
1518
1519 The fix is actually very simple. We simply need to call
1520 VG_(extend_stack) for this thread, handing it the lowest
1521 possible valid address for stack (sp - redzone), to ensure the
1522 pages all the way down to that address, are mapped. Because
1523 this is a potentially expensive and frequent operation, we
1524 do the following:
1525
1526 Only the main thread (tid=1) has a growdown stack. So
1527 ignore all others. It is conceivable, although highly unlikely,
1528 that the main thread exits, and later another thread is
1529 allocated tid=1, but that's harmless, I believe;
1530 VG_(extend_stack) will do nothing when applied to a non-root
1531 thread.
1532
1533 All this guff is of course Linux-specific. Hence the ifdef.
1534 */
1535 # if defined(VGO_linux)
1536 if (tid == 1/*ROOT THREAD*/) {
1537 Addr stackMin = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
1538
1539 /* The precise thing to do here would be to extend the stack only
1540 if the system call can be proven to access unmapped user stack
1541 memory. That is an enormous amount of work even if a proper
1542 spec of system calls was available.
1543
1544 In the case where the system call does not access user memory
1545 the stack pointer here can have any value. A legitimate testcase
1546 that exercises this is none/tests/s390x/stmg.c:
1547 The stack pointer happens to be in the reservation segment near
1548 the end of the addressable memory and there is no SkAnonC segment
1549 above.
1550
1551 So the approximation we're taking here is to extend the stack only
1552 if the client stack pointer does not look bogus. */
1553 if (VG_(am_addr_is_in_extensible_client_stack)(stackMin))
1554 VG_(extend_stack)( tid, stackMin );
1555 }
1556 # endif
1557 /* END ensure root thread's stack is suitably mapped */
1558
1559 /* First off, get the syscall args and number. This is a
1560 platform-dependent action. */
1561
1562 sci = & syscallInfo[tid];
1563 vg_assert(sci->status.what == SsIdle);
1564
1565 getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
1566
1567 /* Copy .orig_args to .args. The pre-handler may modify .args, but
1568 we want to keep the originals too, just in case. */
1569 sci->args = sci->orig_args;
1570
1571 /* Save the syscall number in the thread state in case the syscall
1572 is interrupted by a signal. */
1573 sysno = sci->orig_args.sysno;
1574
1575 /* It's sometimes useful, as a crude debugging hack, to get a
1576 stack trace at each (or selected) syscalls. */
1577 if (0 && sysno == __NR_ioctl) {
1578 VG_(umsg)("\nioctl:\n");
1579 VG_(get_and_pp_StackTrace)(tid, 10);
1580 VG_(umsg)("\n");
1581 }
1582
1583 # if defined(VGO_darwin)
1584 /* Record syscall class. But why? Because the syscall might be
1585 interrupted by a signal, and in the signal handler (which will
1586 be m_signals.async_signalhandler) we will need to build a SysRes
1587 reflecting the syscall return result. In order to do that we
1588 need to know the syscall class. Hence stash it in the guest
1589 state of this thread. This madness is not needed on Linux
1590 because it only has a single syscall return convention and so
1591 there is no ambiguity involved in converting the post-signal
1592 machine state into a SysRes. */
1593 tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
1594 # endif
1595
1596 /* The default what-to-do-next thing is hand the syscall to the
1597 kernel, so we pre-set that here. Set .sres to something
1598 harmless looking (is irrelevant because .what is not
1599 SsComplete.) */
1600 sci->status.what = SsHandToKernel;
1601 sci->status.sres = VG_(mk_SysRes_Error)(0);
1602 sci->flags = 0;
1603
1604 /* Fetch the syscall's handlers. If no handlers exist for this
1605 syscall, we are given dummy handlers which force an immediate
1606 return with ENOSYS. */
1607 ent = get_syscall_entry(sysno);
1608
1609 /* Fetch the layout information, which tells us where in the guest
1610 state the syscall args reside. This is a platform-dependent
1611 action. This info is needed so that the scalar syscall argument
1612 checks (PRE_REG_READ calls) know which bits of the guest state
1613 they need to inspect. */
1614 getSyscallArgLayout( &layout );
1615
1616 /* Make sure the tmp signal mask matches the real signal mask;
1617 sigsuspend may change this. */
1618 vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
1619
1620 /* Right, we're finally ready to Party. Call the pre-handler and
1621 see what we get back. At this point:
1622
1623 sci->status.what is Unset (we don't know yet).
1624 sci->orig_args contains the original args.
1625 sci->args is the same as sci->orig_args.
1626 sci->flags is zero.
1627 */
1628
1629 PRINT("SYSCALL[%d,%d](%s) ",
1630 VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
1631
1632 /* Do any pre-syscall actions */
1633 if (VG_(needs).syscall_wrapper) {
1634 UWord tmpv[8];
1635 tmpv[0] = sci->orig_args.arg1;
1636 tmpv[1] = sci->orig_args.arg2;
1637 tmpv[2] = sci->orig_args.arg3;
1638 tmpv[3] = sci->orig_args.arg4;
1639 tmpv[4] = sci->orig_args.arg5;
1640 tmpv[5] = sci->orig_args.arg6;
1641 tmpv[6] = sci->orig_args.arg7;
1642 tmpv[7] = sci->orig_args.arg8;
1643 VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
1644 &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
1645 }
1646
1647 vg_assert(ent);
1648 vg_assert(ent->before);
1649 (ent->before)( tid,
1650 &layout,
1651 &sci->args, &sci->status, &sci->flags );
1652
1653 /* The pre-handler may have modified:
1654 sci->args
1655 sci->status
1656 sci->flags
1657 All else remains unchanged.
1658 Although the args may be modified, pre handlers are not allowed
1659 to change the syscall number.
1660 */
1661 /* Now we proceed according to what the pre-handler decided. */
1662 vg_assert(sci->status.what == SsHandToKernel
1663 || sci->status.what == SsComplete);
1664 vg_assert(sci->args.sysno == sci->orig_args.sysno);
1665
1666 if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
1667 /* The pre-handler completed the syscall itself, declaring
1668 success. */
1669 if (sci->flags & SfNoWriteResult) {
1670 PRINT(" --> [pre-success] NoWriteResult");
1671 } else {
1672 PRINT(" --> [pre-success] %s", VG_(sr_as_string)(sci->status.sres));
1673 }
1674 /* In this case the allowable flags are to ask for a signal-poll
1675 and/or a yield after the call. Changing the args isn't
1676 allowed. */
1677 vg_assert(0 == (sci->flags
1678 & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
1679 vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1680 }
1681
1682 else
1683 if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
1684 /* The pre-handler decided to fail syscall itself. */
1685 PRINT(" --> [pre-fail] %s", VG_(sr_as_string)(sci->status.sres));
1686 /* In this case, the pre-handler is also allowed to ask for the
1687 post-handler to be run anyway. Changing the args is not
1688 allowed. */
1689 vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1690 vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1691 }
1692
1693 else
1694 if (sci->status.what != SsHandToKernel) {
1695 /* huh?! */
1696 vg_assert(0);
1697 }
1698
1699 else /* (sci->status.what == HandToKernel) */ {
1700 /* Ok, this is the usual case -- and the complicated one. There
1701 are two subcases: sync and async. async is the general case
1702 and is to be used when there is any possibility that the
1703 syscall might block [a fact that the pre-handler must tell us
1704 via the sci->flags field.] Because the tidying-away /
1705 context-switch overhead of the async case could be large, if
1706 we are sure that the syscall will not block, we fast-track it
1707 by doing it directly in this thread, which is a lot
1708 simpler. */
1709
1710 /* Check that the given flags are allowable: MayBlock, PollAfter
1711 and PostOnFail are ok. */
1712 vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1713
1714 if (sci->flags & SfMayBlock) {
1715
1716 /* Syscall may block, so run it asynchronously */
1717 vki_sigset_t mask;
1718
1719 PRINT(" --> [async] ... \n");
1720
1721 mask = tst->sig_mask;
1722 sanitize_client_sigmask(&mask);
1723
1724 /* Gack. More impedance matching. Copy the possibly
1725 modified syscall args back into the guest state. */
1726 /* JRS 2009-Mar-16: if the syscall args are possibly modified,
1727 then this assertion is senseless:
1728 vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1729 The case that exposed it was sys_posix_spawn on Darwin,
1730 which heavily modifies its arguments but then lets the call
1731 go through anyway, with SfToBlock set, hence we end up here. */
1732 putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1733
1734 /* Drop the bigLock */
1735 VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
1736 /* Urr. We're now in a race against other threads trying to
1737 acquire the bigLock. I guess that doesn't matter provided
1738 that do_syscall_for_client only touches thread-local
1739 state. */
1740
1741 /* Do the call, which operates directly on the guest state,
1742 not on our abstracted copies of the args/result. */
1743 do_syscall_for_client(sysno, tst, &mask);
1744
1745 /* do_syscall_for_client may not return if the syscall was
1746 interrupted by a signal. In that case, flow of control is
1747 first to m_signals.async_sighandler, which calls
1748 VG_(fixup_guest_state_after_syscall_interrupted), which
1749 fixes up the guest state, and possibly calls
1750 VG_(post_syscall). Once that's done, control drops back
1751 to the scheduler. */
1752
1753 /* Darwin: do_syscall_for_client may not return if the
1754 syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
1755 responded by starting the thread at wqthread_hijack(reuse=1)
1756 (to run another workqueue item). In that case, wqthread_hijack
1757 calls ML_(wqthread_continue), which is similar to
1758 VG_(fixup_guest_state_after_syscall_interrupted). */
1759
1760 /* Reacquire the lock */
1761 VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
1762
1763 /* Even more impedance matching. Extract the syscall status
1764 from the guest state. */
1765 getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
1766 vg_assert(sci->status.what == SsComplete);
1767
1768 /* Be decorative, if required. */
1769 if (VG_(clo_trace_syscalls)) {
1770 PRINT("SYSCALL[%d,%d](%s) ... [async] --> %s",
1771 VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1772 VG_(sr_as_string)(sci->status.sres));
1773 }
1774
1775 } else {
1776
1777 /* run the syscall directly */
1778 /* The pre-handler may have modified the syscall args, but
1779 since we're passing values in ->args directly to the
1780 kernel, there's no point in flushing them back to the
1781 guest state. Indeed doing so could be construed as
1782 incorrect. */
1783 SysRes sres
1784 = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
1785 sci->args.arg3, sci->args.arg4,
1786 sci->args.arg5, sci->args.arg6,
1787 sci->args.arg7, sci->args.arg8 );
1788 sci->status = convert_SysRes_to_SyscallStatus(sres);
1789
1790 /* Be decorative, if required. */
1791 if (VG_(clo_trace_syscalls)) {
1792 PRINT("[sync] --> %s", VG_(sr_as_string)(sci->status.sres));
1793 }
1794 }
1795 }
1796
1797 vg_assert(sci->status.what == SsComplete);
1798
1799 vg_assert(VG_(is_running_thread)(tid));
1800
1801 /* Dump the syscall result back in the guest state. This is
1802 a platform-specific action. */
1803 if (!(sci->flags & SfNoWriteResult))
1804 putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
1805
1806 /* Situation now:
1807 - the guest state is now correctly modified following the syscall
1808 - modified args, original args and syscall status are still
1809 available in the syscallInfo[] entry for this syscall.
1810
1811 Now go on to do the post-syscall actions (read on down ..)
1812 */
1813 PRINT(" ");
1814 VG_(post_syscall)(tid);
1815 PRINT("\n");
1816 }
1817
1818
1819 /* Perform post syscall actions. The expected state on entry is
1820 precisely as at the end of VG_(client_syscall), that is:
1821
1822 - guest state up to date following the syscall
1823 - modified args, original args and syscall status are still
1824 available in the syscallInfo[] entry for this syscall.
1825 - syscall status matches what's in the guest state.
1826
1827 There are two ways to get here: the normal way -- being called by
1828 VG_(client_syscall), and the unusual way, from
1829 VG_(fixup_guest_state_after_syscall_interrupted).
1830 Darwin: there's a third way, ML_(wqthread_continue).
1831 */
void VG_(post_syscall) (ThreadId tid)
{
   SyscallInfo*             sci;
   const SyscallTableEntry* ent;
   SyscallStatus            test_status;
   ThreadState*             tst;
   Word sysno;

   /* Preliminaries */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];

   /* m_signals.sigvgkill_handler might call here even when not in
      a syscall.  In that case there is nothing to do; just mark the
      per-thread syscall record as idle and return. */
   if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
      sci->status.what = SsIdle;
      return;
   }

   /* Validate current syscallInfo entry.  In particular we require
      that the current .status matches what's actually in the guest
      state.  At least in the normal case where we have actually
      previously written the result into the guest state; if the
      wrapper set SfNoWriteResult, the guest state was deliberately
      left alone, so the cross-check below is skipped. */
   vg_assert(sci->status.what == SsComplete);

   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
   if (!(sci->flags & SfNoWriteResult))
      vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
   /* Failure of the above assertion on Darwin can indicate a problem
      in the syscall wrappers that pre-fail or pre-succeed the
      syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
      when they really should call SET_STATUS_from_SysRes.  The former
      create a UNIX-class syscall result on Darwin, which may not be
      correct for the syscall; if that's the case then this assertion
      fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
      non-Darwin platforms this assertion should never fail, and this
      comment is completely irrelevant. */
   /* Ok, looks sane */

   /* Get the system call number.  Because the pre-handler isn't
      allowed to mess with it, it should be the same for both the
      original and potentially-modified args. */
   vg_assert(sci->args.sysno == sci->orig_args.sysno);
   sysno = sci->args.sysno;
   ent = get_syscall_entry(sysno);

   /* pre: status == Complete (asserted above) */
   /* Consider either success or failure.  Now run the post handler if:
      - it exists, and
      - Success or (Failure and PostOnFail is set)
   */
   if (ent->after
       && ((!sr_isError(sci->status.sres))
           || (sr_isError(sci->status.sres)
               && (sci->flags & SfPostOnFail) ))) {

      (ent->after)( tid, &sci->args, &sci->status );
   }

   /* Because the post handler might have changed the status (eg, the
      post-handler for sys_open can change the result from success to
      failure if the kernel supplied a fd that it doesn't like), once
      again dump the syscall result back in the guest state. */
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Do any post-syscall actions required by the tool.  The tool is
      handed a copy of the ORIGINAL (unmodified) args, not the
      possibly-modified ones. */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_post_syscall, tid,
                    sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
                    sci->status.sres);
   }

   /* The syscall is done. */
   vg_assert(sci->status.what == SsComplete);
   sci->status.what = SsIdle;

   /* The pre/post wrappers may have concluded that pending signals
      might have been created, and will have set SfPollAfter to
      request a poll for them once the syscall is done. */
   if (sci->flags & SfPollAfter)
      VG_(poll_signals)(tid);

   /* Similarly, the wrappers might have asked for a yield
      afterwards. */
   if (sci->flags & SfYieldAfter)
      VG_(vg_yield)();
}
1934
1935
1936 /* ---------------------------------------------------------------------
1937 Dealing with syscalls which get interrupted by a signal:
1938 VG_(fixup_guest_state_after_syscall_interrupted)
1939 ------------------------------------------------------------------ */
1940
1941 /* Syscalls done on behalf of the client are finally handed off to the
1942 kernel in VG_(client_syscall) above, either by calling
1943 do_syscall_for_client (the async case), or by calling
1944 VG_(do_syscall6) (the sync case).
1945
1946 If the syscall is not interrupted by a signal (it may block and
1947 later unblock, but that's irrelevant here) then those functions
1948 eventually return and so control is passed to VG_(post_syscall).
1949 NB: not sure if the sync case can actually get interrupted, as it
1950 operates with all signals masked.
1951
1952 However, the syscall may get interrupted by an async-signal. In
1953 that case do_syscall_for_client/VG_(do_syscall6) do not
1954 return. Instead we wind up in m_signals.async_sighandler. We need
1955 to fix up the guest state to make it look like the syscall was
   interrupted for the guest.  So async_sighandler calls here, and this
1957 does the fixup. Note that from here we wind up calling
1958 VG_(post_syscall) too.
1959 */
1960
1961
1962 /* These are addresses within ML_(do_syscall_for_client_WRK). See
1963 syscall-$PLAT.S for details.
1964 */
1965 #if defined(VGO_linux)
1966 extern const Addr ML_(blksys_setup);
1967 extern const Addr ML_(blksys_restart);
1968 extern const Addr ML_(blksys_complete);
1969 extern const Addr ML_(blksys_committed);
1970 extern const Addr ML_(blksys_finished);
1971 #elif defined(VGO_darwin)
/* Darwin requires extra ugliness */
1973 extern const Addr ML_(blksys_setup_MACH);
1974 extern const Addr ML_(blksys_restart_MACH);
1975 extern const Addr ML_(blksys_complete_MACH);
1976 extern const Addr ML_(blksys_committed_MACH);
1977 extern const Addr ML_(blksys_finished_MACH);
1978 extern const Addr ML_(blksys_setup_MDEP);
1979 extern const Addr ML_(blksys_restart_MDEP);
1980 extern const Addr ML_(blksys_complete_MDEP);
1981 extern const Addr ML_(blksys_committed_MDEP);
1982 extern const Addr ML_(blksys_finished_MDEP);
1983 extern const Addr ML_(blksys_setup_UNIX);
1984 extern const Addr ML_(blksys_restart_UNIX);
1985 extern const Addr ML_(blksys_complete_UNIX);
1986 extern const Addr ML_(blksys_committed_UNIX);
1987 extern const Addr ML_(blksys_finished_UNIX);
1988 #else
1989 # error "Unknown OS"
1990 #endif
1991
1992
/* Back up guest state to restart a system call.  Each platform case
   rewinds the guest program counter by the size of its syscall
   instruction, then sanity-checks that the bytes now at the PC really
   are that syscall instruction (asserting if not). */

void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
{
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      if (p[0] != 0xcd || p[1] != 0x80)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);

      vg_assert(p[0] == 0xcd && p[1] == 0x80);
   }

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      if (p[0] != 0x0F || p[1] != 0x05)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);

      vg_assert(p[0] == 0x0F && p[1] == 0x05);
   }

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_ppc64le_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
      (bytes checked in reverse order, since this platform is
      little-endian)
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[3] != 0x44 || p[2] != 0x0 || p[1] != 0x0 || p[0] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA + 0ULL, p[3], p[2], p[1], p[0]);

      vg_assert(p[3] == 0x44 && p[2] == 0x0 && p[1] == 0x0 && p[0] == 0x2);
   }

#elif defined(VGP_arm_linux)
   if (arch->vex.guest_R15T & 1) {
      // Thumb mode.  SVC is encoded as
      //   1101 1111 imm8
      // where imm8 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
      // Strip the Thumb bit (bit 0) to get the actual instruction address.
      UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
      Bool   valid = p[0] == 0 && p[1] == 0xDF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (Thumb) syscall that is not syscall "
                      "at %#llx %02x %02x\n",
                      arch->vex.guest_R15T - 1ULL, p[0], p[1]);
      }
      vg_assert(valid);
      // FIXME: NOTE, this really isn't right.  We need to back up
      // ITSTATE to what it was before the SVC instruction, but we
      // don't know what it was.  At least assert that it is now
      // zero, because if it is nonzero then it must also have
      // been nonzero for the SVC itself, which means it was
      // conditional.  Urk.
      vg_assert(arch->vex.guest_ITSTATE == 0);
   } else {
      // ARM mode.  SVC is encoded as
      //   cond 1111 imm24
      // where imm24 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
      UChar* p     = (UChar*)arch->vex.guest_R15T;
      Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
                     && (p[3] & 0xF) == 0xF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (ARM) syscall that is not syscall "
                      "at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
      }
      vg_assert(valid);
   }

#elif defined(VGP_arm64_linux)
   arch->vex.guest_PC -= 4;             // sizeof(arm64 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      svc #0 == d4 00 00 01
   */
   {
      UChar *p = (UChar *)arch->vex.guest_PC;

      if (p[0] != 0x01 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0xD4)
         VG_(message)(
            Vg_DebugMsg,
            "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
            arch->vex.guest_PC + 0ULL, p[0], p[1], p[2], p[3]
          );

      vg_assert(p[0] == 0x01 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0xD4);
   }

#elif defined(VGP_x86_darwin)
   /* On Darwin the pre-syscall IP was stashed in the guest state, so
      restore it rather than decrementing. */
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
      int $0x81 == CD 81
      int $0x82 == CD 82
      sysenter  == 0F 34
   */
   {
       UChar *p = (UChar *)arch->vex.guest_EIP;
       Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
                  || (p[0] == 0xCD && p[1] == 0x81)
                  || (p[0] == 0xCD && p[1] == 0x82)
                  || (p[0] == 0x0F && p[1] == 0x34);
       if (!ok)
           VG_(message)(Vg_DebugMsg,
                        "?! restarting over syscall at %#x %02x %02x\n",
                        arch->vex.guest_EIP, p[0], p[1]);
       vg_assert(ok);
   }

#elif defined(VGP_amd64_darwin)
   // DDD: #warning GrP fixme amd64 restart unimplemented
   vg_assert(0);

#elif defined(VGP_s390x_linux)
   arch->vex.guest_IA -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0A <num>
      (only the opcode byte is checked; the second byte is the
      immediate syscall number)
   */
   {
      UChar *p = (UChar *)arch->vex.guest_IA;
      if (p[0] != 0x0A)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_IA, p[0], p[1]);

      vg_assert(p[0] == 0x0A);
   }

#elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux)

   arch->vex.guest_PC -= 4;             // sizeof(mips instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 00 00 00 0C
      big endian
      syscall == 0C 00 00 00
   */
   {
      UChar *p = (UChar *)(arch->vex.guest_PC);
#     if defined (VG_LITTLEENDIAN)
      if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
#     elif defined (VG_BIGENDIAN)
      if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c);
#     else
#        error "Unknown endianness"
#     endif
   }
#elif defined(VGP_tilegx_linux)
   arch->vex.guest_pc -= 8;             // sizeof({ swint1 })

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.  No other instruction in the same bundle.
   */
   {
      unsigned long *p = (unsigned long *)arch->vex.guest_pc;

      if (p[0] != 0x286b180051485000ULL )  // "swint1", little endian only
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at 0x%lx %lx\n",
                      arch->vex.guest_pc, p[0]);
      vg_assert(p[0] == 0x286b180051485000ULL);
   }

#else
#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#endif
}
2230
2231
2232 /*
2233 Fix up the guest state when a syscall is interrupted by a signal
2234 and so has been forced to return 'sysret'.
2235
2236 To do this, we determine the precise state of the syscall by
2237 looking at the (real) IP at the time the signal happened. The
2238 syscall sequence looks like:
2239
2240 1. unblock signals
2241 2. perform syscall
2242 3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0)
2243 4. re-block signals
2244
2245 If a signal
2246 happens at Then Why?
2247 [1-2) restart nothing has happened (restart syscall)
2248 [2] restart syscall hasn't started, or kernel wants to restart
2249 [2-3) save syscall complete, but results not saved
2250 [3-4) syscall complete, results saved
2251
2252 Sometimes we never want to restart an interrupted syscall (because
2253 sigaction says not to), so we only restart if "restart" is True.
2254
2255 This will also call VG_(post_syscall) if the syscall has actually
2256 completed (either because it was interrupted, or because it
2257 actually finished). It will not call VG_(post_syscall) if the
2258 syscall is set up for restart, which means that the pre-wrapper may
2259 get called multiple times.
2260 */
2261
void
VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
                                                  Addr     ip,
                                                  SysRes   sres,
                                                  Bool     restart)
{
   /* Note that we don't know the syscall number here, since (1) in
      general there's no reliable way to get hold of it short of
      stashing it in the guest state before the syscall, and (2) in
      any case we don't need to know it for the actions done by this
      routine.

      Furthermore, 'sres' is only used in the case where the syscall
      is complete, but the result has not been committed to the guest
      state yet.  In any other situation it will be meaningless and
      therefore ignored. */

   ThreadState*     tst;
   SyscallStatus    canonical;
   ThreadArchState* th_regs;
   SyscallInfo*     sci;

   /* Compute some Booleans indicating which range we're in.  These
      ranges are delimited by labels exported from the platform's
      do_syscall_for_client assembly stub (see syscall-$PLAT.S). */
   Bool outside_range,
        in_setup_to_restart,      // [1,2) in the .S files
        at_restart,               // [2]   in the .S files
        in_complete_to_committed, // [3,4) in the .S files
        in_committed_to_finished; // [4,5) in the .S files

#  if defined(VGO_linux)
   outside_range
      = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   in_setup_to_restart
      = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   at_restart
      = ip == ML_(blksys_restart);
   in_complete_to_committed
      = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   in_committed_to_finished
      = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
#  elif defined(VGO_darwin)
   /* Darwin has three separate syscall stubs (MACH, MDEP, UNIX), so
      each Boolean is the disjunction over all three ranges. */
   outside_range
      =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
      && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
      && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   in_setup_to_restart
      =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
      || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
      || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   at_restart
      =  (ip == ML_(blksys_restart_MACH))
      || (ip == ML_(blksys_restart_MDEP))
      || (ip == ML_(blksys_restart_UNIX));
   in_complete_to_committed
      =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
      || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
      || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   in_committed_to_finished
      =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
      || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
      || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
#  else
#    error "Unknown OS"
#  endif

   if (VG_(clo_trace_signals))
      VG_(message)( Vg_DebugMsg,
                    "interrupted_syscall: tid=%d, ip=0x%llx, "
                    "restart=%s, sres.isErr=%s, sres.val=%lld\n",
                    (Int)tid,
                    (ULong)ip,
                    restart ? "True" : "False",
                    sr_isError(sres) ? "True" : "False",
                    (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   th_regs = &tst->arch;
   sci     = & syscallInfo[tid];

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm. */
      vg_assert(sci->status.what != SsIdle);
      return;
   }

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall).
      Hence: */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   }

   else
   if (at_restart) {
      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         EINTR it. */
      if (restart) {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
      } else {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
         canonical = convert_SysRes_to_SyscallStatus(
                        VG_(mk_SysRes_Error)( VKI_EINTR )
                     );
         if (!(sci->flags & SfNoWriteResult))
            putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
         sci->status = canonical;
         VG_(post_syscall)(tid);
      }
   }

   else
   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      if (!(sci->flags & SfNoWriteResult))
         putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
      sci->status = canonical;
      VG_(post_syscall)(tid);
   }

   else
   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed and committed: nothing to do\n");
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);
   }

   else
      VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
}
2432
2433
2434 #if defined(VGO_darwin)
2435 // Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
2436 // This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
2437 // This longjmps back to the scheduler.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState* tst;
   SyscallInfo* sci;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread
   // state: SfNoWriteResult stops VG_(post_syscall) writing the faked
   // result into the guest registers.
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   // Resync our view of the address space, since the kernel may have
   // remapped things behind our back while the workqueue item ran.
   ML_(sync_mappings)("in", "ML_(wqthread_continue_NORETURN)", 0);

   sci->status.what = SsIdle;

   /* Hand control back to the scheduler; this call does not return. */
   vg_assert(tst->sched_jmpbuf_valid);
   VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);

   /* NOTREACHED */
   vg_assert(0);
}
2472 #endif
2473
2474
2475 /* ---------------------------------------------------------------------
2476 A place to store the where-to-call-when-really-done pointer
2477 ------------------------------------------------------------------ */
2478
2479 // When the final thread is done, where shall I call to shutdown the
2480 // system cleanly? Is set once at startup (in m_main) and never
2481 // changes after that. Is basically a pointer to the exit
2482 // continuation. This is all just a nasty hack to avoid calling
2483 // directly from m_syswrap to m_main at exit, since that would cause
2484 // m_main to become part of a module cycle, which is silly.
/* Installed once by m_main at startup (see comment above); NULL until then. */
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;
2488
2489 /*--------------------------------------------------------------------*/
2490 /*--- end ---*/
2491 /*--------------------------------------------------------------------*/
2492