/*--------------------------------------------------------------------*/
/*--- Darwin-specific syscalls, etc.          syswrap-x86-darwin.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2005-2013 Apple Inc.
      Greg Parker  gparker@apple.com

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_x86_darwin)

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_xarray.h"
#include "pub_core_clientstate.h"
#include "pub_core_debuglog.h"
#include "pub_core_debuginfo.h"    // VG_(di_notify_*)
#include "pub_core_transtab.h"     // VG_(discard_translations)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_libcsignal.h"
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_scheduler.h"
#include "pub_core_signals.h"
#include "pub_core_syscall.h"
#include "pub_core_syswrap.h"
#include "pub_core_tooliface.h"

#include "priv_types_n_macros.h"
#include "priv_syswrap-generic.h"   /* for decls of generic wrappers */
#include "priv_syswrap-darwin.h"    /* for decls of darwin-ish wrappers */
#include "priv_syswrap-main.h"


#include <mach/mach.h>

static void x86_thread_state32_from_vex(i386_thread_state_t *mach,
                                        VexGuestX86State *vex)
{
    mach->__eax = vex->guest_EAX;
    mach->__ebx = vex->guest_EBX;
    mach->__ecx = vex->guest_ECX;
    mach->__edx = vex->guest_EDX;
    mach->__edi = vex->guest_EDI;
    mach->__esi = vex->guest_ESI;
    mach->__ebp = vex->guest_EBP;
    mach->__esp = vex->guest_ESP;
    mach->__ss = vex->guest_SS;
    mach->__eflags = LibVEX_GuestX86_get_eflags(vex);
    mach->__eip = vex->guest_EIP;
    mach->__cs = vex->guest_CS;
    mach->__ds = vex->guest_DS;
    mach->__es = vex->guest_ES;
    mach->__fs = vex->guest_FS;
    mach->__gs = vex->guest_GS;
}


static void x86_float_state32_from_vex(i386_float_state_t *mach,
                                       VexGuestX86State *vex)
{
   // DDD: #warning GrP fixme fp state

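   // A minimal conversion: copy the eight XMM registers in one block.
   // This relies on guest_XMM0..guest_XMM7 being laid out contiguously
   // in VexGuestX86State and __fpu_xmm0..__fpu_xmm7 being contiguous in
   // i386_float_state_t.  x87 and MXCSR state is not converted (see the
   // fixme above).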
   VG_(memcpy)(&mach->__fpu_xmm0, &vex->guest_XMM0, 8 * sizeof(mach->__fpu_xmm0));
}


void thread_state_from_vex(thread_state_t mach_generic,
                           thread_state_flavor_t flavor,
                           mach_msg_type_number_t count,
                           VexGuestArchState *vex_generic)
{
   VexGuestX86State *vex = (VexGuestX86State *)vex_generic;

   switch (flavor) {
   case i386_THREAD_STATE:
      vg_assert(count == i386_THREAD_STATE_COUNT);
      x86_thread_state32_from_vex((i386_thread_state_t *)mach_generic, vex);
      break;

   case i386_FLOAT_STATE:
      vg_assert(count == i386_FLOAT_STATE_COUNT);
      x86_float_state32_from_vex((i386_float_state_t *)mach_generic, vex);
      break;

   default:
      vg_assert(0);
   }
}


static void x86_thread_state32_to_vex(const i386_thread_state_t *mach,
                                      VexGuestX86State *vex)
{
   LibVEX_GuestX86_initialise(vex);
   vex->guest_EAX = mach->__eax;
   vex->guest_EBX = mach->__ebx;
   vex->guest_ECX = mach->__ecx;
   vex->guest_EDX = mach->__edx;
   vex->guest_EDI = mach->__edi;
   vex->guest_ESI = mach->__esi;
   vex->guest_EBP = mach->__ebp;
   vex->guest_ESP = mach->__esp;
   vex->guest_SS = mach->__ss;
   // DDD: #warning GrP fixme eflags
   vex->guest_EIP = mach->__eip;
   vex->guest_CS = mach->__cs;
   vex->guest_DS = mach->__ds;
   vex->guest_ES = mach->__es;
   vex->guest_FS = mach->__fs;
   vex->guest_GS = mach->__gs;
}

static void x86_float_state32_to_vex(const i386_float_state_t *mach,
                                     VexGuestX86State *vex)
{
   // DDD: #warning GrP fixme fp state

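   // Mirror of x86_float_state32_from_vex above: only the eight XMM
   // registers are restored; the x87/MXCSR parts of the vex guest state
   // are left untouched by this function.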
   VG_(memcpy)(&vex->guest_XMM0, &mach->__fpu_xmm0, 8 * sizeof(mach->__fpu_xmm0));
}


void thread_state_to_vex(const thread_state_t mach_generic,
                         thread_state_flavor_t flavor,
                         mach_msg_type_number_t count,
                         VexGuestArchState *vex_generic)
{
   VexGuestX86State *vex = (VexGuestX86State *)vex_generic;

   switch(flavor) {
   case i386_THREAD_STATE:
      vg_assert(count == i386_THREAD_STATE_COUNT);
      x86_thread_state32_to_vex((const i386_thread_state_t*)mach_generic,vex);
      break;
   case i386_FLOAT_STATE:
      vg_assert(count == i386_FLOAT_STATE_COUNT);
      x86_float_state32_to_vex((const i386_float_state_t*)mach_generic,vex);
      break;

   default:
      vg_assert(0);
      break;
   }
}


ThreadState *build_thread(const thread_state_t state,
                          thread_state_flavor_t flavor,
                          mach_msg_type_number_t count)
{
   ThreadId tid = VG_(alloc_ThreadState)();
   ThreadState *tst = VG_(get_ThreadState)(tid);

   vg_assert(flavor == i386_THREAD_STATE);
   vg_assert(count == i386_THREAD_STATE_COUNT);

   // Initialize machine registers

   thread_state_to_vex(state, flavor, count, &tst->arch.vex);

   I_die_here;
   // GrP fixme signals, sig_mask, tmp_sig_mask, os_state.parent

   find_stack_segment(tid, tst->arch.vex.guest_ESP);

   return tst;
}


// Edit the thread state to send to the real kernel.
// The real thread will run start_thread_NORETURN(tst)
// on a separate non-client stack.
void hijack_thread_state(thread_state_t mach_generic,
                         thread_state_flavor_t flavor,
                         mach_msg_type_number_t count,
                         ThreadState *tst)
{
   i386_thread_state_t *mach = (i386_thread_state_t *)mach_generic;
   char *stack;

   vg_assert(flavor == i386_THREAD_STATE);
   vg_assert(count == i386_THREAD_STATE_COUNT);

   stack = (char *)allocstack(tst->tid);
   stack -= 64+320;                       // make room for top frame
   memset(stack, 0, 64+320);              // ...and clear it
   *(uintptr_t *)stack = (uintptr_t)tst;  // set parameter
   stack -= sizeof(uintptr_t);
   *(uintptr_t *)stack = 0;               // push fake return address
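   // The frame now looks like an ordinary call into
   // start_thread_NORETURN(): tst sits at 4(%esp) and a zero return
   // address at 0(%esp).  The function never returns, so the zero
   // presumably just terminates any unwinder that walks this frame.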

   mach->__eip = (uintptr_t)&start_thread_NORETURN;
   mach->__esp = (uintptr_t)stack;
}


/* Call f(arg1), but first switch stacks, using 'stack' as the new
   stack, and use 'retaddr' as f's return-to address.  Also, clear all
   the integer registers before entering f.*/
__attribute__((noreturn))
void call_on_new_stack_0_1 ( Addr stack,
                             Addr retaddr,
                             void (*f)(Word),
                             Word arg1 );
//  4(%esp) == stack (must be 16-byte aligned)
//  8(%esp) == retaddr
// 12(%esp) == f
// 16(%esp) == arg1
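// After the three alignment pushes and the argument pushes below, the
// new stack holds, from low to high addresses: f, retaddr, arg1.  The
// final 'ret' therefore pops f's address and jumps to it, and f then
// sees retaddr at 0(%esp) and arg1 at 4(%esp), exactly as if it had
// been reached by an ordinary call.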
asm(
".globl _call_on_new_stack_0_1\n"
"_call_on_new_stack_0_1:\n"
"   movl %esp, %esi\n"     // remember old stack pointer
"   movl 4(%esi), %esp\n"  // set new stack
"   pushl $0\n"            // align stack
"   pushl $0\n"            // align stack
"   pushl $0\n"            // align stack
"   pushl 16(%esi)\n"      // arg1 to stack
"   pushl  8(%esi)\n"      // retaddr to stack
"   pushl 12(%esi)\n"      // f to stack
"   movl $0, %eax\n"       // zero all GP regs
"   movl $0, %ebx\n"
"   movl $0, %ecx\n"
"   movl $0, %edx\n"
"   movl $0, %esi\n"
"   movl $0, %edi\n"
"   movl $0, %ebp\n"
"   ret\n"                 // jump to f
"   ud2\n"                 // should never get here
);


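// On entry from the kernel the new pthread's parameters arrive in
// registers: eax=self, ebx=kport, ecx=func, edx=func_arg,
// edi=stacksize, esi=flags, esp=sp.  The stub below pushes them (plus
// the original sp and a fake return address) so that pthread_hijack()
// receives them as an ordinary C argument list.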
asm(
".globl _pthread_hijack_asm\n"
"_pthread_hijack_asm:\n"
"   movl %esp,%ebp\n"
"   push $0\n"    // alignment pad
"   push %ebp\n"  // original sp
"   push %esi\n"  // flags
"   push %edi\n"  // stacksize
"   push %edx\n"  // func_arg
"   push %ecx\n"  // func
"   push %ebx\n"  // kport
"   push %eax\n"  // self
"   push $0\n"    // fake return address
"   jmp _pthread_hijack\n"
    );


void pthread_hijack(Addr self, Addr kport, Addr func, Addr func_arg,
                    Addr stacksize, Addr flags, Addr sp)
{
   vki_sigset_t blockall;
   ThreadState *tst = (ThreadState *)func_arg;
   VexGuestX86State *vex = &tst->arch.vex;

   // VG_(printf)("pthread_hijack pthread %p, machthread %p, func %p, arg %p, stacksize %p, flags %p, sp %p\n", self, kport, func, func_arg, stacksize, flags, sp);

   // Wait for parent thread's permission.
   // The parent thread holds V's lock on our behalf.
   semaphore_wait(tst->os_state.child_go);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace,
   // which requires valid register data.
   // DDD: need to do post_reg_write events here?
   LibVEX_GuestX86_initialise(vex);
   vex->guest_EIP = pthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = func;
   vex->guest_EDX = tst->os_state.func_arg;
   vex->guest_EDI = stacksize;
   vex->guest_ESI = flags;
   vex->guest_ESP = sp;

   // Record thread's stack and Mach port and pthread struct
   tst->os_state.pthread = self;
   tst->os_state.lwpid = kport;
   record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "thread-%p");

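   // Bit 0x01000000 in |flags| appears to correspond to libpthread's
   // PTHREAD_START_CUSTOM (caller supplied its own stack).  If it is
   // clear, the kernel allocated the stack and we must tell the core
   // and tool about it ourselves.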
   if ((flags & 0x01000000) == 0) {
      // kernel allocated stack - needs mapping
      Addr stack = VG_PGROUNDUP(sp) - stacksize;
      tst->client_stack_highest_byte = stack+stacksize-1;
      tst->client_stack_szB = stacksize;

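      // The three notifications below describe this layout, from low
      // to high addresses:
      //    [guard page | stack .. stack+stacksize | pthread struct]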
      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);
   } else {
      // client allocated stack
      find_stack_segment(tst->tid, sp);
   }
   ML_(sync_mappings)("after", "pthread_hijack", 0);

   // DDD: should this be here rather than in POST(sys_bsdthread_create)?
   // But we don't have ptid here...
   //VG_TRACK ( pre_thread_ll_create, ptid, tst->tid );

   // Tell parent thread's POST(sys_bsdthread_create) that we're done
   // initializing registers and mapping memory.
   semaphore_signal(tst->os_state.child_done);
   // LOCK IS GONE BELOW THIS POINT

   // Go!
   call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0,
                         start_thread_NORETURN, (Word)tst);

   /*NOTREACHED*/
   vg_assert(0);
}


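// Kernel entry convention for new workqueue threads: eax=self,
// ebx=kport, ecx=stackaddr, edx=workitem, edi=reuse, esp=sp.  As with
// _pthread_hijack_asm above, the stub pushes the registers so that
// wqthread_hijack() receives them as a C argument list.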
asm(
".globl _wqthread_hijack_asm\n"
"_wqthread_hijack_asm:\n"
"   movl %esp,%ebp\n"
"   push $0\n"    // alignment
"   push $0\n"    // alignment
"   push %ebp\n"  // original sp
"   push %edi\n"  // reuse
"   push %edx\n"  // workitem
"   push %ecx\n"  // stackaddr
"   push %ebx\n"  // kport
"   push %eax\n"  // self
"   push $0\n"    // fake return address
"   jmp _wqthread_hijack\n"
    );


/*  wqthread note: The kernel may create or destroy pthreads in the
    wqthread pool at any time with no userspace interaction,
    and wqthread_start may be entered at any time with no userspace
    interaction.
    To handle this in valgrind, we create and destroy a valgrind
    thread for every work item.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem,
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestX86State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   if (0) VG_(printf)(
             "wqthread_hijack: self %#lx, kport %#lx, "
             "stackaddr %#lx, workitem %#lx, reuse/flags %x, sp %#lx\n",
             self, kport, stackaddr, workitem, reuse, sp);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   /* For 10.7 and earlier, |reuse| appeared to be used as a simple
      boolean.  In 10.8 and later its name changed to |flags| and has
      various other bits OR-d into it too, so it's necessary to fish
      out just the relevant parts.  Hence: */
#  if DARWIN_VERS <= DARWIN_10_7
   Bool is_reuse = reuse != 0;
#  elif DARWIN_VERS == DARWIN_10_8 || DARWIN_VERS == DARWIN_10_9 || DARWIN_VERS == DARWIN_10_10
   Bool is_reuse = (reuse & 0x20000 /* == WQ_FLAG_THREAD_REUSE */) != 0;
#  else
#    error "Unsupported Darwin version"
#  endif

   if (is_reuse) {

      /* For whatever reason, tst->os_state.pthread appears to have a
         constant offset of 72 on 10.7, but zero on 10.6 and 10.5.  No
         idea why. */
#     if DARWIN_VERS <= DARWIN_10_6
      UWord magic_delta = 0;
#     elif DARWIN_VERS == DARWIN_10_7 || DARWIN_VERS == DARWIN_10_8
      UWord magic_delta = 0x48;
#     elif DARWIN_VERS == DARWIN_10_9 || DARWIN_VERS == DARWIN_10_10
      UWord magic_delta = 0xB0;
#     else
#       error "magic_delta: to be computed on new OS version"
        // magic_delta = tst->os_state.pthread - self
#     endif

      // This thread already exists; we're merely re-entering
      // after leaving via workq_ops(WQOPS_THREAD_RETURN).
      // Don't allocate any V thread resources.
      // Do reset thread registers.
      ThreadId tid = VG_(lwpid_to_vgtid)(kport);
      vg_assert(VG_(is_valid_tid)(tid));
      vg_assert(mach_thread_self() == kport);

      tst = VG_(get_ThreadState)(tid);

      if (0) VG_(printf)("wqthread_hijack reuse %s: tid %d, tst %p, "
                         "tst->os_state.pthread %#lx, self %#lx\n",
                         tst->os_state.pthread == self ? "SAME" : "DIFF",
                         tid, tst, tst->os_state.pthread, self);

      vex = &tst->arch.vex;
      vg_assert(tst->os_state.pthread - magic_delta == self);
   }
   else {
      // This is a new thread.
      tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());
      vex = &tst->arch.vex;
      allocstack(tst->tid);
      LibVEX_GuestX86_initialise(vex);
   }

   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace,
   // which requires valid register data.
   vex->guest_EIP = wqthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = stackaddr;
   vex->guest_EDX = workitem;
   vex->guest_EDI = reuse;
   vex->guest_ESI = 0;
   vex->guest_ESP = sp;

   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   if (is_reuse) {
      // Continue V's thread back in the scheduler.
      // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
      VG_(release_BigLock_LL)("wqthread_hijack(1)");
      ML_(wqthread_continue_NORETURN)(tst->tid);
   }
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");

      // kernel allocated stack - needs mapping
      tst->client_stack_highest_byte = stack+stacksize-1;
      tst->client_stack_szB = stacksize;

      // GrP fixme scheduler lock?!

      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0,
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}

#endif // defined(VGP_x86_darwin)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/