1 
2 /*--------------------------------------------------------------------*/
3 /*--- Darwin-specific syscalls, etc.        syswrap-amd64-darwin.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2005-2015 Apple Inc.
11       Greg Parker  gparker@apple.com
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #if defined(VGP_amd64_darwin)
32 
33 #include "config.h"                // DARWIN_VERS
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_threadstate.h"
37 #include "pub_core_aspacemgr.h"
38 #include "pub_core_xarray.h"
39 #include "pub_core_clientstate.h"
40 #include "pub_core_debuglog.h"
41 #include "pub_core_debuginfo.h"    // VG_(di_notify_*)
42 #include "pub_core_transtab.h"     // VG_(discard_translations)
43 #include "pub_core_libcbase.h"
44 #include "pub_core_libcassert.h"
45 #include "pub_core_libcfile.h"
46 #include "pub_core_libcprint.h"
47 #include "pub_core_libcproc.h"
48 #include "pub_core_libcsignal.h"
49 #include "pub_core_mallocfree.h"
50 #include "pub_core_options.h"
51 #include "pub_core_scheduler.h"
52 #include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
53 #include "pub_core_signals.h"
54 #include "pub_core_syscall.h"
55 #include "pub_core_syswrap.h"
56 #include "pub_core_tooliface.h"
57 
58 #include "priv_types_n_macros.h"
59 #include "priv_syswrap-generic.h"   /* for decls of generic wrappers */
60 #include "priv_syswrap-darwin.h"    /* for decls of darwin-ish wrappers */
61 #include "priv_syswrap-main.h"
62 
63 
64 #include <mach/mach.h>
65 
/* Fill a Mach x86_thread_state64_t from Valgrind's AMD64 guest state.
   Copies the sixteen general-purpose registers, the instruction
   pointer and the flags word (as computed by
   LibVEX_GuestAMD64_get_rflags).  The segment fields of *mach
   (__cs, __fs, __gs) are not written. */
static void x86_thread_state64_from_vex(x86_thread_state64_t *mach,
                                        VexGuestAMD64State *vex)
{
   /* Program counter and flags. */
   mach->__rip    = vex->guest_RIP;
   mach->__rflags = LibVEX_GuestAMD64_get_rflags(vex);

   /* Stack and frame pointers. */
   mach->__rsp = vex->guest_RSP;
   mach->__rbp = vex->guest_RBP;

   /* Legacy general-purpose registers. */
   mach->__rax = vex->guest_RAX;
   mach->__rbx = vex->guest_RBX;
   mach->__rcx = vex->guest_RCX;
   mach->__rdx = vex->guest_RDX;
   mach->__rsi = vex->guest_RSI;
   mach->__rdi = vex->guest_RDI;

   /* Extended registers r8..r15. */
   mach->__r8  = vex->guest_R8;
   mach->__r9  = vex->guest_R9;
   mach->__r10 = vex->guest_R10;
   mach->__r11 = vex->guest_R11;
   mach->__r12 = vex->guest_R12;
   mach->__r13 = vex->guest_R13;
   mach->__r14 = vex->guest_R14;
   mach->__r15 = vex->guest_R15;

   /* GrP fixme: segment registers are not transferred yet.
   mach->__cs = vex->guest_CS;
   mach->__fs = vex->guest_FS;
   mach->__gs = vex->guest_GS;
   */
}
93 
94 
/* Fill the sixteen __fpu_xmmN fields of a Mach x86_float_state64_t
   from the guest_YMMN registers of Valgrind's AMD64 guest state.
   Each copy is sizeof(__fpu_xmmN) bytes taken from the start of the
   corresponding guest_YMMN; nothing else in *mach is written. */
static void x86_float_state64_from_vex(x86_float_state64_t *mach,
                                       VexGuestAMD64State *vex)
{
   // DDD: #warning GrP fixme fp state
   // JRS: what about the YMMHI bits?  Are they important?

   /* Copy vector register N; the length is taken from the Mach field. */
#  define XMM_FROM_VEX(N)                                        \
      VG_(memcpy)(&mach->__fpu_xmm##N, &vex->guest_YMM##N,       \
                  sizeof(mach->__fpu_xmm##N))

   XMM_FROM_VEX(0);
   XMM_FROM_VEX(1);
   XMM_FROM_VEX(2);
   XMM_FROM_VEX(3);
   XMM_FROM_VEX(4);
   XMM_FROM_VEX(5);
   XMM_FROM_VEX(6);
   XMM_FROM_VEX(7);
   XMM_FROM_VEX(8);
   XMM_FROM_VEX(9);
   XMM_FROM_VEX(10);
   XMM_FROM_VEX(11);
   XMM_FROM_VEX(12);
   XMM_FROM_VEX(13);
   XMM_FROM_VEX(14);
   XMM_FROM_VEX(15);

#  undef XMM_FROM_VEX
}
117 
118 
/* Convert Valgrind's guest register state into a Mach thread-state
   buffer of the requested flavor.

   mach_generic  destination buffer; interpreted according to 'flavor'
   flavor        which Mach thread-state layout to produce
   count         caller-supplied size of the buffer; asserted to be the
                 exact expected count for the two *_STATE64 flavors and
                 stored into the header for the two generic flavors
   vex_generic   source guest state (treated as VexGuestAMD64State)

   x86_EXCEPTION_STATE and any unrecognised flavor are not supported:
   a message is printed and an assertion fails. */
void thread_state_from_vex(thread_state_t mach_generic,
                           thread_state_flavor_t flavor,
                           mach_msg_type_number_t count,
                           VexGuestArchState *vex_generic)
{
   VexGuestAMD64State *vex = (VexGuestAMD64State *)vex_generic;

   switch (flavor) {
   case x86_THREAD_STATE64:
      vg_assert(count == x86_THREAD_STATE64_COUNT);
      x86_thread_state64_from_vex((x86_thread_state64_t *)mach_generic, vex);
      break;

   case x86_FLOAT_STATE64:
      vg_assert(count == x86_FLOAT_STATE64_COUNT);
      x86_float_state64_from_vex((x86_float_state64_t *)mach_generic, vex);
      break;

   case x86_THREAD_STATE: {
      /* Use the thread-state wrapper and its own header member (tsh),
         rather than going through x86_float_state_t's fsh, which only
         happens to work if the two headers sit at the same offset. */
      x86_thread_state_t *ts = (x86_thread_state_t *)mach_generic;
      /* NOTE(review): the header is filled with the generic flavor and
         the caller's count, as before; confirm whether consumers
         expect the 64-bit sub-flavor/count here instead. */
      ts->tsh.flavor = flavor;
      ts->tsh.count  = count;
      x86_thread_state64_from_vex(&ts->uts.ts64, vex);
      break;
   }

   case x86_FLOAT_STATE: {
      x86_float_state_t *fs = (x86_float_state_t *)mach_generic;
      fs->fsh.flavor = flavor;
      fs->fsh.count  = count;
      x86_float_state64_from_vex(&fs->ufs.fs64, vex);
      break;
   }

   case x86_EXCEPTION_STATE:
      VG_(printf)("thread_state_from_vex: TODO, want exception state\n");
      vg_assert(0);
      break;   /* do not fall into 'default' */

   default:
      VG_(printf)("thread_state_from_vex: flavor:%#x\n",  flavor);
      vg_assert(0);
      break;
   }
}
158 
159 
/* Load Valgrind's AMD64 guest state from a Mach x86_thread_state64_t.
   The guest state is first reset with LibVEX_GuestAMD64_initialise,
   then the sixteen general-purpose registers and the instruction
   pointer are copied in.  The flags word and the segment fields of
   *mach are not transferred. */
static void x86_thread_state64_to_vex(const x86_thread_state64_t *mach,
                                      VexGuestAMD64State *vex)
{
   /* Start from a freshly initialised guest state. */
   LibVEX_GuestAMD64_initialise(vex);

   /* Program counter. */
   vex->guest_RIP = mach->__rip;
   // DDD: #warning GrP fixme eflags

   /* Stack and frame pointers. */
   vex->guest_RSP = mach->__rsp;
   vex->guest_RBP = mach->__rbp;

   /* Legacy general-purpose registers. */
   vex->guest_RAX = mach->__rax;
   vex->guest_RBX = mach->__rbx;
   vex->guest_RCX = mach->__rcx;
   vex->guest_RDX = mach->__rdx;
   vex->guest_RSI = mach->__rsi;
   vex->guest_RDI = mach->__rdi;

   /* Extended registers r8..r15. */
   vex->guest_R8  = mach->__r8;
   vex->guest_R9  = mach->__r9;
   vex->guest_R10 = mach->__r10;
   vex->guest_R11 = mach->__r11;
   vex->guest_R12 = mach->__r12;
   vex->guest_R13 = mach->__r13;
   vex->guest_R14 = mach->__r14;
   vex->guest_R15 = mach->__r15;

   /* GrP fixme: segment registers are not transferred yet.
   vex->guest_CS = mach->__cs;
   vex->guest_FS = mach->__fs;
   vex->guest_GS = mach->__gs;
   */
}
188 
/* Load the guest_YMMN registers of Valgrind's AMD64 guest state from
   the sixteen __fpu_xmmN fields of a Mach x86_float_state64_t.  Each
   copy is sizeof(__fpu_xmmN) bytes written at the start of the
   corresponding guest_YMMN; the remainder of each guest register and
   all other guest state are left as they were. */
static void x86_float_state64_to_vex(const x86_float_state64_t *mach,
                                     VexGuestAMD64State *vex)
{
   // DDD: #warning GrP fixme fp state
   // JRS: what about the YMMHI bits?  Are they important?

   /* Copy vector register N; the length is taken from the Mach field. */
#  define XMM_TO_VEX(N)                                          \
      VG_(memcpy)(&vex->guest_YMM##N, &mach->__fpu_xmm##N,       \
                  sizeof(mach->__fpu_xmm##N))

   XMM_TO_VEX(0);
   XMM_TO_VEX(1);
   XMM_TO_VEX(2);
   XMM_TO_VEX(3);
   XMM_TO_VEX(4);
   XMM_TO_VEX(5);
   XMM_TO_VEX(6);
   XMM_TO_VEX(7);
   XMM_TO_VEX(8);
   XMM_TO_VEX(9);
   XMM_TO_VEX(10);
   XMM_TO_VEX(11);
   XMM_TO_VEX(12);
   XMM_TO_VEX(13);
   XMM_TO_VEX(14);
   XMM_TO_VEX(15);

#  undef XMM_TO_VEX
}
211 
212 
/* Convert a Mach thread-state buffer of the given flavor into
   Valgrind's guest register state.

   Only x86_THREAD_STATE64 and x86_FLOAT_STATE64 are accepted, and
   'count' must equal the matching *_COUNT constant; any other flavor
   fails an assertion. */
void thread_state_to_vex(const thread_state_t mach_generic,
                         thread_state_flavor_t flavor,
                         mach_msg_type_number_t count,
                         VexGuestArchState *vex_generic)
{
   VexGuestAMD64State *guest = (VexGuestAMD64State *)vex_generic;

   if (flavor == x86_THREAD_STATE64) {
      /* Integer registers. */
      vg_assert(count == x86_THREAD_STATE64_COUNT);
      x86_thread_state64_to_vex((const x86_thread_state64_t*)mach_generic,
                                guest);
   }
   else if (flavor == x86_FLOAT_STATE64) {
      /* Vector registers. */
      vg_assert(count == x86_FLOAT_STATE64_COUNT);
      x86_float_state64_to_vex((const x86_float_state64_t*)mach_generic,
                               guest);
   }
   else {
      /* Unsupported flavor. */
      vg_assert(0);
   }
}
235 
236 
build_thread(const thread_state_t state,thread_state_flavor_t flavor,mach_msg_type_number_t count)237 ThreadState *build_thread(const thread_state_t state,
238                           thread_state_flavor_t flavor,
239                           mach_msg_type_number_t count)
240 {
241    ThreadId tid = VG_(alloc_ThreadState)();
242    ThreadState *tst = VG_(get_ThreadState)(tid);
243 
244    vg_assert(flavor == x86_THREAD_STATE64);
245    vg_assert(count == x86_THREAD_STATE64_COUNT);
246 
247    // Initialize machine registers
248 
249    thread_state_to_vex(state, flavor, count, &tst->arch.vex);
250 
251    I_die_here;
252    // GrP fixme signals, sig_mask, tmp_sig_mask, os_state.parent
253 
254    find_stack_segment(tid, tst->arch.vex.guest_RSP);
255 
256    return tst;
257 }
258 
259 
260 // Edit the thread state to send to the real kernel.
261 // The real thread will run start_thread_NORETURN(tst)
262 // on a separate non-client stack.
hijack_thread_state(thread_state_t mach_generic,thread_state_flavor_t flavor,mach_msg_type_number_t count,ThreadState * tst)263 void hijack_thread_state(thread_state_t mach_generic,
264                          thread_state_flavor_t flavor,
265                          mach_msg_type_number_t count,
266                          ThreadState *tst)
267 {
268    x86_thread_state64_t *mach = (x86_thread_state64_t *)mach_generic;
269    char *stack;
270 
271    vg_assert(flavor == x86_THREAD_STATE64);
272    vg_assert(count == x86_THREAD_STATE64_COUNT);
273 
274    stack = (char *)allocstack(tst->tid);
275    stack -= 64+320;                       // make room for top frame
276    memset(stack, 0, 64+320);              // ...and clear it
277    *(uintptr_t *)stack = 0;               // push fake return address
278 
279    mach->__rdi = (uintptr_t)tst;          // arg1 = tst
280    mach->__rip = (uintptr_t)&start_thread_NORETURN;
281    mach->__rsp = (uintptr_t)stack;
282 }
283 
284 
/* Call f(arg1), but first switch stacks, using 'stack' as the new
   stack, and use 'retaddr' as f's return-to address.  Also, clear all
   the integer registers before entering f (apart from %rdi, which
   carries arg1, and %rsp, which is the new stack pointer).

   The routine is implemented in assembly below.  'retaddr' and then
   'f' are pushed onto the new stack and a 'ret' pops 'f' into the
   program counter, so on entry to f the word on top of the stack is
   'retaddr', just as if f had been reached by a call instruction.
   Declared noreturn: control never comes back to the caller. */
__attribute__((noreturn))
void call_on_new_stack_0_1 ( Addr stack,
			     Addr retaddr,
			     void (*f)(Word),
                             Word arg1 );
// Register assignment on entry:
// %rdi == stack (must be 16-byte aligned)
// %rsi == retaddr
// %rdx == f
// %rcx == arg1
asm(
".globl _call_on_new_stack_0_1\n"
"_call_on_new_stack_0_1:\n"
"   movq  %rsp, %rbp\n"     // copy old stack pointer (%rbp is zeroed below, so it is not kept)
"   movq  %rdi, %rsp\n"     // set new stack
"   movq  %rcx, %rdi\n"     // set arg1
"   pushq %rsi\n"           // retaddr to new stack
"   pushq %rdx\n"           // f to new stack
"   movq $0, %rax\n"        // zero all other GP regs
"   movq $0, %rbx\n"
"   movq $0, %rcx\n"
"   movq $0, %rdx\n"
"   movq $0, %rsi\n"
"   movq $0, %rbp\n"
"   movq $0, %r8\n"
"   movq $0, %r9\n"
"   movq $0, %r10\n"
"   movq $0, %r11\n"
"   movq $0, %r12\n"
"   movq $0, %r13\n"
"   movq $0, %r14\n"
"   movq $0, %r15\n"
"   ret\n"                 // jump to f (pops f; retaddr is left on top of the stack)
"   ud2\n"                 // should never get here
);
322 
/* Assembly trampoline that enters pthread_hijack().

   The incoming argument registers are left untouched.  The stack
   pointer the trampoline was entered with is pushed so that it sits
   immediately above the fake return address; under the usual AMD64
   calling convention that is where pthread_hijack() looks for its
   seventh argument, 'sp'.  Control is transferred with a jmp rather
   than a call, and a zero word stands in for the return address:
   pthread_hijack() is not expected to return. */
asm(
".globl _pthread_hijack_asm\n"
"_pthread_hijack_asm:\n"
"   movq %rsp,%rbp\n"   // save the stack pointer we were entered with
"   push $0\n"    // alignment pad
"   push %rbp\n"  // original sp (becomes the stack-passed 7th argument)
                  // other values stay where they are in registers
"   push $0\n"    // fake return address
"   jmp _pthread_hijack\n"
);
333 
334 
335 
/* C-level entry point for a newly created pthread, reached via
   _pthread_hijack_asm.  Never returns.

   self       address of the thread's pthread structure
   kport      the thread's Mach port
   func       thread start function, handed on to the guest in RDX
   func_arg   really the ThreadState* for this thread; the client's
              own argument is taken from tst->os_state.func_arg
   stacksize  size of the thread's stack
   flags      creation flags; bit 0x01000000 set means the client
              supplied the stack itself
   sp         stack pointer the thread was started with

   The function waits for the parent's go-ahead, blocks all signals,
   sets up the guest registers so the client resumes at
   pthread_starter, records the thread's port and stack, signals the
   parent that setup is complete, and finally switches to the
   Valgrind stack to run start_thread_NORETURN(tst). */
void pthread_hijack(Addr self, Addr kport, Addr func, Addr func_arg,
                    Addr stacksize, Addr flags, Addr sp)
{
   ThreadState *tst = (ThreadState *)func_arg;
   VexGuestAMD64State *guest = &tst->arch.vex;
   vki_sigset_t all_signals;

   // Block until the parent thread lets us proceed.  The parent holds
   // V's lock on our behalf while we run the setup below.
   semaphore_wait(tst->os_state.child_go);

   /* Run with every signal blocked for now; VG_(scheduler) installs
      the proper mask once the thread gets there. */
   VG_(sigfillset)(&all_signals);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &all_signals, NULL);

   // Guest registers come first: code further down may want to take
   // a backtrace, and that needs valid register contents.
   LibVEX_GuestAMD64_initialise(guest);
   guest->guest_RIP = pthread_starter;
   guest->guest_RDI = self;
   guest->guest_RSI = kport;
   guest->guest_RDX = func;
   guest->guest_RCX = tst->os_state.func_arg;
   guest->guest_R8  = stacksize;
   guest->guest_R9  = flags;
   guest->guest_RSP = sp;

   // Remember the pthread struct and Mach port, and name the port.
   tst->os_state.pthread = self;
   tst->os_state.lwpid = kport;
   record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "thread-%p");

   if ((flags & 0x01000000) != 0) {
      // Stack was allocated by the client: just look it up.
      find_stack_segment(tst->tid, sp);
   } else {
      // Stack was allocated by the kernel: tell core and tool about
      // the new mappings.
      const Addr stack_lo = VG_PGROUNDUP(sp) - stacksize;
      const Addr stack_hi = stack_lo + stacksize;   // one past the top

      tst->client_stack_highest_byte = stack_hi - 1;
      tst->client_stack_szB = stacksize;

      // pthread structure, located directly above the stack
      ML_(notify_core_and_tool_of_mmap)(
            stack_hi, pthread_structsize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // the stack itself
      ML_(notify_core_and_tool_of_mmap)(
            stack_lo, stacksize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page directly below the stack, no access
      ML_(notify_core_and_tool_of_mmap)(
            stack_lo - VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);
   }
   ML_(sync_mappings)("after", "pthread_hijack", 0);

   // DDD: should this be here rather than in POST(sys_bsdthread_create)?
   // But we don't have ptid here...
   //VG_TRACK ( pre_thread_ll_create, ptid, tst->tid );

   // Let the parent's POST(sys_bsdthread_create) know that register
   // setup and memory mapping are finished.
   semaphore_signal(tst->os_state.child_done);
   // LOCK IS GONE BELOW THIS POINT

   // Switch to the Valgrind stack and start running the thread.
   call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0,
                         start_thread_NORETURN, (Word)tst);

   /*NOTREACHED*/
   vg_assert(0);
}
412 
413 
414 
/* Assembly trampoline that enters wqthread_hijack().

   The stack pointer the trampoline was entered with is copied into
   %r9, which under the usual AMD64 calling convention carries the
   sixth argument ('sp' of wqthread_hijack).  All other argument
   registers are left as they arrived.  Control is transferred with a
   jmp rather than a call, and a zero word stands in for the return
   address: wqthread_hijack() is not expected to return. */
asm(
".globl _wqthread_hijack_asm\n"
"_wqthread_hijack_asm:\n"
"   movq %rsp,%r9\n"  // original sp (becomes the 6th argument)
                      // other values stay where they are in registers
"   push $0\n"        // fake return address
"   jmp _wqthread_hijack\n"
);
423 
424 
/*  wqthread note: The kernel may create or destroy pthreads in the
    wqthread pool at any time with no userspace interaction,
    and wqthread_start may be entered at any time with no userspace
    interaction.
    To handle this in valgrind, we create and destroy a valgrind
    thread for every work item.

    wqthread_hijack is the C-level entry point for a workqueue
    thread, reached via _wqthread_hijack_asm.  It never returns.

    self       address of the thread's pthread structure
    kport      the thread's Mach port
    stackaddr  passed through to the guest in RDX
    workitem   passed through to the guest in RCX
    reuse      reuse indicator (a plain boolean up to 10.7, a flags
               word from 10.8 on); passed through to the guest in R8
    sp         stack pointer the thread was entered with

    In the reuse case an existing Valgrind thread is looked up by its
    Mach port, its registers are reset, and execution continues via
    ML_(wqthread_continue_NORETURN).  Otherwise a new Valgrind thread
    is allocated, its stack mappings are reported, and it is started
    via start_thread_NORETURN on the Valgrind stack.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem,
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestAMD64State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   /* Debug trace, disabled. */
   if (0) VG_(printf)(
             "wqthread_hijack: self %#lx, kport %#lx, "
	     "stackaddr %#lx, workitem %#lx, reuse/flags %x, sp %#lx\n",
	     self, kport, stackaddr, workitem, (UInt)reuse, sp);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   /* For 10.7 and earlier, |reuse| appeared to be used as a simple
      boolean.  In 10.8 and later its name changed to |flags| and has
      various other bits OR-d into it too, so it's necessary to fish
      out just the relevant parts.  Hence: */
#  if DARWIN_VERS <= DARWIN_10_7
   Bool is_reuse = reuse != 0;
#  elif DARWIN_VERS > DARWIN_10_7
   Bool is_reuse = (reuse & 0x20000 /* == WQ_FLAG_THREAD_REUSE */) != 0;
#  else
#    error "Unsupported Darwin version"
#  endif

   if (is_reuse) {

     /* For whatever reason, tst->os_state.pthread appears to differ
        from |self| by a constant that depends on the OS version:
        zero on 10.6 and earlier, 0x60 (96) on 10.7 and 10.8, 0xE0 on
        10.9 and 10.10, and 0x100 on 10.11.  No idea why.  The value
        is checked by the assertion at the end of this branch. */
#      if DARWIN_VERS <= DARWIN_10_6
       UWord magic_delta = 0;
#      elif DARWIN_VERS == DARWIN_10_7 || DARWIN_VERS == DARWIN_10_8
       UWord magic_delta = 0x60;
#      elif DARWIN_VERS == DARWIN_10_9 || DARWIN_VERS == DARWIN_10_10
       UWord magic_delta = 0xE0;
#      elif DARWIN_VERS == DARWIN_10_11
       UWord magic_delta = 0x100;
#      else
#        error "magic_delta: to be computed on new OS version"
         // magic_delta = tst->os_state.pthread - self
#      endif

       // This thread already exists; we're merely re-entering
       // after leaving via workq_ops(WQOPS_THREAD_RETURN).
       // Don't allocate any V thread resources.
       // Do reset thread registers.
       ThreadId tid = VG_(lwpid_to_vgtid)(kport);
       vg_assert(VG_(is_valid_tid)(tid));
       // Sanity check: the port we were handed must be our own.
       vg_assert(mach_thread_self() == kport);

       tst = VG_(get_ThreadState)(tid);

       /* Debug trace, disabled. */
       if (0) VG_(printf)("wqthread_hijack reuse %s: tid %u, tst %p, "
                          "tst->os_state.pthread %#lx, self %#lx\n",
                          tst->os_state.pthread == self ? "SAME" : "DIFF",
                          tid, (void *)tst, tst->os_state.pthread, self);

       vex = &tst->arch.vex;
       // The recorded pthread address must match |self| once the
       // per-version offset is removed.
       vg_assert(tst->os_state.pthread - magic_delta == self);
   }
   else {
       // This is a new thread.
       tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());
       vex = &tst->arch.vex;
       // Return value unused here; the Valgrind stack is picked up
       // later from tst->os_state.valgrind_stack_init_SP.
       allocstack(tst->tid);
       LibVEX_GuestAMD64_initialise(vex);
   }

   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace,
   // which requires valid register data.
   // The client resumes at wqthread_starter with the original
   // arguments in the first five argument registers.
   vex->guest_RIP = wqthread_starter;
   vex->guest_RDI = self;
   vex->guest_RSI = kport;
   vex->guest_RDX = stackaddr;
   vex->guest_RCX = workitem;
   vex->guest_R8  = reuse;
   vex->guest_R9  = 0;
   vex->guest_RSP = sp;

   // Stack extent; only used by the new-thread branch below.
   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   if (is_reuse) {
      // Continue V's thread back in the scheduler.
      // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
      VG_(release_BigLock_LL)("wqthread_hijack(1)");
      ML_(wqthread_continue_NORETURN)(tst->tid);
   }
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");

      // kernel allocated stack - needs mapping
      tst->client_stack_highest_byte = stack+stacksize-1;
      tst->client_stack_szB = stacksize;

      // GrP fixme scheduler lock?!

      // pthread structure (directly above the stack)
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page (directly below the stack, no access)
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0,
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}
593 
594 #endif // defined(VGP_amd64_darwin)
595 
596 /*--------------------------------------------------------------------*/
597 /*--- end                                                          ---*/
598 /*--------------------------------------------------------------------*/
599