1 
2 /*--------------------------------------------------------------------*/
3 /*--- Thread scheduling.                               scheduler.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2015 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /*
32    Overview
33 
34    Valgrind tries to emulate the kernel's threading as closely as
35    possible.  The client does all threading via the normal syscalls
36    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
37    the same process structure as would be created without Valgrind.
38    There are no extra threads.
39 
40    The main difference is that Valgrind only allows one client thread
41    to run at once.  This is controlled with the CPU Big Lock,
42    "the_BigLock".  Any time a thread wants to run client code or
43    manipulate any shared state (which is anything other than its own
44    ThreadState entry), it must hold the_BigLock.
45 
46    When a thread is about to block in a blocking syscall, it releases
47    the_BigLock, and re-takes it when it becomes runnable again (either
48    because the syscall finished, or we took a signal).
49 
50    VG_(scheduler) therefore runs in each thread.  It returns only when
51    the thread is exiting, either because it exited itself, or it was
52    told to exit by another thread.
53 
54    This file is almost entirely OS-independent.  The details of how
55    the OS handles threading and signalling are abstracted away and
56    implemented elsewhere.  [Some of the functions have worked their
57    way back for the moment, until we do an OS port in earnest...]
58 */
59 
60 
61 #include "pub_core_basics.h"
62 #include "pub_core_debuglog.h"
63 #include "pub_core_vki.h"
64 #include "pub_core_vkiscnums.h"  // __NR_sched_yield
65 #include "pub_core_threadstate.h"
66 #include "pub_core_clientstate.h"
67 #include "pub_core_aspacemgr.h"
68 #include "pub_core_clreq.h"      // for VG_USERREQ__*
69 #include "pub_core_dispatch.h"
70 #include "pub_core_errormgr.h"   // For VG_(get_n_errs_found)()
71 #include "pub_core_gdbserver.h"  // for VG_(gdbserver)/VG_(gdbserver_activity)
72 #include "pub_core_libcbase.h"
73 #include "pub_core_libcassert.h"
74 #include "pub_core_libcprint.h"
75 #include "pub_core_libcproc.h"
76 #include "pub_core_libcsignal.h"
77 #if defined(VGO_darwin)
78 #include "pub_core_mach.h"
79 #endif
80 #include "pub_core_machine.h"
81 #include "pub_core_mallocfree.h"
82 #include "pub_core_options.h"
83 #include "pub_core_replacemalloc.h"
84 #include "pub_core_sbprofile.h"
85 #include "pub_core_signals.h"
86 #include "pub_core_stacks.h"
87 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
88 #include "pub_core_syscall.h"
89 #include "pub_core_syswrap.h"
90 #include "pub_core_tooliface.h"
91 #include "pub_core_translate.h"     // For VG_(translate)()
92 #include "pub_core_transtab.h"
93 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
94 #include "priv_sched-lock.h"
95 #include "pub_core_scheduler.h"     // self
96 #include "pub_core_redir.h"
97 #include "libvex_emnote.h"          // VexEmNote
98 
99 
100 /* ---------------------------------------------------------------------
101    Types and globals for the scheduler.
102    ------------------------------------------------------------------ */
103 
104 /* ThreadId and ThreadState are defined elsewhere*/
105 
106 /* Defines the thread-scheduling timeslice, in terms of the number of
107    basic blocks we attempt to run each thread for.  Smaller values
108    give finer interleaving but much increased scheduling overheads. */
109 #define SCHEDULING_QUANTUM   100000
110 
111 /* If False, a fault is Valgrind-internal (ie, a bug) */
112 Bool VG_(in_generated_code) = False;
113 
114 /* 64-bit counter for the number of basic blocks done. */
115 static ULong bbs_done = 0;
116 
117 /* Counter to see if vgdb activity is to be verified.
118    When nr of bbs done reaches vgdb_next_poll, scheduler will
119    poll for gdbserver activity. VG_(force_vgdb_poll) and
120    VG_(disable_vgdb_poll) allows the valgrind core (e.g. m_gdbserver)
121    to control when the next poll will be done. */
122 static ULong vgdb_next_poll;
123 
124 /* Forwards */
125 static void do_client_request ( ThreadId tid );
126 static void scheduler_sanity ( ThreadId tid );
127 static void mostly_clear_thread_record ( ThreadId tid );
128 
129 /* Stats. */
130 static ULong n_scheduling_events_MINOR = 0;
131 static ULong n_scheduling_events_MAJOR = 0;
132 
133 /* Stats: number of XIndirs, and number that missed in the fast
134    cache. */
135 static ULong stats__n_xindirs = 0;
136 static ULong stats__n_xindir_misses = 0;
137 
138 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
139    have to do 64 bit incs on the hot path through
140    VG_(cp_disp_xindir). */
141 /*global*/ UInt VG_(stats__n_xindirs_32) = 0;
142 /*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
143 
144 /* Sanity checking counts. */
145 static UInt sanity_fast_count = 0;
146 static UInt sanity_slow_count = 0;
147 
VG_(print_scheduler_stats)148 void VG_(print_scheduler_stats)(void)
149 {
150    VG_(message)(Vg_DebugMsg,
151       "scheduler: %'llu event checks.\n", bbs_done );
152    VG_(message)(Vg_DebugMsg,
153                 "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
154                 stats__n_xindirs, stats__n_xindir_misses,
155                 stats__n_xindirs / (stats__n_xindir_misses
156                                     ? stats__n_xindir_misses : 1));
157    VG_(message)(Vg_DebugMsg,
158       "scheduler: %'llu/%'llu major/minor sched events.\n",
159       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
160    VG_(message)(Vg_DebugMsg,
161                 "   sanity: %u cheap, %u expensive checks.\n",
162                 sanity_fast_count, sanity_slow_count );
163 }
164 
165 /*
166  * Mutual exclusion object used to serialize threads.
167  */
168 static struct sched_lock *the_BigLock;
169 
170 
171 /* ---------------------------------------------------------------------
172    Helper functions for the scheduler.
173    ------------------------------------------------------------------ */
174 
175 static
print_sched_event(ThreadId tid,const HChar * what)176 void print_sched_event ( ThreadId tid, const HChar* what )
177 {
178    VG_(message)(Vg_DebugMsg, "  SCHED[%u]: %s\n", tid, what );
179 }
180 
181 /* For showing SB profiles, if the user asks to see them. */
182 static
maybe_show_sb_profile(void)183 void maybe_show_sb_profile ( void )
184 {
185    /* DO NOT MAKE NON-STATIC */
186    static ULong bbs_done_lastcheck = 0;
187    /* */
188    vg_assert(VG_(clo_profyle_interval) > 0);
189    Long delta = (Long)(bbs_done - bbs_done_lastcheck);
190    vg_assert(delta >= 0);
191    if ((ULong)delta >= VG_(clo_profyle_interval)) {
192       bbs_done_lastcheck = bbs_done;
193       VG_(get_and_show_SB_profile)(bbs_done);
194    }
195 }
196 
197 static
name_of_sched_event(UInt event)198 const HChar* name_of_sched_event ( UInt event )
199 {
200    switch (event) {
201       case VEX_TRC_JMP_INVALICACHE:    return "INVALICACHE";
202       case VEX_TRC_JMP_FLUSHDCACHE:    return "FLUSHDCACHE";
203       case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
204       case VEX_TRC_JMP_SIGILL:         return "SIGILL";
205       case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
206       case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
207       case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
208       case VEX_TRC_JMP_SIGFPE_INTOVF:
209       case VEX_TRC_JMP_SIGFPE_INTDIV:  return "SIGFPE";
210       case VEX_TRC_JMP_EMWARN:         return "EMWARN";
211       case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
212       case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
213       case VEX_TRC_JMP_YIELD:          return "YIELD";
214       case VEX_TRC_JMP_NODECODE:       return "NODECODE";
215       case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
216       case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
217       case VEX_TRC_JMP_SYS_INT32:      return "INT32";
218       case VEX_TRC_JMP_SYS_INT128:     return "INT128";
219       case VEX_TRC_JMP_SYS_INT129:     return "INT129";
220       case VEX_TRC_JMP_SYS_INT130:     return "INT130";
221       case VEX_TRC_JMP_SYS_INT145:     return "INT145";
222       case VEX_TRC_JMP_SYS_INT210:     return "INT210";
223       case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
224       case VEX_TRC_JMP_BORING:         return "VEX_BORING";
225 
226       case VG_TRC_BORING:              return "VG_BORING";
227       case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
228       case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
229       case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
230       case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
231       case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
232       case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
233       default:                         return "??UNKNOWN??";
234   }
235 }
236 
237 /* Allocate a completely empty ThreadState record. */
VG_(alloc_ThreadState)238 ThreadId VG_(alloc_ThreadState) ( void )
239 {
240    Int i;
241    for (i = 1; i < VG_N_THREADS; i++) {
242       if (VG_(threads)[i].status == VgTs_Empty) {
243 	 VG_(threads)[i].status = VgTs_Init;
244 	 VG_(threads)[i].exitreason = VgSrc_None;
245          if (VG_(threads)[i].thread_name)
246             VG_(free)(VG_(threads)[i].thread_name);
247          VG_(threads)[i].thread_name = NULL;
248          return i;
249       }
250    }
251    VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
252                "and rerun valgrind\n");
253    VG_(core_panic)("Max number of threads is too low");
254    /*NOTREACHED*/
255 }
256 
257 /*
258    Mark a thread as Runnable.  This will block until the_BigLock is
259    available, so that we get exclusive access to all the shared
260    structures and the CPU.  Up until we get the_BigLock, we must not
261    touch any shared state.
262 
263    When this returns, we'll actually be running.
264  */
VG_(acquire_BigLock)265 void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
266 {
267    ThreadState *tst;
268 
269 #if 0
270    if (VG_(clo_trace_sched)) {
271       HChar buf[VG_(strlen)(who) + 30];
272       VG_(sprintf)(buf, "waiting for lock (%s)", who);
273       print_sched_event(tid, buf);
274    }
275 #endif
276 
277    /* First, acquire the_BigLock.  We can't do anything else safely
278       prior to this point.  Even doing debug printing prior to this
279       point is, technically, wrong. */
280    VG_(acquire_BigLock_LL)(NULL);
281 
282    tst = VG_(get_ThreadState)(tid);
283 
284    vg_assert(tst->status != VgTs_Runnable);
285 
286    tst->status = VgTs_Runnable;
287 
288    if (VG_(running_tid) != VG_INVALID_THREADID)
289       VG_(printf)("tid %u found %u running\n", tid, VG_(running_tid));
290    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
291    VG_(running_tid) = tid;
292 
293    { Addr gsp = VG_(get_SP)(tid);
294       if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
295          VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
296       else
297          VG_(unknown_SP_update)(gsp, gsp);
298    }
299 
300    if (VG_(clo_trace_sched)) {
301       HChar buf[VG_(strlen)(who) + 30];
302       VG_(sprintf)(buf, " acquired lock (%s)", who);
303       print_sched_event(tid, buf);
304    }
305 }
306 
307 /*
308    Set a thread into a sleeping state, and give up exclusive access to
309    the CPU.  On return, the thread must be prepared to block until it
310    is ready to run again (generally this means blocking in a syscall,
311    but it may mean that we remain in a Runnable state and we're just
312    yielding the CPU to another thread).
313  */
VG_(release_BigLock)314 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
315                           const HChar* who)
316 {
317    ThreadState *tst = VG_(get_ThreadState)(tid);
318 
319    vg_assert(tst->status == VgTs_Runnable);
320 
321    vg_assert(sleepstate == VgTs_WaitSys ||
322 	     sleepstate == VgTs_Yielding);
323 
324    tst->status = sleepstate;
325 
326    vg_assert(VG_(running_tid) == tid);
327    VG_(running_tid) = VG_INVALID_THREADID;
328 
329    if (VG_(clo_trace_sched)) {
330       const HChar *status = VG_(name_of_ThreadStatus)(sleepstate);
331       HChar buf[VG_(strlen)(who) + VG_(strlen)(status) + 30];
332       VG_(sprintf)(buf, "releasing lock (%s) -> %s", who, status);
333       print_sched_event(tid, buf);
334    }
335 
336    /* Release the_BigLock; this will reschedule any runnable
337       thread. */
338    VG_(release_BigLock_LL)(NULL);
339 }
340 
init_BigLock(void)341 static void init_BigLock(void)
342 {
343    vg_assert(!the_BigLock);
344    the_BigLock = ML_(create_sched_lock)();
345 }
346 
deinit_BigLock(void)347 static void deinit_BigLock(void)
348 {
349    ML_(destroy_sched_lock)(the_BigLock);
350    the_BigLock = NULL;
351 }
352 
353 /* See pub_core_scheduler.h for description */
VG_(acquire_BigLock_LL)354 void VG_(acquire_BigLock_LL) ( const HChar* who )
355 {
356    ML_(acquire_sched_lock)(the_BigLock);
357 }
358 
359 /* See pub_core_scheduler.h for description */
VG_(release_BigLock_LL)360 void VG_(release_BigLock_LL) ( const HChar* who )
361 {
362    ML_(release_sched_lock)(the_BigLock);
363 }
364 
VG_(owns_BigLock_LL)365 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
366 {
367    return (ML_(get_sched_lock_owner)(the_BigLock)
368            == VG_(threads)[tid].os_state.lwpid);
369 }
370 
371 
372 /* Clear out the ThreadState and release the semaphore. Leaves the
373    ThreadState in VgTs_Zombie state, so that it doesn't get
374    reallocated until the caller is really ready. */
VG_(exit_thread)375 void VG_(exit_thread)(ThreadId tid)
376 {
377    vg_assert(VG_(is_valid_tid)(tid));
378    vg_assert(VG_(is_running_thread)(tid));
379    vg_assert(VG_(is_exiting)(tid));
380 
381    mostly_clear_thread_record(tid);
382    VG_(running_tid) = VG_INVALID_THREADID;
383 
384    /* There should still be a valid exitreason for this thread */
385    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
386 
387    if (VG_(clo_trace_sched))
388       print_sched_event(tid, "release lock in VG_(exit_thread)");
389 
390    VG_(release_BigLock_LL)(NULL);
391 }
392 
393 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
394    out of the syscall and onto doing the next thing, whatever that is.
395    If it isn't blocked in a syscall, has no effect on the thread. */
VG_(get_thread_out_of_syscall)396 void VG_(get_thread_out_of_syscall)(ThreadId tid)
397 {
398    vg_assert(VG_(is_valid_tid)(tid));
399    vg_assert(!VG_(is_running_thread)(tid));
400 
401    if (VG_(threads)[tid].status == VgTs_WaitSys) {
402       if (VG_(clo_trace_signals)) {
403 	 VG_(message)(Vg_DebugMsg,
404                       "get_thread_out_of_syscall zaps tid %u lwp %d\n",
405 		      tid, VG_(threads)[tid].os_state.lwpid);
406       }
407 #     if defined(VGO_darwin)
408       {
409          // GrP fixme use mach primitives on darwin?
410          // GrP fixme thread_abort_safely?
411          // GrP fixme race for thread with WaitSys set but not in syscall yet?
412          extern kern_return_t thread_abort(mach_port_t);
413          thread_abort(VG_(threads)[tid].os_state.lwpid);
414       }
415 #     else
416       {
417          __attribute__((unused))
418          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
419          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
420             I'm really not sure.  Here's a race scenario which argues
421             that we shoudn't; but equally I'm not sure the scenario is
422             even possible, because of constraints caused by the question
423             of who holds the BigLock when.
424 
425             Target thread tid does sys_read on a socket and blocks.  This
426             function gets called, and we observe correctly that tid's
427             status is WaitSys but then for whatever reason this function
428             goes very slowly for a while.  Then data arrives from
429             wherever, tid's sys_read returns, tid exits.  Then we do
430             tkill on tid, but tid no longer exists; tkill returns an
431             error code and the assert fails. */
432          /* vg_assert(r == 0); */
433       }
434 #     endif
435    }
436 }
437 
438 /*
439    Yield the CPU for a short time to let some other thread run.
440  */
VG_(vg_yield)441 void VG_(vg_yield)(void)
442 {
443    ThreadId tid = VG_(running_tid);
444 
445    vg_assert(tid != VG_INVALID_THREADID);
446    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
447 
448    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
449 
450    /*
451       Tell the kernel we're yielding.
452     */
453 #  if defined(VGO_linux) || defined(VGO_darwin)
454    VG_(do_syscall0)(__NR_sched_yield);
455 #  elif defined(VGO_solaris)
456    VG_(do_syscall0)(__NR_yield);
457 #  else
458 #    error Unknown OS
459 #  endif
460 
461    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
462 }
463 
464 
465 /* Set the standard set of blocked signals, used whenever we're not
466    running a client syscall. */
block_signals(void)467 static void block_signals(void)
468 {
469    vki_sigset_t mask;
470 
471    VG_(sigfillset)(&mask);
472 
473    /* Don't block these because they're synchronous */
474    VG_(sigdelset)(&mask, VKI_SIGSEGV);
475    VG_(sigdelset)(&mask, VKI_SIGBUS);
476    VG_(sigdelset)(&mask, VKI_SIGFPE);
477    VG_(sigdelset)(&mask, VKI_SIGILL);
478    VG_(sigdelset)(&mask, VKI_SIGTRAP);
479 
480    /* Can't block these anyway */
481    VG_(sigdelset)(&mask, VKI_SIGSTOP);
482    VG_(sigdelset)(&mask, VKI_SIGKILL);
483 
484    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
485 }
486 
os_state_clear(ThreadState * tst)487 static void os_state_clear(ThreadState *tst)
488 {
489    tst->os_state.lwpid       = 0;
490    tst->os_state.threadgroup = 0;
491 #  if defined(VGO_linux)
492    /* no other fields to clear */
493 #  elif defined(VGO_darwin)
494    tst->os_state.post_mach_trap_fn = NULL;
495    tst->os_state.pthread           = 0;
496    tst->os_state.func_arg          = 0;
497    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
498    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
499    tst->os_state.wq_jmpbuf_valid   = False;
500    tst->os_state.remote_port       = 0;
501    tst->os_state.msgh_id           = 0;
502    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
503 #  elif defined(VGO_solaris)
504 #  if defined(VGP_x86_solaris)
505    tst->os_state.thrptr = 0;
506 #  endif
507    tst->os_state.stk_id = (UWord)-1;
508    tst->os_state.ustack = NULL;
509    tst->os_state.in_door_return = False;
510    tst->os_state.door_return_procedure = 0;
511    tst->os_state.oldcontext = NULL;
512    tst->os_state.schedctl_data = 0;
513    tst->os_state.daemon_thread = False;
514 #  else
515 #    error "Unknown OS"
516 #  endif
517 }
518 
os_state_init(ThreadState * tst)519 static void os_state_init(ThreadState *tst)
520 {
521    tst->os_state.valgrind_stack_base    = 0;
522    tst->os_state.valgrind_stack_init_SP = 0;
523    os_state_clear(tst);
524 }
525 
526 static
mostly_clear_thread_record(ThreadId tid)527 void mostly_clear_thread_record ( ThreadId tid )
528 {
529    vki_sigset_t savedmask;
530 
531    vg_assert(tid >= 0 && tid < VG_N_THREADS);
532    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
533    VG_(threads)[tid].tid = tid;
534 
535    /* Leave the thread in Zombie, so that it doesn't get reallocated
536       until the caller is finally done with the thread stack. */
537    VG_(threads)[tid].status               = VgTs_Zombie;
538 
539    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
540    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
541 
542    os_state_clear(&VG_(threads)[tid]);
543 
544    /* start with no altstack */
545    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
546    VG_(threads)[tid].altstack.ss_size = 0;
547    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
548 
549    VG_(clear_out_queued_signals)(tid, &savedmask);
550 
551    VG_(threads)[tid].sched_jmpbuf_valid = False;
552 }
553 
554 /*
555    Called in the child after fork.  If the parent has multiple
556    threads, then we've inherited a VG_(threads) array describing them,
557    but only the thread which called fork() is actually alive in the
558    child.  This functions needs to clean up all those other thread
559    structures.
560 
561    Whichever tid in the parent which called fork() becomes the
562    master_tid in the child.  That's because the only living slot in
563    VG_(threads) in the child after fork is VG_(threads)[tid], and it
564    would be too hard to try to re-number the thread and relocate the
565    thread state down to VG_(threads)[1].
566 
567    This function also needs to reinitialize the_BigLock, since
568    otherwise we may end up sharing its state with the parent, which
569    would be deeply confusing.
570 */
sched_fork_cleanup(ThreadId me)571 static void sched_fork_cleanup(ThreadId me)
572 {
573    ThreadId tid;
574    vg_assert(VG_(running_tid) == me);
575 
576 #  if defined(VGO_darwin)
577    // GrP fixme hack reset Mach ports
578    VG_(mach_init)();
579 #  endif
580 
581    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
582    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
583 
584    /* clear out all the unused thread slots */
585    for (tid = 1; tid < VG_N_THREADS; tid++) {
586       if (tid != me) {
587          mostly_clear_thread_record(tid);
588 	 VG_(threads)[tid].status = VgTs_Empty;
589          VG_(clear_syscallInfo)(tid);
590       }
591    }
592 
593    /* re-init and take the sema */
594    deinit_BigLock();
595    init_BigLock();
596    VG_(acquire_BigLock_LL)(NULL);
597 }
598 
599 
600 /* First phase of initialisation of the scheduler.  Initialise the
601    bigLock, zeroise the VG_(threads) structure and decide on the
602    ThreadId of the root thread.
603 */
VG_(scheduler_init_phase1)604 ThreadId VG_(scheduler_init_phase1) ( void )
605 {
606    Int i;
607    ThreadId tid_main;
608 
609    VG_(debugLog)(1,"sched","sched_init_phase1\n");
610 
611    if (VG_(clo_fair_sched) != disable_fair_sched
612        && !ML_(set_sched_lock_impl)(sched_lock_ticket)
613        && VG_(clo_fair_sched) == enable_fair_sched)
614    {
615       VG_(printf)("Error: fair scheduling is not supported on this system.\n");
616       VG_(exit)(1);
617    }
618 
619    if (VG_(clo_verbosity) > 1) {
620       VG_(message)(Vg_DebugMsg,
621                    "Scheduler: using %s scheduler lock implementation.\n",
622                    ML_(get_sched_lock_name)());
623    }
624 
625    init_BigLock();
626 
627    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
628       /* Paranoia .. completely zero it out. */
629       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
630 
631       VG_(threads)[i].sig_queue = NULL;
632 
633       os_state_init(&VG_(threads)[i]);
634       mostly_clear_thread_record(i);
635 
636       VG_(threads)[i].status                    = VgTs_Empty;
637       VG_(threads)[i].client_stack_szB          = 0;
638       VG_(threads)[i].client_stack_highest_byte = (Addr)NULL;
639       VG_(threads)[i].err_disablement_level     = 0;
640       VG_(threads)[i].thread_name               = NULL;
641    }
642 
643    tid_main = VG_(alloc_ThreadState)();
644 
645    /* Bleh.  Unfortunately there are various places in the system that
646       assume that the main thread has a ThreadId of 1.
647       - Helgrind (possibly)
648       - stack overflow message in default_action() in m_signals.c
649       - definitely a lot more places
650    */
651    vg_assert(tid_main == 1);
652 
653    return tid_main;
654 }
655 
656 
657 /* Second phase of initialisation of the scheduler.  Given the root
658    ThreadId computed by first phase of initialisation, fill in stack
659    details and acquire bigLock.  Initialise the scheduler.  This is
660    called at startup.  The caller subsequently initialises the guest
661    state components of this main thread.
662 */
VG_(scheduler_init_phase2)663 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
664                                   Addr     clstack_end,
665                                   SizeT    clstack_size )
666 {
667    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%u, "
668                    "cls_end=0x%lx, cls_sz=%lu\n",
669                    tid_main, clstack_end, clstack_size);
670 
671    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
672    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
673 
674    VG_(threads)[tid_main].client_stack_highest_byte
675       = clstack_end;
676    VG_(threads)[tid_main].client_stack_szB
677       = clstack_size;
678 
679    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
680 }
681 
682 
683 /* ---------------------------------------------------------------------
684    Helpers for running translations.
685    ------------------------------------------------------------------ */
686 
687 /* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
688    mask state, but does need to pass "val" through.  jumped must be a
689    volatile UWord. */
690 #define SCHEDSETJMP(tid, jumped, stmt)					\
691    do {									\
692       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);	\
693 									\
694       (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
695       if ((jumped) == ((UWord)0)) {                                     \
696 	 vg_assert(!_qq_tst->sched_jmpbuf_valid);			\
697 	 _qq_tst->sched_jmpbuf_valid = True;				\
698 	 stmt;								\
699       }	else if (VG_(clo_trace_sched))					\
700 	 VG_(printf)("SCHEDSETJMP(line %d) tid %u, jumped=%lu\n",       \
701                      __LINE__, tid, jumped);                            \
702       vg_assert(_qq_tst->sched_jmpbuf_valid);				\
703       _qq_tst->sched_jmpbuf_valid = False;				\
704    } while(0)
705 
706 
707 /* Do various guest state alignment checks prior to running a thread.
708    Specifically, check that what we have matches Vex's guest state
709    layout requirements.  See libvex.h for details, but in short the
710    requirements are: There must be no holes in between the primary
711    guest state, its two copies, and the spill area.  In short, all 4
712    areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
713    be placed back-to-back without holes in between. */
do_pre_run_checks(volatile ThreadState * tst)714 static void do_pre_run_checks ( volatile ThreadState* tst )
715 {
716    Addr a_vex     = (Addr) & tst->arch.vex;
717    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
718    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
719    Addr a_spill   = (Addr) & tst->arch.vex_spill;
720    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
721    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
722    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
723    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
724 
725    if (0)
726    VG_(printf)("gst %p %u, sh1 %p %u, "
727                "sh2 %p %u, spill %p %u\n",
728                (void*)a_vex, sz_vex,
729                (void*)a_vexsh1, sz_vexsh1,
730                (void*)a_vexsh2, sz_vexsh2,
731                (void*)a_spill, sz_spill );
732 
733    vg_assert(sz_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
734    vg_assert(sz_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
735    vg_assert(sz_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
736    vg_assert(sz_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
737 
738    vg_assert(a_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
739    vg_assert(a_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
740    vg_assert(a_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
741    vg_assert(a_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
742 
743    /* Check that the guest state and its two shadows have the same
744       size, and that there are no holes in between.  The latter is
745       important because Memcheck assumes that it can reliably access
746       the shadows by indexing off a pointer to the start of the
747       primary guest state area. */
748    vg_assert(sz_vex == sz_vexsh1);
749    vg_assert(sz_vex == sz_vexsh2);
750    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
751    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
752    /* Also check there's no hole between the second shadow area and
753       the spill area. */
754    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
755    vg_assert(a_vex + 3 * sz_vex == a_spill);
756 
757 #  if defined(VGA_x86)
758    /* x86 XMM regs must form an array, ie, have no holes in
759       between. */
760    vg_assert(
761       (offsetof(VexGuestX86State,guest_XMM7)
762        - offsetof(VexGuestX86State,guest_XMM0))
763       == (8/*#regs*/-1) * 16/*bytes per reg*/
764    );
765    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
766    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
767    vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
768    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
769    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
770 #  endif
771 
772 #  if defined(VGA_amd64)
773    /* amd64 YMM regs must form an array, ie, have no holes in
774       between. */
775    vg_assert(
776       (offsetof(VexGuestAMD64State,guest_YMM16)
777        - offsetof(VexGuestAMD64State,guest_YMM0))
778       == (17/*#regs*/-1) * 32/*bytes per reg*/
779    );
780    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
781    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
782    vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
783    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
784    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
785 #  endif
786 
787 #  if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
788    /* ppc guest_state vector regs must be 16 byte aligned for
789       loads/stores.  This is important! */
790    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
791    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
792    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
793    /* be extra paranoid .. */
794    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
795    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
796    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
797 #  endif
798 
799 #  if defined(VGA_arm)
800    /* arm guest_state VFP regs must be 8 byte aligned for
801       loads/stores.  Let's use 16 just to be on the safe side. */
802    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
803    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
804    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
805    /* be extra paranoid .. */
806    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
807    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
808    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
809 #  endif
810 
811 #  if defined(VGA_arm64)
812    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
813    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
814    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
815    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
816    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
817    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
818 #  endif
819 
820 #  if defined(VGA_s390x)
821    /* no special requirements */
822 #  endif
823 
824 #  if defined(VGA_mips32) || defined(VGA_mips64)
825    /* no special requirements */
826 #  endif
827 }
828 
829 // NO_VGDB_POLL value ensures vgdb is not polled, while
830 // VGDB_POLL_ASAP ensures that the next scheduler call
831 // will cause a poll.
832 #define NO_VGDB_POLL    0xffffffffffffffffULL
833 #define VGDB_POLL_ASAP  0x0ULL
834 
VG_(disable_vgdb_poll)835 void VG_(disable_vgdb_poll) (void )
836 {
837    vgdb_next_poll = NO_VGDB_POLL;
838 }
VG_(force_vgdb_poll)839 void VG_(force_vgdb_poll) ( void )
840 {
841    vgdb_next_poll = VGDB_POLL_ASAP;
842 }
843 
844 /* Run the thread tid for a while, and return a VG_TRC_* value
845    indicating why VG_(disp_run_translations) stopped, and possibly an
846    auxiliary word.  Also, only allow the thread to run for at most
847    *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
848    is False, we are running ordinary redir'd translations, and we
849    should therefore start by looking up the guest next IP in TT.  If
850    it is True then we ignore the guest next IP and just run from
851    alt_host_addr, which presumably points at host code for a no-redir
852    translation.
853 
854    Return results are placed in two_words.  two_words[0] is set to the
855    TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
856    the address to patch is placed in two_words[1].
857 */
858 static
run_thread_for_a_while(HWord * two_words,Int * dispatchCtrP,ThreadId tid,HWord alt_host_addr,Bool use_alt_host_addr)859 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
860                               /*MOD*/Int*   dispatchCtrP,
861                               ThreadId      tid,
862                               HWord         alt_host_addr,
863                               Bool          use_alt_host_addr )
864 {
865    volatile HWord        jumped         = 0;
866    volatile ThreadState* tst            = NULL; /* stop gcc complaining */
867    volatile Int          done_this_time = 0;
868    volatile HWord        host_code_addr = 0;
869 
870    /* Paranoia */
871    vg_assert(VG_(is_valid_tid)(tid));
872    vg_assert(VG_(is_running_thread)(tid));
873    vg_assert(!VG_(is_exiting)(tid));
874    vg_assert(*dispatchCtrP > 0);
875 
876    tst = VG_(get_ThreadState)(tid);
877    do_pre_run_checks( tst );
878    /* end Paranoia */
879 
880    /* Futz with the XIndir stats counters. */
881    vg_assert(VG_(stats__n_xindirs_32) == 0);
882    vg_assert(VG_(stats__n_xindir_misses_32) == 0);
883 
884    /* Clear return area. */
885    two_words[0] = two_words[1] = 0;
886 
887    /* Figure out where we're starting from. */
888    if (use_alt_host_addr) {
889       /* unusual case -- no-redir translation */
890       host_code_addr = alt_host_addr;
891    } else {
892       /* normal case -- redir translation */
893       UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
894       if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
895          host_code_addr = VG_(tt_fast)[cno].host;
896       else {
897          Addr res = 0;
898          /* not found in VG_(tt_fast). Searching here the transtab
899             improves the performance compared to returning directly
900             to the scheduler. */
901          Bool  found = VG_(search_transtab)(&res, NULL, NULL,
902                                             (Addr)tst->arch.vex.VG_INSTR_PTR,
903                                             True/*upd cache*/
904                                             );
905          if (LIKELY(found)) {
906             host_code_addr = res;
907          } else {
908             /* At this point, we know that we intended to start at a
909                normal redir translation, but it was not found.  In
910                which case we can return now claiming it's not
911                findable. */
912             two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
913             return;
914          }
915       }
916    }
917    /* We have either a no-redir or a redir translation. */
918    vg_assert(host_code_addr != 0); /* implausible */
919 
920    /* there should be no undealt-with signals */
921    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
922 
923    /* Set up event counter stuff for the run. */
924    tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
925    tst->arch.vex.host_EvC_FAILADDR
926       = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
927 
928    if (0) {
929       vki_sigset_t m;
930       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
931       vg_assert(err == 0);
932       VG_(printf)("tid %u: entering code with unblocked signals: ", tid);
933       for (i = 1; i <= _VKI_NSIG; i++)
934          if (!VG_(sigismember)(&m, i))
935             VG_(printf)("%d ", i);
936       VG_(printf)("\n");
937    }
938 
939    /* Set up return-value area. */
940 
941    // Tell the tool this thread is about to run client code
942    VG_TRACK( start_client_code, tid, bbs_done );
943 
944    vg_assert(VG_(in_generated_code) == False);
945    VG_(in_generated_code) = True;
946 
947    SCHEDSETJMP(
948       tid,
949       jumped,
950       VG_(disp_run_translations)(
951          two_words,
952          (volatile void*)&tst->arch.vex,
953          host_code_addr
954       )
955    );
956 
957    vg_assert(VG_(in_generated_code) == True);
958    VG_(in_generated_code) = False;
959 
960    if (jumped != (HWord)0) {
961       /* We get here if the client took a fault that caused our signal
962          handler to longjmp. */
963       vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
964       two_words[0] = VG_TRC_FAULT_SIGNAL;
965       two_words[1] = 0;
966       block_signals();
967    }
968 
969    /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
970       and zero out the 32-bit ones in preparation for the next run of
971       generated code. */
972    stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
973    VG_(stats__n_xindirs_32) = 0;
974    stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
975    VG_(stats__n_xindir_misses_32) = 0;
976 
977    /* Inspect the event counter. */
978    vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
979    vg_assert(tst->arch.vex.host_EvC_FAILADDR
980              == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
981 
982    /* The number of events done this time is the difference between
983       the event counter originally and what it is now.  Except -- if
984       it has gone negative (to -1) then the transition 0 to -1 doesn't
985       correspond to a real executed block, so back it out.  It's like
986       this because the event checks decrement the counter first and
987       check it for negativeness second, hence the 0 to -1 transition
988       causes a bailout and the block it happens in isn't executed. */
989    {
990      Int dispatchCtrAfterwards = (Int)tst->arch.vex.host_EvC_COUNTER;
991      done_this_time = *dispatchCtrP - dispatchCtrAfterwards;
992      if (dispatchCtrAfterwards == -1) {
993         done_this_time--;
994      } else {
995         /* If the generated code drives the counter below -1, something
996            is seriously wrong. */
997         vg_assert(dispatchCtrAfterwards >= 0);
998      }
999    }
1000 
1001    vg_assert(done_this_time >= 0);
1002    bbs_done += (ULong)done_this_time;
1003 
1004    *dispatchCtrP -= done_this_time;
1005    vg_assert(*dispatchCtrP >= 0);
1006 
1007    // Tell the tool this thread has stopped running client code
1008    VG_TRACK( stop_client_code, tid, bbs_done );
1009 
1010    if (bbs_done >= vgdb_next_poll) {
1011       if (VG_(clo_vgdb_poll))
1012          vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
1013       else
1014          /* value was changed due to gdbserver invocation via ptrace */
1015          vgdb_next_poll = NO_VGDB_POLL;
1016       if (VG_(gdbserver_activity) (tid))
1017          VG_(gdbserver) (tid);
1018    }
1019 
1020    /* TRC value and possible auxiliary patch-address word are already
1021       in two_words[0] and [1] respectively, as a result of the call to
1022       VG_(run_innerloop). */
1023    /* Stay sane .. */
1024    if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
1025        || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
1026       vg_assert(two_words[1] != 0); /* we have a legit patch addr */
1027    } else {
1028       vg_assert(two_words[1] == 0); /* nobody messed with it */
1029    }
1030 }
1031 
1032 
1033 /* ---------------------------------------------------------------------
1034    The scheduler proper.
1035    ------------------------------------------------------------------ */
1036 
handle_tt_miss(ThreadId tid)1037 static void handle_tt_miss ( ThreadId tid )
1038 {
1039    Bool found;
1040    Addr ip = VG_(get_IP)(tid);
1041 
1042    /* Trivial event.  Miss in the fast-cache.  Do a full
1043       lookup for it. */
1044    found = VG_(search_transtab)( NULL, NULL, NULL,
1045                                  ip, True/*upd_fast_cache*/ );
1046    if (UNLIKELY(!found)) {
1047       /* Not found; we need to request a translation. */
1048       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1049                           bbs_done, True/*allow redirection*/ )) {
1050          found = VG_(search_transtab)( NULL, NULL, NULL,
1051                                        ip, True );
1052          vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
1053 
1054       } else {
1055 	 // If VG_(translate)() fails, it's because it had to throw a
1056 	 // signal because the client jumped to a bad address.  That
1057 	 // means that either a signal has been set up for delivery,
1058 	 // or the thread has been marked for termination.  Either
1059 	 // way, we just need to go back into the scheduler loop.
1060       }
1061    }
1062 }
1063 
1064 static
handle_chain_me(ThreadId tid,void * place_to_chain,Bool toFastEP)1065 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
1066 {
1067    Bool found          = False;
1068    Addr ip             = VG_(get_IP)(tid);
1069    SECno to_sNo         = INV_SNO;
1070    TTEno to_tteNo       = INV_TTE;
1071 
1072    found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1073                                  ip, False/*dont_upd_fast_cache*/ );
1074    if (!found) {
1075       /* Not found; we need to request a translation. */
1076       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1077                           bbs_done, True/*allow redirection*/ )) {
1078          found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1079                                        ip, False );
1080          vg_assert2(found, "handle_chain_me: missing tt_fast entry");
1081       } else {
1082 	 // If VG_(translate)() fails, it's because it had to throw a
1083 	 // signal because the client jumped to a bad address.  That
1084 	 // means that either a signal has been set up for delivery,
1085 	 // or the thread has been marked for termination.  Either
1086 	 // way, we just need to go back into the scheduler loop.
1087         return;
1088       }
1089    }
1090    vg_assert(found);
1091    vg_assert(to_sNo != INV_SNO);
1092    vg_assert(to_tteNo != INV_TTE);
1093 
1094    /* So, finally we know where to patch through to.  Do the patching
1095       and update the various admin tables that allow it to be undone
1096       in the case that the destination block gets deleted. */
1097    VG_(tt_tc_do_chaining)( place_to_chain,
1098                            to_sNo, to_tteNo, toFastEP );
1099 }
1100 
handle_syscall(ThreadId tid,UInt trc)1101 static void handle_syscall(ThreadId tid, UInt trc)
1102 {
1103    ThreadState * volatile tst = VG_(get_ThreadState)(tid);
1104    volatile UWord jumped;
1105 
1106    /* Syscall may or may not block; either way, it will be
1107       complete by the time this call returns, and we'll be
1108       runnable again.  We could take a signal while the
1109       syscall runs. */
1110 
1111    if (VG_(clo_sanity_level) >= 3) {
1112       HChar buf[50];    // large enough
1113       VG_(sprintf)(buf, "(BEFORE SYSCALL, tid %u)", tid);
1114       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
1115       vg_assert(ok);
1116    }
1117 
1118    SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
1119 
1120    if (VG_(clo_sanity_level) >= 3) {
1121       HChar buf[50];    // large enough
1122       VG_(sprintf)(buf, "(AFTER SYSCALL, tid %u)", tid);
1123       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
1124       vg_assert(ok);
1125    }
1126 
1127    if (!VG_(is_running_thread)(tid))
1128       VG_(printf)("tid %u not running; VG_(running_tid)=%u, tid %u status %u\n",
1129 		  tid, VG_(running_tid), tid, tst->status);
1130    vg_assert(VG_(is_running_thread)(tid));
1131 
1132    if (jumped != (UWord)0) {
1133       block_signals();
1134       VG_(poll_signals)(tid);
1135    }
1136 }
1137 
1138 /* tid just requested a jump to the noredir version of its current
1139    program counter.  So make up that translation if needed, run it,
1140    and return the resulting thread return code in two_words[]. */
1141 static
handle_noredir_jump(HWord * two_words,Int * dispatchCtrP,ThreadId tid)1142 void handle_noredir_jump ( /*OUT*/HWord* two_words,
1143                            /*MOD*/Int*   dispatchCtrP,
1144                            ThreadId tid )
1145 {
1146    /* Clear return area. */
1147    two_words[0] = two_words[1] = 0;
1148 
1149    Addr  hcode = 0;
1150    Addr  ip    = VG_(get_IP)(tid);
1151 
1152    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
1153    if (!found) {
1154       /* Not found; we need to request a translation. */
1155       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
1156                           False/*NO REDIRECTION*/ )) {
1157 
1158          found = VG_(search_unredir_transtab)( &hcode, ip );
1159          vg_assert2(found, "unredir translation missing after creation?!");
1160       } else {
1161 	 // If VG_(translate)() fails, it's because it had to throw a
1162 	 // signal because the client jumped to a bad address.  That
1163 	 // means that either a signal has been set up for delivery,
1164 	 // or the thread has been marked for termination.  Either
1165 	 // way, we just need to go back into the scheduler loop.
1166          two_words[0] = VG_TRC_BORING;
1167          return;
1168       }
1169 
1170    }
1171 
1172    vg_assert(found);
1173    vg_assert(hcode != 0);
1174 
1175    /* Otherwise run it and return the resulting VG_TRC_* value. */
1176    vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
1177    run_thread_for_a_while( two_words, dispatchCtrP, tid,
1178                            hcode, True/*use hcode*/ );
1179 }
1180 
1181 
1182 /*
1183    Run a thread until it wants to exit.
1184 
1185    We assume that the caller has already called VG_(acquire_BigLock) for
1186    us, so we own the VCPU.  Also, all signals are blocked.
1187  */
VG_(scheduler)1188 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
1189 {
1190    /* Holds the remaining size of this thread's "timeslice". */
1191    Int dispatch_ctr = 0;
1192 
1193    ThreadState *tst = VG_(get_ThreadState)(tid);
1194    static Bool vgdb_startup_action_done = False;
1195 
1196    if (VG_(clo_trace_sched))
1197       print_sched_event(tid, "entering VG_(scheduler)");
1198 
1199    /* Do vgdb initialization (but once). Only the first (main) task
1200       starting up will do the below.
1201       Initialize gdbserver earlier than at the first
1202       thread VG_(scheduler) is causing problems:
1203       * at the end of VG_(scheduler_init_phase2) :
1204         The main thread is in VgTs_Init state, but in a not yet
1205         consistent state => the thread cannot be reported to gdb
1206         (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1207         back the guest registers to gdb).
1208       * at end of valgrind_main, just
1209         before VG_(main_thread_wrapper_NORETURN)(1) :
1210         The main thread is still in VgTs_Init state but in a
1211         more advanced state. However, the thread state is not yet
1212         completely initialized : a.o., the os_state is not yet fully
1213         set => the thread is then not properly reported to gdb,
1214         which is then confused (causing e.g. a duplicate thread be
1215         shown, without thread id).
1216       * it would be possible to initialize gdbserver "lower" in the
1217         call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1218         these are platform dependent and the place at which
1219         the thread state is completely initialized is not
1220         specific anymore to the main thread (so a similar "do it only
1221         once" would be needed).
1222 
1223         => a "once only" initialization here is the best compromise. */
1224    if (!vgdb_startup_action_done) {
1225       vg_assert(tid == 1); // it must be the main thread.
1226       vgdb_startup_action_done = True;
1227       if (VG_(clo_vgdb) != Vg_VgdbNo) {
1228          /* If we have to poll, ensures we do an initial poll at first
1229             scheduler call. Otherwise, ensure no poll (unless interrupted
1230             by ptrace). */
1231          if (VG_(clo_vgdb_poll))
1232             VG_(force_vgdb_poll) ();
1233          else
1234             VG_(disable_vgdb_poll) ();
1235 
1236          vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
1237          /* As we are initializing, VG_(dyn_vgdb_error) can't have been
1238             changed yet. */
1239 
1240          VG_(gdbserver_prerun_action) (1);
1241       } else {
1242          VG_(disable_vgdb_poll) ();
1243       }
1244    }
1245 
1246    if (SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))
1247        && tid != 1) {
1248       /* We disable the stack cache the first time we see a thread other
1249          than the main thread appearing. At this moment, we are sure the pthread
1250          lib loading is done/variable was initialised by pthread lib/... */
1251       if (VG_(client__stack_cache_actsize__addr)) {
1252          if (*VG_(client__stack_cache_actsize__addr) == 0) {
1253             VG_(debugLog)(1,"sched",
1254                           "pthread stack cache size disable done"
1255                           " via kludge\n");
1256             *VG_(client__stack_cache_actsize__addr) = 1000 * 1000 * 1000;
1257             /* Set a value big enough to be above the hardcoded maximum stack
1258                cache size in glibc, small enough to allow a pthread stack size
1259                to be added without risk of overflow. */
1260          }
1261       } else {
1262           VG_(debugLog)(0,"sched",
1263                         "WARNING: pthread stack cache cannot be disabled!\n");
1264           VG_(clo_sim_hints) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache);
1265           /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
1266              to avoid having a msg for all following threads. */
1267       }
1268    }
1269 
1270    /* set the proper running signal mask */
1271    block_signals();
1272 
1273    vg_assert(VG_(is_running_thread)(tid));
1274 
1275    dispatch_ctr = SCHEDULING_QUANTUM;
1276 
1277    while (!VG_(is_exiting)(tid)) {
1278 
1279       vg_assert(dispatch_ctr >= 0);
1280       if (dispatch_ctr == 0) {
1281 
1282 	 /* Our slice is done, so yield the CPU to another thread.  On
1283             Linux, this doesn't sleep between sleeping and running,
1284             since that would take too much time. */
1285 
1286 	 /* 4 July 06: it seems that a zero-length nsleep is needed to
1287             cause async thread cancellation (canceller.c) to terminate
1288             in finite time; else it is in some kind of race/starvation
1289             situation and completion is arbitrarily delayed (although
1290             this is not a deadlock).
1291 
1292             Unfortunately these sleeps cause MPI jobs not to terminate
1293             sometimes (some kind of livelock).  So sleeping once
1294             every N opportunities appears to work. */
1295 
1296 	 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1297             sys_yield also helps the problem, whilst not crashing apps. */
1298 
1299 	 VG_(release_BigLock)(tid, VgTs_Yielding,
1300                                    "VG_(scheduler):timeslice");
1301 	 /* ------------ now we don't have The Lock ------------ */
1302 
1303 	 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
1304 	 /* ------------ now we do have The Lock ------------ */
1305 
1306 	 /* OK, do some relatively expensive housekeeping stuff */
1307 	 scheduler_sanity(tid);
1308 	 VG_(sanity_check_general)(False);
1309 
1310 	 /* Look for any pending signals for this thread, and set them up
1311 	    for delivery */
1312 	 VG_(poll_signals)(tid);
1313 
1314 	 if (VG_(is_exiting)(tid))
1315 	    break;		/* poll_signals picked up a fatal signal */
1316 
1317 	 /* For stats purposes only. */
1318 	 n_scheduling_events_MAJOR++;
1319 
1320 	 /* Figure out how many bbs to ask vg_run_innerloop to do. */
1321          dispatch_ctr = SCHEDULING_QUANTUM;
1322 
1323 	 /* paranoia ... */
1324 	 vg_assert(tst->tid == tid);
1325 	 vg_assert(tst->os_state.lwpid == VG_(gettid)());
1326       }
1327 
1328       /* For stats purposes only. */
1329       n_scheduling_events_MINOR++;
1330 
1331       if (0)
1332          VG_(message)(Vg_DebugMsg, "thread %u: running for %d bbs\n",
1333                                    tid, dispatch_ctr - 1 );
1334 
1335       HWord trc[2]; /* "two_words" */
1336       run_thread_for_a_while( &trc[0],
1337                               &dispatch_ctr,
1338                               tid, 0/*ignored*/, False );
1339 
1340       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1341          const HChar *name = name_of_sched_event(trc[0]);
1342          HChar buf[VG_(strlen)(name) + 10];    // large enough
1343 	 VG_(sprintf)(buf, "TRC: %s", name);
1344 	 print_sched_event(tid, buf);
1345       }
1346 
1347       if (trc[0] == VEX_TRC_JMP_NOREDIR) {
1348          /* If we got a request to run a no-redir version of
1349             something, do so now -- handle_noredir_jump just (creates
1350             and) runs that one translation.  The flip side is that the
1351             noredir translation can't itself return another noredir
1352             request -- that would be nonsensical.  It can, however,
1353             return VG_TRC_BORING, which just means keep going as
1354             normal. */
1355          /* Note that the fact that we need to continue with a
1356             no-redir jump is not recorded anywhere else in this
1357             thread's state.  So we *must* execute the block right now
1358             -- we can't fail to execute it and later resume with it,
1359             because by then we'll have forgotten the fact that it
1360             should be run as no-redir, but will get run as a normal
1361             potentially-redir'd, hence screwing up.  This really ought
1362             to be cleaned up, by noting in the guest state that the
1363             next block to be executed should be no-redir.  Then we can
1364             suspend and resume at any point, which isn't the case at
1365             the moment. */
1366          /* We can't enter a no-redir translation with the dispatch
1367             ctr set to zero, for the reasons commented just above --
1368             we need to force it to execute right now.  So, if the
1369             dispatch ctr is zero, set it to one.  Note that this would
1370             have the bad side effect of holding the Big Lock arbitrary
1371             long should there be an arbitrarily long sequence of
1372             back-to-back no-redir translations to run.  But we assert
1373             just below that this translation cannot request another
1374             no-redir jump, so we should be safe against that. */
1375          if (dispatch_ctr == 0) {
1376             dispatch_ctr = 1;
1377          }
1378          handle_noredir_jump( &trc[0],
1379                               &dispatch_ctr,
1380                               tid );
1381          vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
1382 
1383          /* This can't be allowed to happen, since it means the block
1384             didn't execute, and we have no way to resume-as-noredir
1385             after we get more timeslice.  But I don't think it ever
1386             can, since handle_noredir_jump will assert if the counter
1387             is zero on entry. */
1388          vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
1389          /* This asserts the same thing. */
1390          vg_assert(dispatch_ctr >= 0);
1391 
1392          /* A no-redir translation can't return with a chain-me
1393             request, since chaining in the no-redir cache is too
1394             complex. */
1395          vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1396                    && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
1397       }
1398 
1399       switch (trc[0]) {
1400       case VEX_TRC_JMP_BORING:
1401          /* assisted dispatch, no event.  Used by no-redir
1402             translations to force return to the scheduler. */
1403       case VG_TRC_BORING:
1404          /* no special event, just keep going. */
1405          break;
1406 
1407       case VG_TRC_INNER_FASTMISS:
1408 	 vg_assert(dispatch_ctr >= 0);
1409 	 handle_tt_miss(tid);
1410 	 break;
1411 
1412       case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
1413          if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
1414          handle_chain_me(tid, (void*)trc[1], False);
1415          break;
1416       }
1417 
1418       case VG_TRC_CHAIN_ME_TO_FAST_EP: {
1419          if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
1420          handle_chain_me(tid, (void*)trc[1], True);
1421          break;
1422       }
1423 
1424       case VEX_TRC_JMP_CLIENTREQ:
1425 	 do_client_request(tid);
1426 	 break;
1427 
1428       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
1429       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
1430       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
1431       case VEX_TRC_JMP_SYS_INT145:  /* x86-solaris */
1432       case VEX_TRC_JMP_SYS_INT210:  /* x86-solaris */
1433       /* amd64-linux, ppc32-linux, amd64-darwin, amd64-solaris */
1434       case VEX_TRC_JMP_SYS_SYSCALL:
1435 	 handle_syscall(tid, trc[0]);
1436 	 if (VG_(clo_sanity_level) > 2)
1437 	    VG_(sanity_check_general)(True); /* sanity-check every syscall */
1438 	 break;
1439 
1440       case VEX_TRC_JMP_YIELD:
1441 	 /* Explicit yield, because this thread is in a spin-lock
1442 	    or something.  Only let the thread run for a short while
1443             longer.  Because swapping to another thread is expensive,
1444             we're prepared to let this thread eat a little more CPU
1445             before swapping to another.  That means that short term
1446             spins waiting for hardware to poke memory won't cause a
1447             thread swap. */
1448          if (dispatch_ctr > 1000)
1449             dispatch_ctr = 1000;
1450 	 break;
1451 
1452       case VG_TRC_INNER_COUNTERZERO:
1453 	 /* Timeslice is out.  Let a new thread be scheduled. */
1454 	 vg_assert(dispatch_ctr == 0);
1455 	 break;
1456 
1457       case VG_TRC_FAULT_SIGNAL:
1458 	 /* Everything should be set up (either we're exiting, or
1459 	    about to start in a signal handler). */
1460 	 break;
1461 
1462       case VEX_TRC_JMP_MAPFAIL:
1463          /* Failure of arch-specific address translation (x86/amd64
1464             segment override use) */
1465          /* jrs 2005 03 11: is this correct? */
1466          VG_(synth_fault)(tid);
1467          break;
1468 
1469       case VEX_TRC_JMP_EMWARN: {
1470          static Int  counts[EmNote_NUMBER];
1471          static Bool counts_initted = False;
1472          VexEmNote ew;
1473          const HChar* what;
1474          Bool      show;
1475          Int       q;
1476          if (!counts_initted) {
1477             counts_initted = True;
1478             for (q = 0; q < EmNote_NUMBER; q++)
1479                counts[q] = 0;
1480          }
1481          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1482          what = (ew < 0 || ew >= EmNote_NUMBER)
1483                    ? "unknown (?!)"
1484                    : LibVEX_EmNote_string(ew);
1485          show = (ew < 0 || ew >= EmNote_NUMBER)
1486                    ? True
1487                    : counts[ew]++ < 3;
1488          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
1489             VG_(message)( Vg_UserMsg,
1490                           "Emulation warning: unsupported action:\n");
1491             VG_(message)( Vg_UserMsg, "  %s\n", what);
1492             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1493          }
1494          break;
1495       }
1496 
1497       case VEX_TRC_JMP_EMFAIL: {
1498          VexEmNote ew;
1499          const HChar* what;
1500          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1501          what = (ew < 0 || ew >= EmNote_NUMBER)
1502                    ? "unknown (?!)"
1503                    : LibVEX_EmNote_string(ew);
1504          VG_(message)( Vg_UserMsg,
1505                        "Emulation fatal error -- Valgrind cannot continue:\n");
1506          VG_(message)( Vg_UserMsg, "  %s\n", what);
1507          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1508          VG_(message)(Vg_UserMsg, "\n");
1509          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
1510          VG_(message)(Vg_UserMsg, "\n");
1511          VG_(exit)(1);
1512          break;
1513       }
1514 
1515       case VEX_TRC_JMP_SIGILL:
1516          VG_(synth_sigill)(tid, VG_(get_IP)(tid));
1517          break;
1518 
1519       case VEX_TRC_JMP_SIGTRAP:
1520          VG_(synth_sigtrap)(tid);
1521          break;
1522 
1523       case VEX_TRC_JMP_SIGSEGV:
1524          VG_(synth_fault)(tid);
1525          break;
1526 
1527       case VEX_TRC_JMP_SIGBUS:
1528          VG_(synth_sigbus)(tid);
1529          break;
1530 
1531       case VEX_TRC_JMP_SIGFPE_INTDIV:
1532          VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
1533          break;
1534 
1535       case VEX_TRC_JMP_SIGFPE_INTOVF:
1536          VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
1537          break;
1538 
1539       case VEX_TRC_JMP_NODECODE: {
1540          Addr addr = VG_(get_IP)(tid);
1541 
1542          if (VG_(clo_sigill_diag)) {
1543             VG_(umsg)(
1544                "valgrind: Unrecognised instruction at address %#lx.\n", addr);
1545             VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1546 #        define M(a) VG_(umsg)(a "\n");
1547          M("Your program just tried to execute an instruction that Valgrind" );
1548          M("did not recognise.  There are two possible reasons for this."    );
1549          M("1. Your program has a bug and erroneously jumped to a non-code"  );
1550          M("   location.  If you are running Memcheck and you just saw a"    );
1551          M("   warning about a bad jump, it's probably your program's fault.");
1552          M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1553          M("   i.e. it's Valgrind's fault.  If you think this is the case or");
1554          M("   you are not sure, please let us know and we'll try to fix it.");
1555          M("Either way, Valgrind will now raise a SIGILL signal which will"  );
1556          M("probably kill your program."                                     );
1557 #        undef M
1558          }
1559 #        if defined(VGA_s390x)
1560          /* Now that the complaint is out we need to adjust the guest_IA. The
1561             reason is that -- after raising the exception -- execution will
1562             continue with the insn that follows the invalid insn. As the first
1563             2 bits of the invalid insn determine its length in the usual way,
1564             we can compute the address of the next insn here and adjust the
1565             guest_IA accordingly. This adjustment is essential and tested by
1566             none/tests/s390x/op_exception.c (which would loop forever
1567             otherwise) */
1568          UChar byte = ((UChar *)addr)[0];
1569          UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
1570          Addr  next_insn_addr = addr + insn_length;
1571          VG_(set_IP)(tid, next_insn_addr);
1572 #        endif
1573          VG_(synth_sigill)(tid, addr);
1574          break;
1575       }
1576 
1577       case VEX_TRC_JMP_INVALICACHE:
1578          VG_(discard_translations)(
1579             (Addr)VG_(threads)[tid].arch.vex.guest_CMSTART,
1580             VG_(threads)[tid].arch.vex.guest_CMLEN,
1581             "scheduler(VEX_TRC_JMP_INVALICACHE)"
1582          );
1583          if (0)
1584             VG_(printf)("dump translations done.\n");
1585          break;
1586 
1587       case VEX_TRC_JMP_FLUSHDCACHE: {
1588          void* start = (void*)VG_(threads)[tid].arch.vex.guest_CMSTART;
1589          SizeT len   = VG_(threads)[tid].arch.vex.guest_CMLEN;
1590          VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
1591          VG_(flush_dcache)(start, len);
1592          break;
1593       }
1594 
1595       case VG_TRC_INVARIANT_FAILED:
1596          /* This typically happens if, after running generated code,
1597             it is detected that host CPU settings (eg, FPU/Vector
1598             control words) are not as they should be.  Vex's code
1599             generation specifies the state such control words should
1600             be in on entry to Vex-generated code, and they should be
1601             unchanged on exit from it.  Failure of this assertion
1602             usually means a bug in Vex's code generation. */
1603          //{ UInt xx;
1604          //  __asm__ __volatile__ (
1605          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
1606          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
1607          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1608          //}
1609          vg_assert2(0, "VG_(scheduler), phase 3: "
1610                        "run_innerloop detected host "
1611                        "state invariant failure", trc);
1612 
1613       case VEX_TRC_JMP_SYS_SYSENTER:
1614          /* Do whatever simulation is appropriate for an x86 sysenter
1615             instruction.  Note that it is critical to set this thread's
1616             guest_EIP to point at the code to execute after the
1617             sysenter, since Vex-generated code will not have set it --
1618             vex does not know what it should be.  Vex sets the next
1619             address to zero, so if you don't set guest_EIP, the thread
1620             will jump to zero afterwards and probably die as a result. */
1621 #        if defined(VGP_x86_linux)
1622          vg_assert2(0, "VG_(scheduler), phase 3: "
1623                        "sysenter_x86 on x86-linux is not supported");
1624 #        elif defined(VGP_x86_darwin) || defined(VGP_x86_solaris)
1625          /* return address in client edx */
1626          VG_(threads)[tid].arch.vex.guest_EIP
1627             = VG_(threads)[tid].arch.vex.guest_EDX;
1628          handle_syscall(tid, trc[0]);
1629 #        else
1630          vg_assert2(0, "VG_(scheduler), phase 3: "
1631                        "sysenter_x86 on non-x86 platform?!?!");
1632 #        endif
1633          break;
1634 
1635       default:
1636 	 vg_assert2(0, "VG_(scheduler), phase 3: "
1637                        "unexpected thread return code (%u)", trc[0]);
1638 	 /* NOTREACHED */
1639 	 break;
1640 
1641       } /* switch (trc) */
1642 
1643       if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
1644          maybe_show_sb_profile();
1645    }
1646 
1647    if (VG_(clo_trace_sched))
1648       print_sched_event(tid, "exiting VG_(scheduler)");
1649 
1650    vg_assert(VG_(is_exiting)(tid));
1651 
1652    return tst->exitreason;
1653 }
1654 
1655 
1656 /*
1657    This causes all threads to forceably exit.  They aren't actually
1658    dead by the time this returns; you need to call
1659    VG_(reap_threads)() to wait for them.
1660  */
VG_(nuke_all_threads_except)1661 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
1662 {
1663    ThreadId tid;
1664 
1665    vg_assert(VG_(is_running_thread)(me));
1666 
1667    for (tid = 1; tid < VG_N_THREADS; tid++) {
1668       if (tid == me
1669           || VG_(threads)[tid].status == VgTs_Empty)
1670          continue;
1671       if (0)
1672          VG_(printf)(
1673             "VG_(nuke_all_threads_except): nuking tid %u\n", tid);
1674 
1675       VG_(threads)[tid].exitreason = src;
1676       if (src == VgSrc_FatalSig)
1677          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
1678       VG_(get_thread_out_of_syscall)(tid);
1679    }
1680 }
1681 
1682 
1683 /* ---------------------------------------------------------------------
1684    Specifying shadow register values
1685    ------------------------------------------------------------------ */
1686 
1687 #if defined(VGA_x86)
1688 #  define VG_CLREQ_ARGS       guest_EAX
1689 #  define VG_CLREQ_RET        guest_EDX
1690 #elif defined(VGA_amd64)
1691 #  define VG_CLREQ_ARGS       guest_RAX
1692 #  define VG_CLREQ_RET        guest_RDX
1693 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
1694 #  define VG_CLREQ_ARGS       guest_GPR4
1695 #  define VG_CLREQ_RET        guest_GPR3
1696 #elif defined(VGA_arm)
1697 #  define VG_CLREQ_ARGS       guest_R4
1698 #  define VG_CLREQ_RET        guest_R3
1699 #elif defined(VGA_arm64)
1700 #  define VG_CLREQ_ARGS       guest_X4
1701 #  define VG_CLREQ_RET        guest_X3
1702 #elif defined (VGA_s390x)
1703 #  define VG_CLREQ_ARGS       guest_r2
1704 #  define VG_CLREQ_RET        guest_r3
1705 #elif defined(VGA_mips32) || defined(VGA_mips64)
1706 #  define VG_CLREQ_ARGS       guest_r12
1707 #  define VG_CLREQ_RET        guest_r11
1708 #elif defined(VGA_tilegx)
1709 #  define VG_CLREQ_ARGS       guest_r12
1710 #  define VG_CLREQ_RET        guest_r11
1711 #else
1712 #  error Unknown arch
1713 #endif
1714 
1715 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
1716 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
1717 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
1718 
1719 // These macros write a value to a client's thread register, and tell the
1720 // tool that it's happened (if necessary).
1721 
1722 #define SET_CLREQ_RETVAL(zztid, zzval) \
1723    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1724         VG_TRACK( post_reg_write, \
1725                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1726    } while (0)
1727 
1728 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1729    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1730         VG_TRACK( post_reg_write_clientcall_return, \
1731                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
1732    } while (0)
1733 
1734 
1735 /* ---------------------------------------------------------------------
1736    Handle client requests.
1737    ------------------------------------------------------------------ */
1738 
1739 // OS-specific(?) client requests
os_client_request(ThreadId tid,UWord * args)1740 static Bool os_client_request(ThreadId tid, UWord *args)
1741 {
1742    Bool handled = True;
1743 
1744    vg_assert(VG_(is_running_thread)(tid));
1745 
1746    switch(args[0]) {
1747    case VG_USERREQ__LIBC_FREERES_DONE:
1748       /* This is equivalent to an exit() syscall, but we don't set the
1749 	 exitcode (since it might already be set) */
1750       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
1751          VG_(message)(Vg_DebugMsg,
1752                       "__libc_freeres() done; really quitting!\n");
1753       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
1754       break;
1755 
1756    default:
1757       handled = False;
1758       break;
1759    }
1760 
1761    return handled;
1762 }
1763 
1764 
1765 /* Write out a client message, possibly including a back trace. Return
1766    the number of characters written. In case of XML output, the format
1767    string as well as any arguments it requires will be XML'ified.
1768    I.e. special characters such as the angle brackets will be translated
1769    into proper escape sequences. */
1770 static
print_client_message(ThreadId tid,const HChar * format,va_list * vargsp,Bool include_backtrace)1771 Int print_client_message( ThreadId tid, const HChar *format,
1772                           va_list *vargsp, Bool include_backtrace)
1773 {
1774    Int count;
1775 
1776    if (VG_(clo_xml)) {
1777       /* Translate the format string as follows:
1778          <  -->  &lt;
1779          >  -->  &gt;
1780          &  -->  &amp;
1781          %s -->  %pS
1782          Yes, yes, it's simplified but in synch with
1783          myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
1784       */
1785 
1786       /* Allocate a buffer that is for sure large enough. */
1787       HChar xml_format[VG_(strlen)(format) * 5 + 1];
1788 
1789       const HChar *p;
1790       HChar *q = xml_format;
1791 
1792       for (p = format; *p; ++p) {
1793          switch (*p) {
1794          case '<': VG_(strcpy)(q, "&lt;");  q += 4; break;
1795          case '>': VG_(strcpy)(q, "&gt;");  q += 4; break;
1796          case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
1797          case '%':
1798             /* Careful: make sure %%s stays %%s */
1799             *q++ = *p++;
1800             if (*p == 's') {
1801               *q++ = 'p';
1802               *q++ = 'S';
1803             } else {
1804               *q++ = *p;
1805             }
1806             break;
1807 
1808          default:
1809             *q++ = *p;
1810             break;
1811          }
1812       }
1813       *q = '\0';
1814 
1815       VG_(printf_xml)( "<clientmsg>\n" );
1816       VG_(printf_xml)( "  <tid>%u</tid>\n", tid );
1817       const ThreadState *tst = VG_(get_ThreadState)(tid);
1818       if (tst->thread_name)
1819          VG_(printf_xml)("  <threadname>%s</threadname>\n", tst->thread_name);
1820       VG_(printf_xml)( "  <text>" );
1821       count = VG_(vprintf_xml)( xml_format, *vargsp );
1822       VG_(printf_xml)( "  </text>\n" );
1823    } else {
1824       count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
1825       VG_(message_flush)();
1826    }
1827 
1828    if (include_backtrace)
1829       VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1830 
1831    if (VG_(clo_xml))
1832       VG_(printf_xml)( "</clientmsg>\n" );
1833 
1834    return count;
1835 }
1836 
1837 
1838 /* Do a client request for the thread tid.  After the request, tid may
1839    or may not still be runnable; if not, the scheduler will have to
1840    choose a new thread to run.
1841 */
1842 static
do_client_request(ThreadId tid)1843 void do_client_request ( ThreadId tid )
1844 {
1845    UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
1846    UWord req_no = arg[0];
1847 
1848    if (0)
1849       VG_(printf)("req no = 0x%lx, arg = %p\n", req_no, arg);
1850    switch (req_no) {
1851 
1852       case VG_USERREQ__CLIENT_CALL0: {
1853          UWord (*f)(ThreadId) = (__typeof__(f))arg[1];
1854 	 if (f == NULL)
1855 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
1856 	 else
1857 	    SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
1858          break;
1859       }
1860       case VG_USERREQ__CLIENT_CALL1: {
1861          UWord (*f)(ThreadId, UWord) = (__typeof__(f))arg[1];
1862 	 if (f == NULL)
1863 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
1864 	 else
1865 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
1866          break;
1867       }
1868       case VG_USERREQ__CLIENT_CALL2: {
1869          UWord (*f)(ThreadId, UWord, UWord) = (__typeof__(f))arg[1];
1870 	 if (f == NULL)
1871 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
1872 	 else
1873 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
1874          break;
1875       }
1876       case VG_USERREQ__CLIENT_CALL3: {
1877          UWord (*f)(ThreadId, UWord, UWord, UWord) = (__typeof__(f))arg[1];
1878 	 if (f == NULL)
1879 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
1880 	 else
1881 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
1882          break;
1883       }
1884 
1885       // Nb: this looks like a circular definition, because it kind of is.
1886       // See comment in valgrind.h to understand what's going on.
1887       case VG_USERREQ__RUNNING_ON_VALGRIND:
1888          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
1889          break;
1890 
1891       case VG_USERREQ__PRINTF: {
1892          const HChar* format = (HChar *)arg[1];
1893          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1894             _VALIST_BY_REF version instead */
1895          if (sizeof(va_list) != sizeof(UWord))
1896             goto va_list_casting_error_NORETURN;
1897          union {
1898             va_list vargs;
1899             unsigned long uw;
1900          } u;
1901          u.uw = (unsigned long)arg[2];
1902          Int count =
1903             print_client_message( tid, format, &u.vargs,
1904                                   /* include_backtrace */ False );
1905          SET_CLREQ_RETVAL( tid, count );
1906          break;
1907       }
1908 
1909       case VG_USERREQ__PRINTF_BACKTRACE: {
1910          const HChar* format = (HChar *)arg[1];
1911          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1912             _VALIST_BY_REF version instead */
1913          if (sizeof(va_list) != sizeof(UWord))
1914             goto va_list_casting_error_NORETURN;
1915          union {
1916             va_list vargs;
1917             unsigned long uw;
1918          } u;
1919          u.uw = (unsigned long)arg[2];
1920          Int count =
1921             print_client_message( tid, format, &u.vargs,
1922                                   /* include_backtrace */ True );
1923          SET_CLREQ_RETVAL( tid, count );
1924          break;
1925       }
1926 
1927       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
1928          const HChar* format = (HChar *)arg[1];
1929          va_list* vargsp = (va_list*)arg[2];
1930          Int count =
1931             print_client_message( tid, format, vargsp,
1932                                   /* include_backtrace */ False );
1933 
1934          SET_CLREQ_RETVAL( tid, count );
1935          break;
1936       }
1937 
1938       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
1939          const HChar* format = (HChar *)arg[1];
1940          va_list* vargsp = (va_list*)arg[2];
1941          Int count =
1942             print_client_message( tid, format, vargsp,
1943                                   /* include_backtrace */ True );
1944          SET_CLREQ_RETVAL( tid, count );
1945          break;
1946       }
1947 
1948       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
1949          va_list* vargsp = (va_list*)arg[2];
1950          Int count =
1951             VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
1952          VG_(message_flush)();
1953          SET_CLREQ_RETVAL( tid, count );
1954          break;
1955       }
1956 
1957       case VG_USERREQ__ADD_IFUNC_TARGET: {
1958          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
1959          SET_CLREQ_RETVAL( tid, 0);
1960          break; }
1961 
1962       case VG_USERREQ__STACK_REGISTER: {
1963          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
1964          SET_CLREQ_RETVAL( tid, sid );
1965          break; }
1966 
1967       case VG_USERREQ__STACK_DEREGISTER: {
1968          VG_(deregister_stack)(arg[1]);
1969          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1970          break; }
1971 
1972       case VG_USERREQ__STACK_CHANGE: {
1973          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
1974          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1975          break; }
1976 
1977       case VG_USERREQ__GET_MALLOCFUNCS: {
1978 	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
1979 
1980 	 info->tl_malloc               = VG_(tdict).tool_malloc;
1981 	 info->tl_calloc               = VG_(tdict).tool_calloc;
1982 	 info->tl_realloc              = VG_(tdict).tool_realloc;
1983 	 info->tl_memalign             = VG_(tdict).tool_memalign;
1984 	 info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
1985 	 info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
1986 	 info->tl_free                 = VG_(tdict).tool_free;
1987 	 info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
1988 	 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
1989          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
1990 
1991 	 info->mallinfo                = VG_(mallinfo);
1992 	 info->clo_trace_malloc        = VG_(clo_trace_malloc);
1993 
1994          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1995 
1996 	 break;
1997       }
1998 
1999       /* Requests from the client program */
2000 
2001       case VG_USERREQ__DISCARD_TRANSLATIONS:
2002          if (VG_(clo_verbosity) > 2)
2003             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
2004                          " addr %p,  len %lu\n",
2005                          (void*)arg[1], arg[2] );
2006 
2007          VG_(discard_translations)(
2008             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
2009          );
2010 
2011          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2012 	 break;
2013 
2014       case VG_USERREQ__COUNT_ERRORS:
2015          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
2016          break;
2017 
2018       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
2019          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
2020          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2021          break;
2022 
2023       case VG_USERREQ__MAP_IP_TO_SRCLOC: {
2024          Addr   ip    = arg[1];
2025          HChar* buf64 = (HChar*)arg[2];  // points to a HChar [64] array
2026          const HChar *buf;  // points to a string of unknown size
2027 
2028          VG_(memset)(buf64, 0, 64);
2029          UInt linenum = 0;
2030          Bool ok = VG_(get_filename_linenum)(
2031                       ip, &buf, NULL, &linenum
2032                    );
2033          if (ok) {
2034             /* For backward compatibility truncate the filename to
2035                49 characters. */
2036             VG_(strncpy)(buf64, buf, 50);
2037             buf64[49] = '\0';
2038             UInt i;
2039             for (i = 0; i < 50; i++) {
2040                if (buf64[i] == 0)
2041                   break;
2042             }
2043             VG_(sprintf)(buf64+i, ":%u", linenum);  // safe
2044          } else {
2045             buf64[0] = 0;
2046          }
2047 
2048          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2049          break;
2050       }
2051 
2052       case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
2053          Word delta = arg[1];
2054          vg_assert(delta == 1 || delta == -1);
2055          ThreadState* tst = VG_(get_ThreadState)(tid);
2056          vg_assert(tst);
2057          if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
2058             tst->err_disablement_level++;
2059          }
2060          else
2061          if (delta == -1 && tst->err_disablement_level > 0) {
2062             tst->err_disablement_level--;
2063          }
2064          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2065          break;
2066       }
2067 
2068       case VG_USERREQ__GDB_MONITOR_COMMAND: {
2069          UWord ret;
2070          ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
2071          SET_CLREQ_RETVAL(tid, ret);
2072          break;
2073       }
2074 
2075       case VG_USERREQ__MALLOCLIKE_BLOCK:
2076       case VG_USERREQ__RESIZEINPLACE_BLOCK:
2077       case VG_USERREQ__FREELIKE_BLOCK:
2078          // Ignore them if the addr is NULL;  otherwise pass onto the tool.
2079          if (!arg[1]) {
2080             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2081             break;
2082          } else {
2083             goto my_default;
2084          }
2085 
2086       case VG_USERREQ__VEX_INIT_FOR_IRI:
2087          LibVEX_InitIRI ( (IRICB *)arg[1] );
2088          break;
2089 
2090       default:
2091        my_default:
2092 	 if (os_client_request(tid, arg)) {
2093 	    // do nothing, os_client_request() handled it
2094          } else if (VG_(needs).client_requests) {
2095 	    UWord ret;
2096 
2097             if (VG_(clo_verbosity) > 2)
2098                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
2099                            arg[0], (void*)arg[1], arg[2] );
2100 
2101 	    if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
2102 	       SET_CLREQ_RETVAL(tid, ret);
2103          } else {
2104 	    static Bool whined = False;
2105 
2106 	    if (!whined && VG_(clo_verbosity) > 2) {
2107                // Allow for requests in core, but defined by tools, which
2108                // have 0 and 0 in their two high bytes.
2109                HChar c1 = (arg[0] >> 24) & 0xff;
2110                HChar c2 = (arg[0] >> 16) & 0xff;
2111                if (c1 == 0) c1 = '_';
2112                if (c2 == 0) c2 = '_';
2113 	       VG_(message)(Vg_UserMsg, "Warning:\n"
2114                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
2115 		   "  VG_(needs).client_requests should be set?\n",
2116 			    arg[0], c1, c2, arg[0] & 0xffff);
2117 	       whined = True;
2118 	    }
2119          }
2120          break;
2121    }
2122    return;
2123 
2124    /*NOTREACHED*/
2125   va_list_casting_error_NORETURN:
2126    VG_(umsg)(
2127       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
2128       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
2129       "on a platform where they cannot be supported.  Please use the\n"
2130       "equivalent _VALIST_BY_REF versions instead.\n"
2131       "\n"
2132       "This is a binary-incompatible change in Valgrind's client request\n"
2133       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
2134       "are expected to almost never see this message.  The only case in\n"
2135       "which you might see this message is if your code uses the macros\n"
2136       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
2137       "to recompile such code, using the header files from this version of\n"
2138       "Valgrind, and not any previous version.\n"
2139       "\n"
2140       "If you see this mesage in any other circumstances, it is probably\n"
2141       "a bug in Valgrind.  In this case, please file a bug report at\n"
2142       "\n"
2143       "   http://www.valgrind.org/support/bug_reports.html\n"
2144       "\n"
2145       "Will now abort.\n"
2146    );
2147    vg_assert(0);
2148 }
2149 
2150 
2151 /* ---------------------------------------------------------------------
2152    Sanity checking (permanently engaged)
2153    ------------------------------------------------------------------ */
2154 
2155 /* Internal consistency checks on the sched structures. */
2156 static
scheduler_sanity(ThreadId tid)2157 void scheduler_sanity ( ThreadId tid )
2158 {
2159    Bool bad = False;
2160    Int lwpid = VG_(gettid)();
2161 
2162    if (!VG_(is_running_thread)(tid)) {
2163       VG_(message)(Vg_DebugMsg,
2164 		   "Thread %u is supposed to be running, "
2165                    "but doesn't own the_BigLock (owned by %u)\n",
2166 		   tid, VG_(running_tid));
2167       bad = True;
2168    }
2169 
2170    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
2171       VG_(message)(Vg_DebugMsg,
2172                    "Thread %u supposed to be in LWP %d, but we're actually %d\n",
2173                    tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
2174       bad = True;
2175    }
2176 
2177    if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
2178       VG_(message)(Vg_DebugMsg,
2179                    "Thread (LWPID) %u doesn't own the_BigLock\n",
2180                    tid);
2181       bad = True;
2182    }
2183 
2184    if (0) {
2185       /* Periodically show the state of all threads, for debugging
2186          purposes. */
2187       static UInt lasttime = 0;
2188       UInt now;
2189       now = VG_(read_millisecond_timer)();
2190       if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
2191          lasttime = now;
2192          VG_(printf)("\n------------ Sched State at %d ms ------------\n",
2193                      (Int)now);
2194          VG_(show_sched_status)(True,  // host_stacktrace
2195                                 True,  // stack_usage
2196                                 True); // exited_threads);
2197       }
2198    }
2199 
2200    /* core_panic also shows the sched status, which is why we don't
2201       show it above if bad==True. */
2202    if (bad)
2203       VG_(core_panic)("scheduler_sanity: failed");
2204 }
2205 
VG_(sanity_check_general)2206 void VG_(sanity_check_general) ( Bool force_expensive )
2207 {
2208    ThreadId tid;
2209 
2210    static UInt next_slow_check_at = 1;
2211    static UInt slow_check_interval = 25;
2212 
2213    if (VG_(clo_sanity_level) < 1) return;
2214 
2215    /* --- First do all the tests that we can do quickly. ---*/
2216 
2217    sanity_fast_count++;
2218 
2219    /* Check stuff pertaining to the memory check system. */
2220 
2221    /* Check that nobody has spuriously claimed that the first or
2222       last 16 pages of memory have become accessible [...] */
2223    if (VG_(needs).sanity_checks) {
2224       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
2225    }
2226 
2227    /* --- Now some more expensive checks. ---*/
2228 
2229    /* Once every now and again, check some more expensive stuff.
2230       Gradually increase the interval between such checks so as not to
2231       burden long-running programs too much. */
2232    if ( force_expensive
2233         || VG_(clo_sanity_level) > 1
2234         || (VG_(clo_sanity_level) == 1
2235             && sanity_fast_count == next_slow_check_at)) {
2236 
2237       if (0) VG_(printf)("SLOW at %u\n", sanity_fast_count-1);
2238 
2239       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
2240       slow_check_interval++;
2241       sanity_slow_count++;
2242 
2243       if (VG_(needs).sanity_checks) {
2244           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
2245       }
2246 
2247       /* Look for stack overruns.  Visit all threads. */
2248       for (tid = 1; tid < VG_N_THREADS; tid++) {
2249 	 SizeT    remains;
2250          VgStack* stack;
2251 
2252 	 if (VG_(threads)[tid].status == VgTs_Empty ||
2253 	     VG_(threads)[tid].status == VgTs_Zombie)
2254 	    continue;
2255 
2256          stack
2257             = (VgStack*)
2258               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
2259          SizeT limit
2260             = 4096; // Let's say.  Checking more causes lots of L2 misses.
2261 	 remains
2262             = VG_(am_get_VgStack_unused_szB)(stack, limit);
2263 	 if (remains < limit)
2264 	    VG_(message)(Vg_DebugMsg,
2265                          "WARNING: Thread %u is within %lu bytes "
2266                          "of running out of valgrind stack!\n"
2267                          "Valgrind stack size can be increased "
2268                          "using --valgrind-stacksize=....\n",
2269 		         tid, remains);
2270       }
2271    }
2272 
2273    if (VG_(clo_sanity_level) > 1) {
2274       /* Check sanity of the low-level memory manager.  Note that bugs
2275          in the client's code can cause this to fail, so we don't do
2276          this check unless specially asked for.  And because it's
2277          potentially very expensive. */
2278       VG_(sanity_check_malloc_all)();
2279    }
2280 }
2281 
2282 /*--------------------------------------------------------------------*/
2283 /*--- end                                                          ---*/
2284 /*--------------------------------------------------------------------*/
2285