1 
2 /*--------------------------------------------------------------------*/
3 /*--- Thread scheduling.                               scheduler.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2013 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /*
32    Overview
33 
34    Valgrind tries to emulate the kernel's threading as closely as
35    possible.  The client does all threading via the normal syscalls
36    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
37    the same process structure as would be created without Valgrind.
38    There are no extra threads.
39 
40    The main difference is that Valgrind only allows one client thread
41    to run at once.  This is controlled with the CPU Big Lock,
42    "the_BigLock".  Any time a thread wants to run client code or
43    manipulate any shared state (which is anything other than its own
44    ThreadState entry), it must hold the_BigLock.
45 
46    When a thread is about to block in a blocking syscall, it releases
47    the_BigLock, and re-takes it when it becomes runnable again (either
48    because the syscall finished, or we took a signal).
49 
50    VG_(scheduler) therefore runs in each thread.  It returns only when
51    the thread is exiting, either because it exited itself, or it was
52    told to exit by another thread.
53 
54    This file is almost entirely OS-independent.  The details of how
55    the OS handles threading and signalling are abstracted away and
56    implemented elsewhere.  [Some of the functions have worked their
57    way back for the moment, until we do an OS port in earnest...]
58 */
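/* As an illustration of the locking protocol described above, a thread
   about to block in a syscall does, in outline, something like the
   following.  This is a sketch only; the real sequence lives in the
   syscall wrappers and also juggles the signal mask:

      VG_(release_BigLock)(tid, VgTs_WaitSys, "syscall");
      ... perform the blocking syscall with the lock dropped ...
      VG_(acquire_BigLock)(tid, "syscall done");

   Both functions are defined later in this file. */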
59 
60 
61 #include "pub_core_basics.h"
62 #include "pub_core_debuglog.h"
63 #include "pub_core_vki.h"
64 #include "pub_core_vkiscnums.h"  // __NR_sched_yield
65 #include "pub_core_threadstate.h"
66 #include "pub_core_clientstate.h"
67 #include "pub_core_aspacemgr.h"
68 #include "pub_core_clreq.h"      // for VG_USERREQ__*
69 #include "pub_core_dispatch.h"
70 #include "pub_core_errormgr.h"   // For VG_(get_n_errs_found)()
71 #include "pub_core_gdbserver.h"  // for VG_(gdbserver)/VG_(gdbserver_activity)
72 #include "pub_core_libcbase.h"
73 #include "pub_core_libcassert.h"
74 #include "pub_core_libcprint.h"
75 #include "pub_core_libcproc.h"
76 #include "pub_core_libcsignal.h"
77 #if defined(VGO_darwin)
78 #include "pub_core_mach.h"
79 #endif
80 #include "pub_core_machine.h"
81 #include "pub_core_mallocfree.h"
82 #include "pub_core_options.h"
83 #include "pub_core_replacemalloc.h"
84 #include "pub_core_sbprofile.h"
85 #include "pub_core_signals.h"
86 #include "pub_core_stacks.h"
87 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
88 #include "pub_core_syscall.h"
89 #include "pub_core_syswrap.h"
90 #include "pub_core_tooliface.h"
91 #include "pub_core_translate.h"     // For VG_(translate)()
92 #include "pub_core_transtab.h"
93 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
94 #include "priv_sched-lock.h"
95 #include "pub_core_scheduler.h"     // self
96 #include "pub_core_redir.h"
97 #include "libvex_emnote.h"          // VexEmNote
98 
99 
100 /* ---------------------------------------------------------------------
101    Types and globals for the scheduler.
102    ------------------------------------------------------------------ */
103 
104 /* ThreadId and ThreadState are defined elsewhere. */
105 
106 /* Defines the thread-scheduling timeslice, in terms of the number of
107    basic blocks we attempt to run each thread for.  Smaller values
108    give finer interleaving but much increased scheduling overheads. */
109 #define SCHEDULING_QUANTUM   100000
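/* A rough sketch of how the quantum is consumed (the real logic is in
   VG_(scheduler) and run_thread_for_a_while below):

      Int dispatch_ctr = SCHEDULING_QUANTUM;
      while (thread is not exiting) {
         run_thread_for_a_while(..., &dispatch_ctr, tid, ...);
         if (dispatch_ctr == 0) {
            // timeslice used up: release the_BigLock, let another
            // thread run, then re-acquire and start a new quantum
         }
      }
*/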
110 
111 /* If False, a fault is Valgrind-internal (ie, a bug) */
112 Bool VG_(in_generated_code) = False;
113 
114 /* 64-bit counter for the number of basic blocks done. */
115 static ULong bbs_done = 0;
116 
117 /* Counter used to decide when to check for vgdb activity.
118    When the number of bbs done reaches vgdb_next_poll, the scheduler
119    will poll for gdbserver activity.  VG_(force_vgdb_poll) and
120    VG_(disable_vgdb_poll) allow the valgrind core (e.g. m_gdbserver)
121    to control when the next poll will be done. */
122 static ULong vgdb_next_poll;
123 
124 /* Forwards */
125 static void do_client_request ( ThreadId tid );
126 static void scheduler_sanity ( ThreadId tid );
127 static void mostly_clear_thread_record ( ThreadId tid );
128 
129 /* Stats. */
130 static ULong n_scheduling_events_MINOR = 0;
131 static ULong n_scheduling_events_MAJOR = 0;
132 
133 /* Stats: number of XIndirs, and number that missed in the fast
134    cache. */
135 static ULong stats__n_xindirs = 0;
136 static ULong stats__n_xindir_misses = 0;
137 
138 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
139    have to do 64 bit incs on the hot path through
140    VG_(cp_disp_xindir). */
141 /*global*/ UInt VG_(stats__n_xindirs_32) = 0;
142 /*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
143 
144 /* Sanity checking counts. */
145 static UInt sanity_fast_count = 0;
146 static UInt sanity_slow_count = 0;
147 
148 void VG_(print_scheduler_stats)(void)
149 {
150    VG_(message)(Vg_DebugMsg,
151       "scheduler: %'llu event checks.\n", bbs_done );
152    VG_(message)(Vg_DebugMsg,
153                 "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
154                 stats__n_xindirs, stats__n_xindir_misses,
155                 stats__n_xindirs / (stats__n_xindir_misses
156                                     ? stats__n_xindir_misses : 1));
157    VG_(message)(Vg_DebugMsg,
158       "scheduler: %'llu/%'llu major/minor sched events.\n",
159       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
160    VG_(message)(Vg_DebugMsg,
161                 "   sanity: %d cheap, %d expensive checks.\n",
162                 sanity_fast_count, sanity_slow_count );
163 }
164 
165 /*
166  * Mutual exclusion object used to serialize threads.
167  */
168 static struct sched_lock *the_BigLock;
169 
170 
171 /* ---------------------------------------------------------------------
172    Helper functions for the scheduler.
173    ------------------------------------------------------------------ */
174 
175 static
176 void print_sched_event ( ThreadId tid, const HChar* what )
177 {
178    VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s\n", tid, what );
179 }
180 
181 /* For showing SB profiles, if the user asks to see them. */
182 static
183 void maybe_show_sb_profile ( void )
184 {
185    /* DO NOT MAKE NON-STATIC */
186    static ULong bbs_done_lastcheck = 0;
187    /* */
188    vg_assert(VG_(clo_profyle_interval) > 0);
189    Long delta = (Long)(bbs_done - bbs_done_lastcheck);
190    vg_assert(delta >= 0);
191    if ((ULong)delta >= VG_(clo_profyle_interval)) {
192       bbs_done_lastcheck = bbs_done;
193       VG_(get_and_show_SB_profile)(bbs_done);
194    }
195 }
196 
197 static
198 const HChar* name_of_sched_event ( UInt event )
199 {
200    switch (event) {
201       case VEX_TRC_JMP_INVALICACHE:    return "INVALICACHE";
202       case VEX_TRC_JMP_FLUSHDCACHE:    return "FLUSHDCACHE";
203       case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
204       case VEX_TRC_JMP_SIGILL:         return "SIGILL";
205       case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
206       case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
207       case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
208       case VEX_TRC_JMP_SIGFPE_INTOVF:
209       case VEX_TRC_JMP_SIGFPE_INTDIV:  return "SIGFPE";
210       case VEX_TRC_JMP_EMWARN:         return "EMWARN";
211       case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
212       case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
213       case VEX_TRC_JMP_YIELD:          return "YIELD";
214       case VEX_TRC_JMP_NODECODE:       return "NODECODE";
215       case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
216       case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
217       case VEX_TRC_JMP_SYS_INT32:      return "INT32";
218       case VEX_TRC_JMP_SYS_INT128:     return "INT128";
219       case VEX_TRC_JMP_SYS_INT129:     return "INT129";
220       case VEX_TRC_JMP_SYS_INT130:     return "INT130";
221       case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
222       case VEX_TRC_JMP_BORING:         return "VEX_BORING";
223 
224       case VG_TRC_BORING:              return "VG_BORING";
225       case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
226       case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
227       case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
228       case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
229       case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
230       case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
231       default:                         return "??UNKNOWN??";
232   }
233 }
234 
235 /* Allocate a completely empty ThreadState record. */
236 ThreadId VG_(alloc_ThreadState) ( void )
237 {
238    Int i;
239    for (i = 1; i < VG_N_THREADS; i++) {
240       if (VG_(threads)[i].status == VgTs_Empty) {
241 	 VG_(threads)[i].status = VgTs_Init;
242 	 VG_(threads)[i].exitreason = VgSrc_None;
243          if (VG_(threads)[i].thread_name)
244             VG_(free)(VG_(threads)[i].thread_name);
245          VG_(threads)[i].thread_name = NULL;
246          return i;
247       }
248    }
249    VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
250                "and rerun valgrind\n");
251    VG_(core_panic)("Max number of threads is too low");
252    /*NOTREACHED*/
253 }
254 
255 /*
256    Mark a thread as Runnable.  This will block until the_BigLock is
257    available, so that we get exclusive access to all the shared
258    structures and the CPU.  Up until we get the_BigLock, we must not
259    touch any shared state.
260 
261    When this returns, we'll actually be running.
262  */
263 void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
264 {
265    ThreadState *tst;
266 
267 #if 0
268    if (VG_(clo_trace_sched)) {
269       HChar buf[VG_(strlen)(who) + 30];
270       VG_(sprintf)(buf, "waiting for lock (%s)", who);
271       print_sched_event(tid, buf);
272    }
273 #endif
274 
275    /* First, acquire the_BigLock.  We can't do anything else safely
276       prior to this point.  Even doing debug printing prior to this
277       point is, technically, wrong. */
278    VG_(acquire_BigLock_LL)(NULL);
279 
280    tst = VG_(get_ThreadState)(tid);
281 
282    vg_assert(tst->status != VgTs_Runnable);
283 
284    tst->status = VgTs_Runnable;
285 
286    if (VG_(running_tid) != VG_INVALID_THREADID)
287       VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
288    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
289    VG_(running_tid) = tid;
290 
291    { Addr gsp = VG_(get_SP)(tid);
292       if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
293          VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
294       else
295          VG_(unknown_SP_update)(gsp, gsp);
296    }
297 
298    if (VG_(clo_trace_sched)) {
299       HChar buf[VG_(strlen)(who) + 30];
300       VG_(sprintf)(buf, " acquired lock (%s)", who);
301       print_sched_event(tid, buf);
302    }
303 }
304 
305 /*
306    Set a thread into a sleeping state, and give up exclusive access to
307    the CPU.  On return, the thread must be prepared to block until it
308    is ready to run again (generally this means blocking in a syscall,
309    but it may mean that we remain in a Runnable state and we're just
310    yielding the CPU to another thread).
311  */
312 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
313                           const HChar* who)
314 {
315    ThreadState *tst = VG_(get_ThreadState)(tid);
316 
317    vg_assert(tst->status == VgTs_Runnable);
318 
319    vg_assert(sleepstate == VgTs_WaitSys ||
320 	     sleepstate == VgTs_Yielding);
321 
322    tst->status = sleepstate;
323 
324    vg_assert(VG_(running_tid) == tid);
325    VG_(running_tid) = VG_INVALID_THREADID;
326 
327    if (VG_(clo_trace_sched)) {
328       const HChar *status = VG_(name_of_ThreadStatus)(sleepstate);
329       HChar buf[VG_(strlen)(who) + VG_(strlen)(status) + 30];
330       VG_(sprintf)(buf, "releasing lock (%s) -> %s", who, status);
331       print_sched_event(tid, buf);
332    }
333 
334    /* Release the_BigLock; this will reschedule any runnable
335       thread. */
336    VG_(release_BigLock_LL)(NULL);
337 }
338 
339 static void init_BigLock(void)
340 {
341    vg_assert(!the_BigLock);
342    the_BigLock = ML_(create_sched_lock)();
343 }
344 
345 static void deinit_BigLock(void)
346 {
347    ML_(destroy_sched_lock)(the_BigLock);
348    the_BigLock = NULL;
349 }
350 
351 /* See pub_core_scheduler.h for description */
352 void VG_(acquire_BigLock_LL) ( const HChar* who )
353 {
354    ML_(acquire_sched_lock)(the_BigLock);
355 }
356 
357 /* See pub_core_scheduler.h for description */
358 void VG_(release_BigLock_LL) ( const HChar* who )
359 {
360    ML_(release_sched_lock)(the_BigLock);
361 }
362 
363 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
364 {
365    return (ML_(get_sched_lock_owner)(the_BigLock)
366            == VG_(threads)[tid].os_state.lwpid);
367 }
368 
369 
370 /* Clear out the ThreadState and release the semaphore. Leaves the
371    ThreadState in VgTs_Zombie state, so that it doesn't get
372    reallocated until the caller is really ready. */
373 void VG_(exit_thread)(ThreadId tid)
374 {
375    vg_assert(VG_(is_valid_tid)(tid));
376    vg_assert(VG_(is_running_thread)(tid));
377    vg_assert(VG_(is_exiting)(tid));
378 
379    mostly_clear_thread_record(tid);
380    VG_(running_tid) = VG_INVALID_THREADID;
381 
382    /* There should still be a valid exitreason for this thread */
383    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
384 
385    if (VG_(clo_trace_sched))
386       print_sched_event(tid, "release lock in VG_(exit_thread)");
387 
388    VG_(release_BigLock_LL)(NULL);
389 }
390 
391 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
392    out of the syscall and onto doing the next thing, whatever that is.
393    If it isn't blocked in a syscall, this has no effect on the thread. */
394 void VG_(get_thread_out_of_syscall)(ThreadId tid)
395 {
396    vg_assert(VG_(is_valid_tid)(tid));
397    vg_assert(!VG_(is_running_thread)(tid));
398 
399    if (VG_(threads)[tid].status == VgTs_WaitSys) {
400       if (VG_(clo_trace_signals)) {
401 	 VG_(message)(Vg_DebugMsg,
402                       "get_thread_out_of_syscall zaps tid %d lwp %d\n",
403 		      tid, VG_(threads)[tid].os_state.lwpid);
404       }
405 #     if defined(VGO_darwin)
406       {
407          // GrP fixme use mach primitives on darwin?
408          // GrP fixme thread_abort_safely?
409          // GrP fixme race for thread with WaitSys set but not in syscall yet?
410          extern kern_return_t thread_abort(mach_port_t);
411          thread_abort(VG_(threads)[tid].os_state.lwpid);
412       }
413 #     else
414       {
415          __attribute__((unused))
416          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
417          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
418             I'm really not sure.  Here's a race scenario which argues
419             that we shouldn't; but equally I'm not sure the scenario is
420             even possible, because of constraints caused by the question
421             of who holds the BigLock when.
422 
423             Target thread tid does sys_read on a socket and blocks.  This
424             function gets called, and we observe correctly that tid's
425             status is WaitSys but then for whatever reason this function
426             goes very slowly for a while.  Then data arrives from
427             wherever, tid's sys_read returns, tid exits.  Then we do
428             tkill on tid, but tid no longer exists; tkill returns an
429             error code and the assert fails. */
430          /* vg_assert(r == 0); */
431       }
432 #     endif
433    }
434 }
435 
436 /*
437    Yield the CPU for a short time to let some other thread run.
438  */
439 void VG_(vg_yield)(void)
440 {
441    ThreadId tid = VG_(running_tid);
442 
443    vg_assert(tid != VG_INVALID_THREADID);
444    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
445 
446    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
447 
448    /*
449       Tell the kernel we're yielding.
450     */
451    VG_(do_syscall0)(__NR_sched_yield);
452 
453    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
454 }
455 
456 
457 /* Set the standard set of blocked signals, used whenever we're not
458    running a client syscall. */
459 static void block_signals(void)
460 {
461    vki_sigset_t mask;
462 
463    VG_(sigfillset)(&mask);
464 
465    /* Don't block these because they're synchronous */
466    VG_(sigdelset)(&mask, VKI_SIGSEGV);
467    VG_(sigdelset)(&mask, VKI_SIGBUS);
468    VG_(sigdelset)(&mask, VKI_SIGFPE);
469    VG_(sigdelset)(&mask, VKI_SIGILL);
470    VG_(sigdelset)(&mask, VKI_SIGTRAP);
471 
472    /* Can't block these anyway */
473    VG_(sigdelset)(&mask, VKI_SIGSTOP);
474    VG_(sigdelset)(&mask, VKI_SIGKILL);
475 
476    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
477 }
478 
479 static void os_state_clear(ThreadState *tst)
480 {
481    tst->os_state.lwpid       = 0;
482    tst->os_state.threadgroup = 0;
483 #  if defined(VGO_linux)
484    /* no other fields to clear */
485 #  elif defined(VGO_darwin)
486    tst->os_state.post_mach_trap_fn = NULL;
487    tst->os_state.pthread           = 0;
488    tst->os_state.func_arg          = 0;
489    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
490    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
491    tst->os_state.wq_jmpbuf_valid   = False;
492    tst->os_state.remote_port       = 0;
493    tst->os_state.msgh_id           = 0;
494    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
495 #  else
496 #    error "Unknown OS"
497 #  endif
498 }
499 
500 static void os_state_init(ThreadState *tst)
501 {
502    tst->os_state.valgrind_stack_base    = 0;
503    tst->os_state.valgrind_stack_init_SP = 0;
504    os_state_clear(tst);
505 }
506 
507 static
508 void mostly_clear_thread_record ( ThreadId tid )
509 {
510    vki_sigset_t savedmask;
511 
512    vg_assert(tid >= 0 && tid < VG_N_THREADS);
513    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
514    VG_(threads)[tid].tid = tid;
515 
516    /* Leave the thread in Zombie, so that it doesn't get reallocated
517       until the caller is finally done with the thread stack. */
518    VG_(threads)[tid].status               = VgTs_Zombie;
519 
520    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
521    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
522 
523    os_state_clear(&VG_(threads)[tid]);
524 
525    /* start with no altstack */
526    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
527    VG_(threads)[tid].altstack.ss_size = 0;
528    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
529 
530    VG_(clear_out_queued_signals)(tid, &savedmask);
531 
532    VG_(threads)[tid].sched_jmpbuf_valid = False;
533 }
534 
535 /*
536    Called in the child after fork.  If the parent has multiple
537    threads, then we've inherited a VG_(threads) array describing them,
538    but only the thread which called fork() is actually alive in the
539    child.  This function needs to clean up all those other thread
540    structures.
541 
542    Whichever tid in the parent called fork() becomes the
543    master_tid in the child.  That's because the only living slot in
544    VG_(threads) in the child after fork is VG_(threads)[tid], and it
545    would be too hard to try to re-number the thread and relocate the
546    thread state down to VG_(threads)[1].
547 
548    This function also needs to reinitialize the_BigLock, since
549    otherwise we may end up sharing its state with the parent, which
550    would be deeply confusing.
551 */
552 static void sched_fork_cleanup(ThreadId me)
553 {
554    ThreadId tid;
555    vg_assert(VG_(running_tid) == me);
556 
557 #  if defined(VGO_darwin)
558    // GrP fixme hack reset Mach ports
559    VG_(mach_init)();
560 #  endif
561 
562    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
563    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
564 
565    /* clear out all the unused thread slots */
566    for (tid = 1; tid < VG_N_THREADS; tid++) {
567       if (tid != me) {
568          mostly_clear_thread_record(tid);
569 	 VG_(threads)[tid].status = VgTs_Empty;
570          VG_(clear_syscallInfo)(tid);
571       }
572    }
573 
574    /* re-init and take the sema */
575    deinit_BigLock();
576    init_BigLock();
577    VG_(acquire_BigLock_LL)(NULL);
578 }
579 
580 
581 /* First phase of initialisation of the scheduler.  Initialise the
582    bigLock, zeroise the VG_(threads) structure and decide on the
583    ThreadId of the root thread.
584 */
585 ThreadId VG_(scheduler_init_phase1) ( void )
586 {
587    Int i;
588    ThreadId tid_main;
589 
590    VG_(debugLog)(1,"sched","sched_init_phase1\n");
591 
592    if (VG_(clo_fair_sched) != disable_fair_sched
593        && !ML_(set_sched_lock_impl)(sched_lock_ticket)
594        && VG_(clo_fair_sched) == enable_fair_sched)
595    {
596       VG_(printf)("Error: fair scheduling is not supported on this system.\n");
597       VG_(exit)(1);
598    }
599 
600    if (VG_(clo_verbosity) > 1) {
601       VG_(message)(Vg_DebugMsg,
602                    "Scheduler: using %s scheduler lock implementation.\n",
603                    ML_(get_sched_lock_name)());
604    }
605 
606    init_BigLock();
607 
608    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
609       /* Paranoia .. completely zero it out. */
610       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
611 
612       VG_(threads)[i].sig_queue = NULL;
613 
614       os_state_init(&VG_(threads)[i]);
615       mostly_clear_thread_record(i);
616 
617       VG_(threads)[i].status                    = VgTs_Empty;
618       VG_(threads)[i].client_stack_szB          = 0;
619       VG_(threads)[i].client_stack_highest_byte = (Addr)NULL;
620       VG_(threads)[i].err_disablement_level     = 0;
621       VG_(threads)[i].thread_name               = NULL;
622    }
623 
624    tid_main = VG_(alloc_ThreadState)();
625 
626    /* Bleh.  Unfortunately there are various places in the system that
627       assume that the main thread has a ThreadId of 1.
628       - Helgrind (possibly)
629       - stack overflow message in default_action() in m_signals.c
630       - definitely a lot more places
631    */
632    vg_assert(tid_main == 1);
633 
634    return tid_main;
635 }
636 
637 
638 /* Second phase of initialisation of the scheduler.  Given the root
639    ThreadId computed by first phase of initialisation, fill in stack
640    details and acquire bigLock.  Initialise the scheduler.  This is
641    called at startup.  The caller subsequently initialises the guest
642    state components of this main thread.
643 */
644 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
645                                   Addr     clstack_end,
646                                   SizeT    clstack_size )
647 {
648    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
649                    "cls_end=0x%lx, cls_sz=%ld\n",
650                    tid_main, clstack_end, clstack_size);
651 
652    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
653    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
654 
655    VG_(threads)[tid_main].client_stack_highest_byte
656       = clstack_end;
657    VG_(threads)[tid_main].client_stack_szB
658       = clstack_size;
659 
660    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
661 }
662 
663 
664 /* ---------------------------------------------------------------------
665    Helpers for running translations.
666    ------------------------------------------------------------------ */
667 
668 /* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
669    mask state, but does need to pass "val" through.  jumped must be a
670    volatile UWord. */
671 #define SCHEDSETJMP(tid, jumped, stmt)					\
672    do {									\
673       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);	\
674 									\
675       (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
676       if ((jumped) == ((UWord)0)) {                                     \
677 	 vg_assert(!_qq_tst->sched_jmpbuf_valid);			\
678 	 _qq_tst->sched_jmpbuf_valid = True;				\
679 	 stmt;								\
680       }	else if (VG_(clo_trace_sched))					\
681 	 VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%ld\n",       \
682                      __LINE__, tid, jumped);                            \
683       vg_assert(_qq_tst->sched_jmpbuf_valid);				\
684       _qq_tst->sched_jmpbuf_valid = False;				\
685    } while(0)
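/* Typical use of SCHEDSETJMP (a sketch; the real call sites are in
   run_thread_for_a_while and handle_syscall below):

      volatile UWord jumped = 0;
      SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
      if (jumped != (UWord)0) {
         // we got here via a longjmp out of the statement, typically
         // because a signal arrived while it was running
      }
*/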
686 
687 
688 /* Do various guest state alignment checks prior to running a thread.
689    Specifically, check that what we have matches Vex's guest state
690    layout requirements.  See libvex.h for details, but in short the
691    requirements are: There must be no holes in between the primary
692    guest state, its two copies, and the spill area.  In short, all 4
693    areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
694    be placed back-to-back without holes in between. */
695 static void do_pre_run_checks ( volatile ThreadState* tst )
696 {
697    Addr a_vex     = (Addr) & tst->arch.vex;
698    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
699    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
700    Addr a_spill   = (Addr) & tst->arch.vex_spill;
701    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
702    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
703    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
704    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
705 
706    if (0)
707    VG_(printf)("gst %p %d, sh1 %p %d, "
708                "sh2 %p %d, spill %p %d\n",
709                (void*)a_vex, sz_vex,
710                (void*)a_vexsh1, sz_vexsh1,
711                (void*)a_vexsh2, sz_vexsh2,
712                (void*)a_spill, sz_spill );
713 
714    vg_assert(sz_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
715    vg_assert(sz_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
716    vg_assert(sz_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
717    vg_assert(sz_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
718 
719    vg_assert(a_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
720    vg_assert(a_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
721    vg_assert(a_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
722    vg_assert(a_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
723 
724    /* Check that the guest state and its two shadows have the same
725       size, and that there are no holes in between.  The latter is
726       important because Memcheck assumes that it can reliably access
727       the shadows by indexing off a pointer to the start of the
728       primary guest state area. */
729    vg_assert(sz_vex == sz_vexsh1);
730    vg_assert(sz_vex == sz_vexsh2);
731    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
732    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
733    /* Also check there's no hole between the second shadow area and
734       the spill area. */
735    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
736    vg_assert(a_vex + 3 * sz_vex == a_spill);
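   /* Pictorially, the layout demanded by the checks above is (a
      sketch; offsets are in bytes from a_vex):

         [0*sz_vex .. 1*sz_vex)                          primary guest state
         [1*sz_vex .. 2*sz_vex)                          shadow 1
         [2*sz_vex .. 3*sz_vex)                          shadow 2
         [3*sz_vex .. 3*sz_vex + LibVEX_N_SPILL_BYTES)   spill area
   */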
737 
738 #  if defined(VGA_x86)
739    /* x86 XMM regs must form an array, ie, have no holes in
740       between. */
741    vg_assert(
742       (offsetof(VexGuestX86State,guest_XMM7)
743        - offsetof(VexGuestX86State,guest_XMM0))
744       == (8/*#regs*/-1) * 16/*bytes per reg*/
745    );
746    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
747    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
748    vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
749    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
750    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
751 #  endif
752 
753 #  if defined(VGA_amd64)
754    /* amd64 YMM regs must form an array, ie, have no holes in
755       between. */
756    vg_assert(
757       (offsetof(VexGuestAMD64State,guest_YMM16)
758        - offsetof(VexGuestAMD64State,guest_YMM0))
759       == (17/*#regs*/-1) * 32/*bytes per reg*/
760    );
761    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
762    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
763    vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
764    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
765    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
766 #  endif
767 
768 #  if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
769    /* ppc guest_state vector regs must be 16 byte aligned for
770       loads/stores.  This is important! */
771    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
772    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
773    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
774    /* be extra paranoid .. */
775    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
776    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
777    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
778 #  endif
779 
780 #  if defined(VGA_arm)
781    /* arm guest_state VFP regs must be 8 byte aligned for
782       loads/stores.  Let's use 16 just to be on the safe side. */
783    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
784    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
785    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
786    /* be extra paranoid .. */
787    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
788    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
789    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
790 #  endif
791 
792 #  if defined(VGA_arm64)
793    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
794    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
795    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
796    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
797    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
798    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
799 #  endif
800 
801 #  if defined(VGA_s390x)
802    /* no special requirements */
803 #  endif
804 
805 #  if defined(VGA_mips32) || defined(VGA_mips64)
806    /* no special requirements */
807 #  endif
808 }
809 
810 // NO_VGDB_POLL value ensures vgdb is not polled, while
811 // VGDB_POLL_ASAP ensures that the next scheduler call
812 // will cause a poll.
813 #define NO_VGDB_POLL    0xffffffffffffffffULL
814 #define VGDB_POLL_ASAP  0x0ULL
815 
816 void VG_(disable_vgdb_poll) ( void )
817 {
818    vgdb_next_poll = NO_VGDB_POLL;
819 }
820 void VG_(force_vgdb_poll) ( void )
821 {
822    vgdb_next_poll = VGDB_POLL_ASAP;
823 }
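/* The scheduler-side consumer of vgdb_next_poll is in
   run_thread_for_a_while below; in outline (a sketch) it does:

      if (bbs_done >= vgdb_next_poll) {
         vgdb_next_poll = VG_(clo_vgdb_poll) ? bbs_done + VG_(clo_vgdb_poll)
                                             : NO_VGDB_POLL;
         if (VG_(gdbserver_activity)(tid))
            VG_(gdbserver)(tid);
      }
*/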
824 
825 /* Run the thread tid for a while, and return a VG_TRC_* value
826    indicating why VG_(disp_run_translations) stopped, and possibly an
827    auxiliary word.  Also, only allow the thread to run for at most
828    *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
829    is False, we are running ordinary redir'd translations, and we
830    should therefore start by looking up the guest next IP in TT.  If
831    it is True then we ignore the guest next IP and just run from
832    alt_host_addr, which presumably points at host code for a no-redir
833    translation.
834 
835    Return results are placed in two_words.  two_words[0] is set to the
836    TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
837    the address to patch is placed in two_words[1].
838 */
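/* A typical caller therefore does something like this (a sketch; the
   real call site is in VG_(scheduler) below):

      HWord trc[2];
      run_thread_for_a_while( &trc[0], &dispatch_ctr, tid,
                              0/*ignored*/, False );
      switch (trc[0]) { ... }   // trc[1] is only meaningful for the
                                // VG_TRC_CHAIN_ME_TO_*_EP cases
*/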
839 static
840 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
841                               /*MOD*/Int*   dispatchCtrP,
842                               ThreadId      tid,
843                               HWord         alt_host_addr,
844                               Bool          use_alt_host_addr )
845 {
846    volatile HWord        jumped         = 0;
847    volatile ThreadState* tst            = NULL; /* stop gcc complaining */
848    volatile Int          done_this_time = 0;
849    volatile HWord        host_code_addr = 0;
850 
851    /* Paranoia */
852    vg_assert(VG_(is_valid_tid)(tid));
853    vg_assert(VG_(is_running_thread)(tid));
854    vg_assert(!VG_(is_exiting)(tid));
855    vg_assert(*dispatchCtrP > 0);
856 
857    tst = VG_(get_ThreadState)(tid);
858    do_pre_run_checks( tst );
859    /* end Paranoia */
860 
861    /* Futz with the XIndir stats counters. */
862    vg_assert(VG_(stats__n_xindirs_32) == 0);
863    vg_assert(VG_(stats__n_xindir_misses_32) == 0);
864 
865    /* Clear return area. */
866    two_words[0] = two_words[1] = 0;
867 
868    /* Figure out where we're starting from. */
869    if (use_alt_host_addr) {
870       /* unusual case -- no-redir translation */
871       host_code_addr = alt_host_addr;
872    } else {
873       /* normal case -- redir translation */
874       UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
875       if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
876          host_code_addr = VG_(tt_fast)[cno].host;
877       else {
878          Addr res = 0;
879          /* not found in VG_(tt_fast).  Searching the transtab here
880             improves performance compared to returning directly
881             to the scheduler. */
882          Bool  found = VG_(search_transtab)(&res, NULL, NULL,
883                                             (Addr)tst->arch.vex.VG_INSTR_PTR,
884                                             True/*upd cache*/
885                                             );
886          if (LIKELY(found)) {
887             host_code_addr = res;
888          } else {
889             /* At this point, we know that we intended to start at a
890                normal redir translation, but it was not found.  In
891                which case we can return now claiming it's not
892                findable. */
893             two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
894             return;
895          }
896       }
897    }
898    /* We have either a no-redir or a redir translation. */
899    vg_assert(host_code_addr != 0); /* implausible */
900 
901    /* there should be no undealt-with signals */
902    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
903 
904    /* Set up event counter stuff for the run. */
905    tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
906    tst->arch.vex.host_EvC_FAILADDR
907       = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
908 
909    if (0) {
910       vki_sigset_t m;
911       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
912       vg_assert(err == 0);
913       VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
914       for (i = 1; i <= _VKI_NSIG; i++)
915          if (!VG_(sigismember)(&m, i))
916             VG_(printf)("%d ", i);
917       VG_(printf)("\n");
918    }
919 
920    /* Set up return-value area. */
921 
922    // Tell the tool this thread is about to run client code
923    VG_TRACK( start_client_code, tid, bbs_done );
924 
925    vg_assert(VG_(in_generated_code) == False);
926    VG_(in_generated_code) = True;
927 
928    SCHEDSETJMP(
929       tid,
930       jumped,
931       VG_(disp_run_translations)(
932          two_words,
933          (volatile void*)&tst->arch.vex,
934          host_code_addr
935       )
936    );
937 
938    vg_assert(VG_(in_generated_code) == True);
939    VG_(in_generated_code) = False;
940 
941    if (jumped != (HWord)0) {
942       /* We get here if the client took a fault that caused our signal
943          handler to longjmp. */
944       vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
945       two_words[0] = VG_TRC_FAULT_SIGNAL;
946       two_words[1] = 0;
947       block_signals();
948    }
949 
950    /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
951       and zero out the 32-bit ones in preparation for the next run of
952       generated code. */
953    stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
954    VG_(stats__n_xindirs_32) = 0;
955    stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
956    VG_(stats__n_xindir_misses_32) = 0;
957 
958    /* Inspect the event counter. */
959    vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
960    vg_assert(tst->arch.vex.host_EvC_FAILADDR
961              == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
962 
963    /* The number of events done this time is the difference between
964       the event counter originally and what it is now.  Except -- if
965       it has gone negative (to -1) then the transition 0 to -1 doesn't
966       correspond to a real executed block, so back it out.  It's like
967       this because the event checks decrement the counter first and
968       check it for negativeness second, hence the 0 to -1 transition
969       causes a bailout and the block it happens in isn't executed. */
970    {
971      Int dispatchCtrAfterwards = (Int)tst->arch.vex.host_EvC_COUNTER;
972      done_this_time = *dispatchCtrP - dispatchCtrAfterwards;
973      if (dispatchCtrAfterwards == -1) {
974         done_this_time--;
975      } else {
976         /* If the generated code drives the counter below -1, something
977            is seriously wrong. */
978         vg_assert(dispatchCtrAfterwards >= 0);
979      }
980    }
981 
982    vg_assert(done_this_time >= 0);
983    bbs_done += (ULong)done_this_time;
984 
985    *dispatchCtrP -= done_this_time;
986    vg_assert(*dispatchCtrP >= 0);
987 
988    // Tell the tool this thread has stopped running client code
989    VG_TRACK( stop_client_code, tid, bbs_done );
990 
991    if (bbs_done >= vgdb_next_poll) {
992       if (VG_(clo_vgdb_poll))
993          vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
994       else
995          /* value was changed due to gdbserver invocation via ptrace */
996          vgdb_next_poll = NO_VGDB_POLL;
997       if (VG_(gdbserver_activity) (tid))
998          VG_(gdbserver) (tid);
999    }
1000 
1001    /* TRC value and possible auxiliary patch-address word are already
1002       in two_words[0] and [1] respectively, as a result of the call to
1003       VG_(disp_run_translations). */
1004    /* Stay sane .. */
1005    if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
1006        || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
1007       vg_assert(two_words[1] != 0); /* we have a legit patch addr */
1008    } else {
1009       vg_assert(two_words[1] == 0); /* nobody messed with it */
1010    }
1011 }
1012 
1013 
1014 /* ---------------------------------------------------------------------
1015    The scheduler proper.
1016    ------------------------------------------------------------------ */
1017 
1018 static void handle_tt_miss ( ThreadId tid )
1019 {
1020    Bool found;
1021    Addr ip = VG_(get_IP)(tid);
1022 
1023    /* Trivial event.  Miss in the fast-cache.  Do a full
1024       lookup for it. */
1025    found = VG_(search_transtab)( NULL, NULL, NULL,
1026                                  ip, True/*upd_fast_cache*/ );
1027    if (UNLIKELY(!found)) {
1028       /* Not found; we need to request a translation. */
1029       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1030                           bbs_done, True/*allow redirection*/ )) {
1031          found = VG_(search_transtab)( NULL, NULL, NULL,
1032                                        ip, True );
1033          vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
1034 
1035       } else {
1036 	 // If VG_(translate)() fails, it's because it had to throw a
1037 	 // signal because the client jumped to a bad address.  That
1038 	 // means that either a signal has been set up for delivery,
1039 	 // or the thread has been marked for termination.  Either
1040 	 // way, we just need to go back into the scheduler loop.
1041       }
1042    }
1043 }
1044 
1045 static
1046 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
1047 {
1048    Bool found          = False;
1049    Addr ip             = VG_(get_IP)(tid);
1050    SECno to_sNo         = INV_SNO;
1051    TTEno to_tteNo       = INV_TTE;
1052 
1053    found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1054                                  ip, False/*dont_upd_fast_cache*/ );
1055    if (!found) {
1056       /* Not found; we need to request a translation. */
1057       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1058                           bbs_done, True/*allow redirection*/ )) {
1059          found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1060                                        ip, False );
1061          vg_assert2(found, "handle_chain_me: missing tt_fast entry");
1062       } else {
1063 	 // If VG_(translate)() fails, it's because it had to throw a
1064 	 // signal because the client jumped to a bad address.  That
1065 	 // means that either a signal has been set up for delivery,
1066 	 // or the thread has been marked for termination.  Either
1067 	 // way, we just need to go back into the scheduler loop.
1068         return;
1069       }
1070    }
1071    vg_assert(found);
1072    vg_assert(to_sNo != INV_SNO);
1073    vg_assert(to_tteNo != INV_TTE);
1074 
1075    /* So, finally we know where to patch through to.  Do the patching
1076       and update the various admin tables that allow it to be undone
1077       in the case that the destination block gets deleted. */
1078    VG_(tt_tc_do_chaining)( place_to_chain,
1079                            to_sNo, to_tteNo, toFastEP );
1080 }
1081 
1082 static void handle_syscall(ThreadId tid, UInt trc)
1083 {
1084    ThreadState * volatile tst = VG_(get_ThreadState)(tid);
1085    volatile UWord jumped;
1086 
1087    /* Syscall may or may not block; either way, it will be
1088       complete by the time this call returns, and we'll be
1089       runnable again.  We could take a signal while the
1090       syscall runs. */
1091 
1092    if (VG_(clo_sanity_level) >= 3) {
1093       HChar buf[50];    // large enough
1094       VG_(sprintf)(buf, "(BEFORE SYSCALL, tid %d)", tid);
1095       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
1096       vg_assert(ok);
1097    }
1098 
1099    SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
1100 
1101    if (VG_(clo_sanity_level) >= 3) {
1102       HChar buf[50];    // large enough
1103       VG_(sprintf)(buf, "(AFTER SYSCALL, tid %d)", tid);
1104       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
1105       vg_assert(ok);
1106    }
1107 
1108    if (!VG_(is_running_thread)(tid))
1109       VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
1110 		  tid, VG_(running_tid), tid, tst->status);
1111    vg_assert(VG_(is_running_thread)(tid));
1112 
1113    if (jumped != (UWord)0) {
1114       block_signals();
1115       VG_(poll_signals)(tid);
1116    }
1117 }
1118 
1119 /* tid just requested a jump to the noredir version of its current
1120    program counter.  So make up that translation if needed, run it,
1121    and return the resulting thread return code in two_words[]. */
1122 static
1123 void handle_noredir_jump ( /*OUT*/HWord* two_words,
1124                            /*MOD*/Int*   dispatchCtrP,
1125                            ThreadId tid )
1126 {
1127    /* Clear return area. */
1128    two_words[0] = two_words[1] = 0;
1129 
1130    Addr  hcode = 0;
1131    Addr  ip    = VG_(get_IP)(tid);
1132 
1133    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
1134    if (!found) {
1135       /* Not found; we need to request a translation. */
1136       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
1137                           False/*NO REDIRECTION*/ )) {
1138 
1139          found = VG_(search_unredir_transtab)( &hcode, ip );
1140          vg_assert2(found, "unredir translation missing after creation?!");
1141       } else {
1142 	 // If VG_(translate)() fails, it's because it had to throw a
1143 	 // signal because the client jumped to a bad address.  That
1144 	 // means that either a signal has been set up for delivery,
1145 	 // or the thread has been marked for termination.  Either
1146 	 // way, we just need to go back into the scheduler loop.
1147          two_words[0] = VG_TRC_BORING;
1148          return;
1149       }
1150 
1151    }
1152 
1153    vg_assert(found);
1154    vg_assert(hcode != 0);
1155 
1156    /* Otherwise run it and return the resulting VG_TRC_* value. */
1157    vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
1158    run_thread_for_a_while( two_words, dispatchCtrP, tid,
1159                            hcode, True/*use hcode*/ );
1160 }
1161 
1162 
1163 /*
1164    Run a thread until it wants to exit.
1165 
1166    We assume that the caller has already called VG_(acquire_BigLock) for
1167    us, so we own the VCPU.  Also, all signals are blocked.
1168  */
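/* In outline, a caller (in practice the per-thread wrapper in the
   syswrap layer, e.g. via VG_(main_thread_wrapper_NORETURN) for the
   main thread) does something like the following -- a sketch only:

      VG_(acquire_BigLock)(tid, "thread_wrapper");
      VgSchedReturnCode src = VG_(scheduler)(tid);
      // decide how to exit the thread based on src
*/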
1169 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
1170 {
1171    /* Holds the remaining size of this thread's "timeslice". */
1172    Int dispatch_ctr = 0;
1173 
1174    ThreadState *tst = VG_(get_ThreadState)(tid);
1175    static Bool vgdb_startup_action_done = False;
1176 
1177    if (VG_(clo_trace_sched))
1178       print_sched_event(tid, "entering VG_(scheduler)");
1179 
1180    /* Do vgdb initialization (but once). Only the first (main) task
1181       starting up will do the below.
1182       Initialize gdbserver earlier than at the first
1183       thread VG_(scheduler) is causing problems:
1184       * at the end of VG_(scheduler_init_phase2) :
1185         The main thread is in VgTs_Init state, but in a not yet
1186         consistent state => the thread cannot be reported to gdb
1187         (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1188         back the guest registers to gdb).
1189       * at end of valgrind_main, just
1190         before VG_(main_thread_wrapper_NORETURN)(1) :
1191         The main thread is still in VgTs_Init state but in a
1192         more advanced state. However, the thread state is not yet
1193         completely initialized : a.o., the os_state is not yet fully
1194         set => the thread is then not properly reported to gdb,
1195         which is then confused (causing e.g. a duplicate thread to be
1196         shown, without thread id).
1197       * it would be possible to initialize gdbserver "lower" in the
1198         call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1199         these are platform dependent and the place at which
1200         the thread state is completely initialized is not
1201         specific anymore to the main thread (so a similar "do it only
1202         once" would be needed).
1203 
1204         => a "once only" initialization here is the best compromise. */
1205    if (!vgdb_startup_action_done) {
1206       vg_assert(tid == 1); // it must be the main thread.
1207       vgdb_startup_action_done = True;
1208       if (VG_(clo_vgdb) != Vg_VgdbNo) {
1209          /* If we have to poll, ensure we do an initial poll at the first
1210             scheduler call.  Otherwise, ensure no poll (unless interrupted
1211             by ptrace). */
1212          if (VG_(clo_vgdb_poll))
1213             VG_(force_vgdb_poll) ();
1214          else
1215             VG_(disable_vgdb_poll) ();
1216 
1217          vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
1218          /* As we are initializing, VG_(dyn_vgdb_error) can't have been
1219             changed yet. */
1220 
1221          VG_(gdbserver_prerun_action) (1);
1222       } else {
1223          VG_(disable_vgdb_poll) ();
1224       }
1225    }
1226 
1227    if (SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))
1228        && tid != 1) {
1229       /* We disable the stack cache the first time we see a thread other
1230          than the main thread appearing. At this moment, we are sure the pthread
1231          lib loading is done/variable was initialised by pthread lib/... */
1232       if (VG_(client__stack_cache_actsize__addr)) {
1233          if (*VG_(client__stack_cache_actsize__addr) == 0) {
1234             VG_(debugLog)(1,"sched",
1235                           "pthread stack cache size disable done"
1236                           " via kludge\n");
1237             *VG_(client__stack_cache_actsize__addr) = 1000 * 1000 * 1000;
1238             /* Set a value big enough to be above the hardcoded maximum stack
1239                cache size in glibc, small enough to allow a pthread stack size
1240                to be added without risk of overflow. */
1241          }
1242       } else {
1243           VG_(debugLog)(0,"sched",
1244                         "WARNING: pthread stack cache cannot be disabled!\n");
1245           VG_(clo_sim_hints) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache);
1246           /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
1247              to avoid having a msg for all following threads. */
1248       }
1249    }
1250 
1251    /* set the proper running signal mask */
1252    block_signals();
1253 
1254    vg_assert(VG_(is_running_thread)(tid));
1255 
1256    dispatch_ctr = SCHEDULING_QUANTUM;
1257 
1258    while (!VG_(is_exiting)(tid)) {
1259 
1260       vg_assert(dispatch_ctr >= 0);
1261       if (dispatch_ctr == 0) {
1262 
1263 	 /* Our slice is done, so yield the CPU to another thread.  On
1264             Linux we don't sleep between giving up the CPU and running
1265             again, since that would take too much time. */
1266 
1267 	 /* 4 July 06: it seems that a zero-length nsleep is needed to
1268             cause async thread cancellation (canceller.c) to terminate
1269             in finite time; else it is in some kind of race/starvation
1270             situation and completion is arbitrarily delayed (although
1271             this is not a deadlock).
1272 
1273             Unfortunately these sleeps cause MPI jobs not to terminate
1274             sometimes (some kind of livelock).  So sleeping once
1275             every N opportunities appears to work. */
1276 
1277 	 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1278             sys_yield also helps the problem, whilst not crashing apps. */
1279 
1280 	 VG_(release_BigLock)(tid, VgTs_Yielding,
1281                                    "VG_(scheduler):timeslice");
1282 	 /* ------------ now we don't have The Lock ------------ */
1283 
1284 	 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
1285 	 /* ------------ now we do have The Lock ------------ */
1286 
1287 	 /* OK, do some relatively expensive housekeeping stuff */
1288 	 scheduler_sanity(tid);
1289 	 VG_(sanity_check_general)(False);
1290 
1291 	 /* Look for any pending signals for this thread, and set them up
1292 	    for delivery */
1293 	 VG_(poll_signals)(tid);
1294 
1295 	 if (VG_(is_exiting)(tid))
1296 	    break;		/* poll_signals picked up a fatal signal */
1297 
1298 	 /* For stats purposes only. */
1299 	 n_scheduling_events_MAJOR++;
1300 
1301 	 /* Figure out how many bbs to run in the next timeslice. */
1302          dispatch_ctr = SCHEDULING_QUANTUM;
1303 
1304 	 /* paranoia ... */
1305 	 vg_assert(tst->tid == tid);
1306 	 vg_assert(tst->os_state.lwpid == VG_(gettid)());
1307       }
1308 
1309       /* For stats purposes only. */
1310       n_scheduling_events_MINOR++;
1311 
1312       if (0)
1313          VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
1314                                    tid, dispatch_ctr - 1 );
1315 
1316       HWord trc[2]; /* "two_words" */
1317       run_thread_for_a_while( &trc[0],
1318                               &dispatch_ctr,
1319                               tid, 0/*ignored*/, False );
1320 
1321       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1322          const HChar *name = name_of_sched_event(trc[0]);
1323          HChar buf[VG_(strlen)(name) + 10];    // large enough
1324 	 VG_(sprintf)(buf, "TRC: %s", name);
1325 	 print_sched_event(tid, buf);
1326       }
1327 
1328       if (trc[0] == VEX_TRC_JMP_NOREDIR) {
1329          /* If we got a request to run a no-redir version of
1330             something, do so now -- handle_noredir_jump just (creates
1331             and) runs that one translation.  The flip side is that the
1332             noredir translation can't itself return another noredir
1333             request -- that would be nonsensical.  It can, however,
1334             return VG_TRC_BORING, which just means keep going as
1335             normal. */
1336          /* Note that the fact that we need to continue with a
1337             no-redir jump is not recorded anywhere else in this
1338             thread's state.  So we *must* execute the block right now
1339             -- we can't fail to execute it and later resume with it,
1340             because by then we'll have forgotten the fact that it
1341             should be run as no-redir, but will get run as a normal
1342             potentially-redir'd, hence screwing up.  This really ought
1343             to be cleaned up, by noting in the guest state that the
1344             next block to be executed should be no-redir.  Then we can
1345             suspend and resume at any point, which isn't the case at
1346             the moment. */
1347          /* We can't enter a no-redir translation with the dispatch
1348             ctr set to zero, for the reasons commented just above --
1349             we need to force it to execute right now.  So, if the
1350             dispatch ctr is zero, set it to one.  Note that this would
1351             have the bad side effect of holding the Big Lock arbitrary
1352             long should there be an arbitrarily long sequence of
1353             back-to-back no-redir translations to run.  But we assert
1354             just below that this translation cannot request another
1355             no-redir jump, so we should be safe against that. */
1356          if (dispatch_ctr == 0) {
1357             dispatch_ctr = 1;
1358          }
1359          handle_noredir_jump( &trc[0],
1360                               &dispatch_ctr,
1361                               tid );
1362          vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
1363 
1364          /* This can't be allowed to happen, since it means the block
1365             didn't execute, and we have no way to resume-as-noredir
1366             after we get more timeslice.  But I don't think it ever
1367             can, since handle_noredir_jump will assert if the counter
1368             is zero on entry. */
1369          vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
1370          /* This asserts the same thing. */
1371          vg_assert(dispatch_ctr >= 0);
1372 
1373          /* A no-redir translation can't return with a chain-me
1374             request, since chaining in the no-redir cache is too
1375             complex. */
1376          vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1377                    && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
1378       }
1379 
1380       switch (trc[0]) {
1381       case VEX_TRC_JMP_BORING:
1382          /* assisted dispatch, no event.  Used by no-redir
1383             translations to force return to the scheduler. */
1384       case VG_TRC_BORING:
1385          /* no special event, just keep going. */
1386          break;
1387 
1388       case VG_TRC_INNER_FASTMISS:
1389 	 vg_assert(dispatch_ctr >= 0);
1390 	 handle_tt_miss(tid);
1391 	 break;
1392 
1393       case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
1394          if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
1395          handle_chain_me(tid, (void*)trc[1], False);
1396          break;
1397       }
1398 
1399       case VG_TRC_CHAIN_ME_TO_FAST_EP: {
1400          if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
1401          handle_chain_me(tid, (void*)trc[1], True);
1402          break;
1403       }
1404 
1405       case VEX_TRC_JMP_CLIENTREQ:
1406 	 do_client_request(tid);
1407 	 break;
1408 
1409       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
1410       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
1411       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
1412       case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
1413 	 handle_syscall(tid, trc[0]);
1414 	 if (VG_(clo_sanity_level) > 2)
1415 	    VG_(sanity_check_general)(True); /* sanity-check every syscall */
1416 	 break;
1417 
1418       case VEX_TRC_JMP_YIELD:
1419 	 /* Explicit yield, because this thread is in a spin-lock
1420 	    or something.  Only let the thread run for a short while
1421             longer.  Because swapping to another thread is expensive,
1422             we're prepared to let this thread eat a little more CPU
1423             before swapping to another.  That means that short term
1424             spins waiting for hardware to poke memory won't cause a
1425             thread swap. */
1426          if (dispatch_ctr > 1000)
1427             dispatch_ctr = 1000;
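         /* That is, the thread gets at most ~1000 more code blocks before
            its timeslice expires and another thread can be scheduled. */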
1428 	 break;
1429 
1430       case VG_TRC_INNER_COUNTERZERO:
1431 	 /* Timeslice is out.  Let a new thread be scheduled. */
1432 	 vg_assert(dispatch_ctr == 0);
1433 	 break;
1434 
1435       case VG_TRC_FAULT_SIGNAL:
1436 	 /* Everything should be set up (either we're exiting, or
1437 	    about to start in a signal handler). */
1438 	 break;
1439 
1440       case VEX_TRC_JMP_MAPFAIL:
1441          /* Failure of arch-specific address translation (x86/amd64
1442             segment override use) */
1443          /* jrs 2005 03 11: is this correct? */
1444          VG_(synth_fault)(tid);
1445          break;
1446 
1447       case VEX_TRC_JMP_EMWARN: {
1448          static Int  counts[EmNote_NUMBER];
1449          static Bool counts_initted = False;
1450          VexEmNote ew;
1451          const HChar* what;
1452          Bool      show;
1453          Int       q;
1454          if (!counts_initted) {
1455             counts_initted = True;
1456             for (q = 0; q < EmNote_NUMBER; q++)
1457                counts[q] = 0;
1458          }
1459          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1460          what = (ew < 0 || ew >= EmNote_NUMBER)
1461                    ? "unknown (?!)"
1462                    : LibVEX_EmNote_string(ew);
1463          show = (ew < 0 || ew >= EmNote_NUMBER)
1464                    ? True
1465                    : counts[ew]++ < 3;
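         /* So each recognised note kind is shown at most three times per
            run, while unrecognised kinds are always shown -- subject to
            VG_(clo_show_emwarns) and XML mode, checked just below. */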
1466          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
1467             VG_(message)( Vg_UserMsg,
1468                           "Emulation warning: unsupported action:\n");
1469             VG_(message)( Vg_UserMsg, "  %s\n", what);
1470             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1471          }
1472          break;
1473       }
1474 
1475       case VEX_TRC_JMP_EMFAIL: {
1476          VexEmNote ew;
1477          const HChar* what;
1478          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1479          what = (ew < 0 || ew >= EmNote_NUMBER)
1480                    ? "unknown (?!)"
1481                    : LibVEX_EmNote_string(ew);
1482          VG_(message)( Vg_UserMsg,
1483                        "Emulation fatal error -- Valgrind cannot continue:\n");
1484          VG_(message)( Vg_UserMsg, "  %s\n", what);
1485          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1486          VG_(message)(Vg_UserMsg, "\n");
1487          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
1488          VG_(message)(Vg_UserMsg, "\n");
1489          VG_(exit)(1);
1490          break;
1491       }
1492 
1493       case VEX_TRC_JMP_SIGILL:
1494          VG_(synth_sigill)(tid, VG_(get_IP)(tid));
1495          break;
1496 
1497       case VEX_TRC_JMP_SIGTRAP:
1498          VG_(synth_sigtrap)(tid);
1499          break;
1500 
1501       case VEX_TRC_JMP_SIGSEGV:
1502          VG_(synth_fault)(tid);
1503          break;
1504 
1505       case VEX_TRC_JMP_SIGBUS:
1506          VG_(synth_sigbus)(tid);
1507          break;
1508 
1509       case VEX_TRC_JMP_SIGFPE_INTDIV:
1510          VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
1511          break;
1512 
1513       case VEX_TRC_JMP_SIGFPE_INTOVF:
1514          VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
1515          break;
1516 
1517       case VEX_TRC_JMP_NODECODE: {
1518          Addr addr = VG_(get_IP)(tid);
1519 
1520          if (VG_(clo_sigill_diag)) {
1521             VG_(umsg)(
1522                "valgrind: Unrecognised instruction at address %#lx.\n", addr);
1523             VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1524 #        define M(a) VG_(umsg)(a "\n");
1525          M("Your program just tried to execute an instruction that Valgrind" );
1526          M("did not recognise.  There are two possible reasons for this."    );
1527          M("1. Your program has a bug and erroneously jumped to a non-code"  );
1528          M("   location.  If you are running Memcheck and you just saw a"    );
1529          M("   warning about a bad jump, it's probably your program's fault.");
1530          M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1531          M("   i.e. it's Valgrind's fault.  If you think this is the case or");
1532          M("   you are not sure, please let us know and we'll try to fix it.");
1533          M("Either way, Valgrind will now raise a SIGILL signal which will"  );
1534          M("probably kill your program."                                     );
1535 #        undef M
1536          }
1537 #        if defined(VGA_s390x)
1538          /* Now that the complaint is out we need to adjust the guest_IA. The
1539             reason is that -- after raising the exception -- execution will
1540             continue with the insn that follows the invalid insn. As the first
1541             2 bits of the invalid insn determine its length in the usual way,
1542             we can compute the address of the next insn here and adjust the
1543             guest_IA accordingly. This adjustment is essential and tested by
1544             none/tests/s390x/op_exception.c (which would loop forever
1545             otherwise) */
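         /* The length is encoded in the top two bits of the first byte:
            00 -> 2 bytes, 01 or 10 -> 4 bytes, 11 -> 6 bytes; the
            expression below computes exactly that mapping. */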
1546          UChar byte = ((UChar *)addr)[0];
1547          UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
1548          Addr  next_insn_addr = addr + insn_length;
1549          VG_(set_IP)(tid, next_insn_addr);
1550 #        endif
1551          VG_(synth_sigill)(tid, addr);
1552          break;
1553       }
1554 
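      /* Both cache-maintenance cases below take the affected address
         range from guest_CMSTART/guest_CMLEN in the guest state. */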
1555       case VEX_TRC_JMP_INVALICACHE:
1556          VG_(discard_translations)(
1557             (Addr)VG_(threads)[tid].arch.vex.guest_CMSTART,
1558             VG_(threads)[tid].arch.vex.guest_CMLEN,
1559             "scheduler(VEX_TRC_JMP_INVALICACHE)"
1560          );
1561          if (0)
1562             VG_(printf)("dump translations done.\n");
1563          break;
1564 
1565       case VEX_TRC_JMP_FLUSHDCACHE: {
1566          void* start = (void*)VG_(threads)[tid].arch.vex.guest_CMSTART;
1567          SizeT len   = VG_(threads)[tid].arch.vex.guest_CMLEN;
1568          VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
1569          VG_(flush_dcache)(start, len);
1570          break;
1571       }
1572 
1573       case VG_TRC_INVARIANT_FAILED:
1574          /* This typically happens if, after running generated code,
1575             it is detected that host CPU settings (eg, FPU/Vector
1576             control words) are not as they should be.  Vex's code
1577             generation specifies the state such control words should
1578             be in on entry to Vex-generated code, and they should be
1579             unchanged on exit from it.  Failure of this assertion
1580             usually means a bug in Vex's code generation. */
1581          //{ UInt xx;
1582          //  __asm__ __volatile__ (
1583          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
1584          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
1585          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1586          //}
1587          vg_assert2(0, "VG_(scheduler), phase 3: "
1588                        "run_innerloop detected host "
1589                        "state invariant failure", trc);
1590 
1591       case VEX_TRC_JMP_SYS_SYSENTER:
1592          /* Do whatever simulation is appropriate for an x86 sysenter
1593             instruction.  Note that it is critical to set this thread's
1594             guest_EIP to point at the code to execute after the
1595             sysenter, since Vex-generated code will not have set it --
1596             vex does not know what it should be.  Vex sets the next
1597             address to zero, so if you don't set guest_EIP, the thread
1598             will jump to zero afterwards and probably die as a result. */
1599 #        if defined(VGP_x86_linux)
1600          vg_assert2(0, "VG_(scheduler), phase 3: "
1601                        "sysenter_x86 on x86-linux is not supported");
1602 #        elif defined(VGP_x86_darwin)
1603          /* return address in client edx */
1604          VG_(threads)[tid].arch.vex.guest_EIP
1605             = VG_(threads)[tid].arch.vex.guest_EDX;
1606          handle_syscall(tid, trc[0]);
1607 #        else
1608          vg_assert2(0, "VG_(scheduler), phase 3: "
1609                        "sysenter_x86 on non-x86 platform?!?!");
1610 #        endif
1611          break;
1612 
1613       default:
1614 	 vg_assert2(0, "VG_(scheduler), phase 3: "
1615                        "unexpected thread return code (%u)", trc[0]);
1616 	 /* NOTREACHED */
1617 	 break;
1618 
1619       } /* switch (trc) */
1620 
1621       if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
1622          maybe_show_sb_profile();
1623    }
1624 
1625    if (VG_(clo_trace_sched))
1626       print_sched_event(tid, "exiting VG_(scheduler)");
1627 
1628    vg_assert(VG_(is_exiting)(tid));
1629 
1630    return tst->exitreason;
1631 }
1632 
1633 
1634 /*
1635    This causes all threads to forceably exit.  They aren't actually
1636    dead by the time this returns; you need to call
1637    VG_(reap_threads)() to wait for them.
1638  */
1639 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
1640 {
1641    ThreadId tid;
1642 
1643    vg_assert(VG_(is_running_thread)(me));
1644 
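   /* Mark every other live thread as exiting and kick it out of any
      blocking syscall, so that it notices the request promptly. */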
1645    for (tid = 1; tid < VG_N_THREADS; tid++) {
1646       if (tid == me
1647           || VG_(threads)[tid].status == VgTs_Empty)
1648          continue;
1649       if (0)
1650          VG_(printf)(
1651             "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
1652 
1653       VG_(threads)[tid].exitreason = src;
1654       if (src == VgSrc_FatalSig)
1655          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
1656       VG_(get_thread_out_of_syscall)(tid);
1657    }
1658 }
1659 
1660 
1661 /* ---------------------------------------------------------------------
1662    Specifying shadow register values
1663    ------------------------------------------------------------------ */
1664 
1665 #if defined(VGA_x86)
1666 #  define VG_CLREQ_ARGS       guest_EAX
1667 #  define VG_CLREQ_RET        guest_EDX
1668 #elif defined(VGA_amd64)
1669 #  define VG_CLREQ_ARGS       guest_RAX
1670 #  define VG_CLREQ_RET        guest_RDX
1671 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
1672 #  define VG_CLREQ_ARGS       guest_GPR4
1673 #  define VG_CLREQ_RET        guest_GPR3
1674 #elif defined(VGA_arm)
1675 #  define VG_CLREQ_ARGS       guest_R4
1676 #  define VG_CLREQ_RET        guest_R3
1677 #elif defined(VGA_arm64)
1678 #  define VG_CLREQ_ARGS       guest_X4
1679 #  define VG_CLREQ_RET        guest_X3
1680 #elif defined (VGA_s390x)
1681 #  define VG_CLREQ_ARGS       guest_r2
1682 #  define VG_CLREQ_RET        guest_r3
1683 #elif defined(VGA_mips32) || defined(VGA_mips64)
1684 #  define VG_CLREQ_ARGS       guest_r12
1685 #  define VG_CLREQ_RET        guest_r11
1686 #elif defined(VGA_tilegx)
1687 #  define VG_CLREQ_ARGS       guest_r12
1688 #  define VG_CLREQ_RET        guest_r11
1689 #else
1690 #  error Unknown arch
1691 #endif
1692 
1693 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
1694 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
1695 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
1696 
1697 // These macros write a value to a client's thread register, and tell the
1698 // tool that it's happened (if necessary).
1699 
1700 #define SET_CLREQ_RETVAL(zztid, zzval) \
1701    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1702         VG_TRACK( post_reg_write, \
1703                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1704    } while (0)
1705 
1706 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1707    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1708         VG_TRACK( post_reg_write_clientcall_return, \
1709                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
1710    } while (0)
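// For example, SET_CLREQ_RETVAL(tid, 42) writes 42 into the client-request
// return register chosen above for this architecture (guest_EDX on x86,
// guest_RDX on amd64, and so on) and tells the tool about the register write.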
1711 
1712 
1713 /* ---------------------------------------------------------------------
1714    Handle client requests.
1715    ------------------------------------------------------------------ */
1716 
1717 // OS-specific(?) client requests
1718 static Bool os_client_request(ThreadId tid, UWord *args)
1719 {
1720    Bool handled = True;
1721 
1722    vg_assert(VG_(is_running_thread)(tid));
1723 
1724    switch(args[0]) {
1725    case VG_USERREQ__LIBC_FREERES_DONE:
1726       /* This is equivalent to an exit() syscall, but we don't set the
1727 	 exitcode (since it might already be set) */
1728       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
1729          VG_(message)(Vg_DebugMsg,
1730                       "__libc_freeres() done; really quitting!\n");
1731       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
1732       break;
1733 
1734    default:
1735       handled = False;
1736       break;
1737    }
1738 
1739    return handled;
1740 }
1741 
1742 
1743 /* Write out a client message, possibly including a back trace. Return
1744    the number of characters written. In case of XML output, the format
1745    string as well as any arguments it requires will be XML'ified.
1746    I.e. special characters such as the angle brackets will be translated
1747    into proper escape sequences. */
1748 static
1749 Int print_client_message( ThreadId tid, const HChar *format,
1750                           va_list *vargsp, Bool include_backtrace)
1751 {
1752    Int count;
1753 
1754    if (VG_(clo_xml)) {
1755       /* Translate the format string as follows:
1756          <  -->  &lt;
1757          >  -->  &gt;
1758          &  -->  &amp;
1759          %s -->  %pS
1760          Yes, yes, it's simplified but in synch with
1761          myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
1762       */
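      /* For example, a format of "value <%s>" becomes "value &lt;%pS&gt;".
         The worst single-character expansion is '&' -> "&amp;" (5 chars),
         hence the 5x sizing of the buffer below. */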
1763 
1764       /* Allocate a buffer that is for sure large enough. */
1765       HChar xml_format[VG_(strlen)(format) * 5 + 1];
1766 
1767       const HChar *p;
1768       HChar *q = xml_format;
1769 
1770       for (p = format; *p; ++p) {
1771          switch (*p) {
1772          case '<': VG_(strcpy)(q, "&lt;");  q += 4; break;
1773          case '>': VG_(strcpy)(q, "&gt;");  q += 4; break;
1774          case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
1775          case '%':
1776             /* Careful: make sure %%s stays %%s */
1777             *q++ = *p++;
1778             if (*p == 's') {
1779               *q++ = 'p';
1780               *q++ = 'S';
1781             } else {
1782               *q++ = *p;
1783             }
1784             break;
1785 
1786          default:
1787             *q++ = *p;
1788             break;
1789          }
1790       }
1791       *q = '\0';
1792 
1793       VG_(printf_xml)( "<clientmsg>\n" );
1794       VG_(printf_xml)( "  <tid>%d</tid>\n", tid );
1795       VG_(printf_xml)( "  <text>" );
1796       count = VG_(vprintf_xml)( xml_format, *vargsp );
1797       VG_(printf_xml)( "  </text>\n" );
1798    } else {
1799       count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
1800       VG_(message_flush)();
1801    }
1802 
1803    if (include_backtrace)
1804       VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1805 
1806    if (VG_(clo_xml))
1807       VG_(printf_xml)( "</clientmsg>\n" );
1808 
1809    return count;
1810 }
1811 
1812 
1813 /* Do a client request for the thread tid.  After the request, tid may
1814    or may not still be runnable; if not, the scheduler will have to
1815    choose a new thread to run.
1816 */
1817 static
1818 void do_client_request ( ThreadId tid )
1819 {
1820    UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
1821    UWord req_no = arg[0];
1822 
1823    if (0)
1824       VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
1825    switch (req_no) {
1826 
1827       case VG_USERREQ__CLIENT_CALL0: {
1828          UWord (*f)(ThreadId) = (__typeof__(f))arg[1];
1829 	 if (f == NULL)
1830 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
1831 	 else
1832 	    SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
1833          break;
1834       }
1835       case VG_USERREQ__CLIENT_CALL1: {
1836          UWord (*f)(ThreadId, UWord) = (__typeof__(f))arg[1];
1837 	 if (f == NULL)
1838 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
1839 	 else
1840 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
1841          break;
1842       }
1843       case VG_USERREQ__CLIENT_CALL2: {
1844          UWord (*f)(ThreadId, UWord, UWord) = (__typeof__(f))arg[1];
1845 	 if (f == NULL)
1846 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
1847 	 else
1848 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
1849          break;
1850       }
1851       case VG_USERREQ__CLIENT_CALL3: {
1852          UWord (*f)(ThreadId, UWord, UWord, UWord) = (__typeof__(f))arg[1];
1853 	 if (f == NULL)
1854 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
1855 	 else
1856 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
1857          break;
1858       }
1859 
1860       // Nb: this looks like a circular definition, because it kind of is.
1861       // See comment in valgrind.h to understand what's going on.
1862       case VG_USERREQ__RUNNING_ON_VALGRIND:
1863          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
1864          break;
1865 
1866       case VG_USERREQ__PRINTF: {
1867          const HChar* format = (HChar *)arg[1];
1868          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1869             _VALIST_BY_REF version instead */
1870          if (sizeof(va_list) != sizeof(UWord))
1871             goto va_list_casting_error_NORETURN;
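         /* Reinterpret the single UWord argument as a va_list through a
            union.  This is only meaningful when the two types have the
            same size, which the check above has just established. */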
1872          union {
1873             va_list vargs;
1874             unsigned long uw;
1875          } u;
1876          u.uw = (unsigned long)arg[2];
1877          Int count =
1878             print_client_message( tid, format, &u.vargs,
1879                                   /* include_backtrace */ False );
1880          SET_CLREQ_RETVAL( tid, count );
1881          break;
1882       }
1883 
1884       case VG_USERREQ__PRINTF_BACKTRACE: {
1885          const HChar* format = (HChar *)arg[1];
1886          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1887             _VALIST_BY_REF version instead */
1888          if (sizeof(va_list) != sizeof(UWord))
1889             goto va_list_casting_error_NORETURN;
1890          union {
1891             va_list vargs;
1892             unsigned long uw;
1893          } u;
1894          u.uw = (unsigned long)arg[2];
1895          Int count =
1896             print_client_message( tid, format, &u.vargs,
1897                                   /* include_backtrace */ True );
1898          SET_CLREQ_RETVAL( tid, count );
1899          break;
1900       }
1901 
1902       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
1903          const HChar* format = (HChar *)arg[1];
1904          va_list* vargsp = (va_list*)arg[2];
1905          Int count =
1906             print_client_message( tid, format, vargsp,
1907                                   /* include_backtrace */ False );
1908 
1909          SET_CLREQ_RETVAL( tid, count );
1910          break;
1911       }
1912 
1913       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
1914          const HChar* format = (HChar *)arg[1];
1915          va_list* vargsp = (va_list*)arg[2];
1916          Int count =
1917             print_client_message( tid, format, vargsp,
1918                                   /* include_backtrace */ True );
1919          SET_CLREQ_RETVAL( tid, count );
1920          break;
1921       }
1922 
1923       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
1924          va_list* vargsp = (va_list*)arg[2];
1925          Int count =
1926             VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
1927          VG_(message_flush)();
1928          SET_CLREQ_RETVAL( tid, count );
1929          break;
1930       }
1931 
1932       case VG_USERREQ__ADD_IFUNC_TARGET: {
1933          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
1934          SET_CLREQ_RETVAL( tid, 0);
1935          break; }
1936 
1937       case VG_USERREQ__STACK_REGISTER: {
1938          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
1939          SET_CLREQ_RETVAL( tid, sid );
1940          break; }
1941 
1942       case VG_USERREQ__STACK_DEREGISTER: {
1943          VG_(deregister_stack)(arg[1]);
1944          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1945          break; }
1946 
1947       case VG_USERREQ__STACK_CHANGE: {
1948          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
1949          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1950          break; }
1951 
1952       case VG_USERREQ__GET_MALLOCFUNCS: {
1953 	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
1954 
1955 	 info->tl_malloc               = VG_(tdict).tool_malloc;
1956 	 info->tl_calloc               = VG_(tdict).tool_calloc;
1957 	 info->tl_realloc              = VG_(tdict).tool_realloc;
1958 	 info->tl_memalign             = VG_(tdict).tool_memalign;
1959 	 info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
1960 	 info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
1961 	 info->tl_free                 = VG_(tdict).tool_free;
1962 	 info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
1963 	 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
1964          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
1965 
1966 	 info->mallinfo                = VG_(mallinfo);
1967 	 info->clo_trace_malloc        = VG_(clo_trace_malloc);
1968 
1969          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1970 
1971 	 break;
1972       }
1973 
1974       /* Requests from the client program */
1975 
1976       case VG_USERREQ__DISCARD_TRANSLATIONS:
1977          if (VG_(clo_verbosity) > 2)
1978             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
1979                          " addr %p,  len %lu\n",
1980                          (void*)arg[1], arg[2] );
1981 
1982          VG_(discard_translations)(
1983             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
1984          );
1985 
1986          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1987 	 break;
1988 
1989       case VG_USERREQ__COUNT_ERRORS:
1990          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
1991          break;
1992 
1993       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
1994          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
1995          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1996          break;
1997 
1998       case VG_USERREQ__MAP_IP_TO_SRCLOC: {
1999          Addr   ip    = arg[1];
2000          HChar* buf64 = (HChar*)arg[2];  // points to a HChar [64] array
2001          const HChar *buf;  // points to a string of unknown size
2002 
2003          VG_(memset)(buf64, 0, 64);
2004          UInt linenum = 0;
2005          Bool ok = VG_(get_filename_linenum)(
2006                       ip, &buf, NULL, &linenum
2007                    );
2008          if (ok) {
2009             /* For backward compatibility truncate the filename to
2010                49 characters. */
2011             VG_(strncpy)(buf64, buf, 50);
2012             buf64[49] = '\0';
2013             UInt i;
2014             for (i = 0; i < 50; i++) {
2015                if (buf64[i] == 0)
2016                   break;
2017             }
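            /* 49 filename chars + ':' + at most 10 digits of line number
               + NUL is at most 61 bytes, so this fits in the 64-byte
               buffer. */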
2018             VG_(sprintf)(buf64+i, ":%u", linenum);  // safe
2019          } else {
2020             buf64[0] = 0;
2021          }
2022 
2023          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2024          break;
2025       }
2026 
2027       case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
2028          Word delta = arg[1];
2029          vg_assert(delta == 1 || delta == -1);
2030          ThreadState* tst = VG_(get_ThreadState)(tid);
2031          vg_assert(tst);
2032          if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
2033             tst->err_disablement_level++;
2034          }
2035          else
2036          if (delta == -1 && tst->err_disablement_level > 0) {
2037             tst->err_disablement_level--;
2038          }
2039          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2040          break;
2041       }
2042 
2043       case VG_USERREQ__GDB_MONITOR_COMMAND: {
2044          UWord ret;
2045          ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
2046          SET_CLREQ_RETVAL(tid, ret);
2047          break;
2048       }
2049 
2050       case VG_USERREQ__MALLOCLIKE_BLOCK:
2051       case VG_USERREQ__RESIZEINPLACE_BLOCK:
2052       case VG_USERREQ__FREELIKE_BLOCK:
2053          // Ignore them if the addr is NULL;  otherwise pass on to the tool.
2054          if (!arg[1]) {
2055             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2056             break;
2057          } else {
2058             goto my_default;
2059          }
2060 
2061       case VG_USERREQ__VEX_INIT_FOR_IRI:
2062          LibVEX_InitIRI ( (IRICB *)arg[1] );
2063          break;
2064 
2065       default:
2066        my_default:
2067 	 if (os_client_request(tid, arg)) {
2068 	    // do nothing, os_client_request() handled it
2069          } else if (VG_(needs).client_requests) {
2070 	    UWord ret;
2071 
2072             if (VG_(clo_verbosity) > 2)
2073                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
2074                            arg[0], (void*)arg[1], arg[2] );
2075 
2076 	    if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
2077 	       SET_CLREQ_RETVAL(tid, ret);
2078          } else {
2079 	    static Bool whined = False;
2080 
2081 	    if (!whined && VG_(clo_verbosity) > 2) {
2082                // Allow for requests in core, but defined by tools, which
2083                // have 0 and 0 in their two high bytes.
2084                HChar c1 = (arg[0] >> 24) & 0xff;
2085                HChar c2 = (arg[0] >> 16) & 0xff;
2086                if (c1 == 0) c1 = '_';
2087                if (c2 == 0) c2 = '_';
2088 	       VG_(message)(Vg_UserMsg, "Warning:\n"
2089                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
2090 		   "  VG_(needs).client_requests should be set?\n",
2091 			    arg[0], c1, c2, arg[0] & 0xffff);
2092 	       whined = True;
2093 	    }
2094          }
2095          break;
2096    }
2097    return;
2098 
2099    /*NOTREACHED*/
2100   va_list_casting_error_NORETURN:
2101    VG_(umsg)(
2102       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
2103       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
2104       "on a platform where they cannot be supported.  Please use the\n"
2105       "equivalent _VALIST_BY_REF versions instead.\n"
2106       "\n"
2107       "This is a binary-incompatible change in Valgrind's client request\n"
2108       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
2109       "are expected to almost never see this message.  The only case in\n"
2110       "which you might see this message is if your code uses the macros\n"
2111       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
2112       "to recompile such code, using the header files from this version of\n"
2113       "Valgrind, and not any previous version.\n"
2114       "\n"
2115       "If you see this message in any other circumstances, it is probably\n"
2116       "a bug in Valgrind.  In this case, please file a bug report at\n"
2117       "\n"
2118       "   http://www.valgrind.org/support/bug_reports.html\n"
2119       "\n"
2120       "Will now abort.\n"
2121    );
2122    vg_assert(0);
2123 }
2124 
2125 
2126 /* ---------------------------------------------------------------------
2127    Sanity checking (permanently engaged)
2128    ------------------------------------------------------------------ */
2129 
2130 /* Internal consistency checks on the sched structures. */
2131 static
2132 void scheduler_sanity ( ThreadId tid )
2133 {
2134    Bool bad = False;
2135    Int lwpid = VG_(gettid)();
2136 
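   /* Three invariants are checked: this thread is recorded as the running
      thread, our LWP id matches the one in its ThreadState, and this LWP
      currently owns the_BigLock. */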
2137    if (!VG_(is_running_thread)(tid)) {
2138       VG_(message)(Vg_DebugMsg,
2139 		   "Thread %d is supposed to be running, "
2140                    "but doesn't own the_BigLock (owned by %d)\n",
2141 		   tid, VG_(running_tid));
2142       bad = True;
2143    }
2144 
2145    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
2146       VG_(message)(Vg_DebugMsg,
2147                    "Thread %d supposed to be in LWP %d, but we're actually %d\n",
2148                    tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
2149       bad = True;
2150    }
2151 
2152    if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
2153       VG_(message)(Vg_DebugMsg,
2154                    "Thread %d (LWP %d) doesn't own the_BigLock\n",
2155                    tid, lwpid);
2156       bad = True;
2157    }
2158 
2159    if (0) {
2160       /* Periodically show the state of all threads, for debugging
2161          purposes. */
2162       static UInt lasttime = 0;
2163       UInt now;
2164       now = VG_(read_millisecond_timer)();
2165       if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
2166          lasttime = now;
2167          VG_(printf)("\n------------ Sched State at %d ms ------------\n",
2168                      (Int)now);
2169          VG_(show_sched_status)(True,  // host_stacktrace
2170                                 True,  // stack_usage
2171                                 True); // exited_threads);
2172       }
2173    }
2174 
2175    /* core_panic also shows the sched status, which is why we don't
2176       show it above if bad==True. */
2177    if (bad)
2178       VG_(core_panic)("scheduler_sanity: failed");
2179 }
2180 
2181 void VG_(sanity_check_general) ( Bool force_expensive )
2182 {
2183    ThreadId tid;
2184 
2185    static UInt next_slow_check_at = 1;
2186    static UInt slow_check_interval = 25;
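   /* When VG_(clo_sanity_level) is 1, the expensive checks below run at
      fast-check counts 1, 25, 50, 76, ... -- the gap between successive
      slow checks grows by one each time. */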
2187 
2188    if (VG_(clo_sanity_level) < 1) return;
2189 
2190    /* --- First do all the tests that we can do quickly. ---*/
2191 
2192    sanity_fast_count++;
2193 
2194    /* Check stuff pertaining to the memory check system. */
2195 
2196    /* Check that nobody has spuriously claimed that the first or
2197       last 16 pages of memory have become accessible [...] */
2198    if (VG_(needs).sanity_checks) {
2199       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
2200    }
2201 
2202    /* --- Now some more expensive checks. ---*/
2203 
2204    /* Once every now and again, check some more expensive stuff.
2205       Gradually increase the interval between such checks so as not to
2206       burden long-running programs too much. */
2207    if ( force_expensive
2208         || VG_(clo_sanity_level) > 1
2209         || (VG_(clo_sanity_level) == 1
2210             && sanity_fast_count == next_slow_check_at)) {
2211 
2212       if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
2213 
2214       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
2215       slow_check_interval++;
2216       sanity_slow_count++;
2217 
2218       if (VG_(needs).sanity_checks) {
2219           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
2220       }
2221 
2222       /* Look for stack overruns.  Visit all threads. */
2223       for (tid = 1; tid < VG_N_THREADS; tid++) {
2224 	 SizeT    remains;
2225          VgStack* stack;
2226 
2227 	 if (VG_(threads)[tid].status == VgTs_Empty ||
2228 	     VG_(threads)[tid].status == VgTs_Zombie)
2229 	    continue;
2230 
2231          stack
2232             = (VgStack*)
2233               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
2234          SizeT limit
2235             = 4096; // Let's say.  Checking more causes lots of L2 misses.
2236 	 remains
2237             = VG_(am_get_VgStack_unused_szB)(stack, limit);
2238 	 if (remains < limit)
2239 	    VG_(message)(Vg_DebugMsg,
2240                          "WARNING: Thread %d is within %ld bytes "
2241                          "of running out of valgrind stack!\n"
2242                          "Valgrind stack size can be increased "
2243                          "using --valgrind-stacksize=....\n",
2244 		         tid, remains);
2245       }
2246    }
2247 
2248    if (VG_(clo_sanity_level) > 1) {
2249       /* Check sanity of the low-level memory manager.  Note that bugs
2250          in the client's code can cause this to fail, so we don't do
2251          this check unless specially asked for.  And because it's
2252          potentially very expensive. */
2253       VG_(sanity_check_malloc_all)();
2254    }
2255 }
2256 
2257 /*--------------------------------------------------------------------*/
2258 /*--- end                                                          ---*/
2259 /*--------------------------------------------------------------------*/
2260