1
2 /*--------------------------------------------------------------------*/
3 /*--- Thread scheduling. scheduler.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2013 Julian Seward
11 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 /*
32 Overview
33
34 Valgrind tries to emulate the kernel's threading as closely as
35 possible. The client does all threading via the normal syscalls
36 (on Linux: clone, etc). Valgrind emulates this by creating exactly
37 the same process structure as would be created without Valgrind.
38 There are no extra threads.
39
40 The main difference is that Valgrind only allows one client thread
41 to run at once. This is controlled with the CPU Big Lock,
42 "the_BigLock". Any time a thread wants to run client code or
43 manipulate any shared state (which is anything other than its own
44 ThreadState entry), it must hold the_BigLock.
45
46 When a thread is about to block in a blocking syscall, it releases
47 the_BigLock, and re-takes it when it becomes runnable again (either
48 because the syscall finished, or we took a signal). See the sketch below.
49
50 VG_(scheduler) therefore runs in each thread. It returns only when
51 the thread is exiting, either because it exited itself, or it was
52 told to exit by another thread.
53
54 This file is almost entirely OS-independent. The details of how
55 the OS handles threading and signalling are abstracted away and
56 implemented elsewhere. [Some of the functions have worked their
57 way back for the moment, until we do an OS port in earnest...]
58 */
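
/* To make the locking discipline above concrete, here is a minimal
   sketch of what a blocking operation has to do with the_BigLock.
   It is purely illustrative (hence kept inside "#if 0"): the real
   syscall machinery lives in m_syswrap, and the function name
   example_blocking_syscall is hypothetical. */
#if 0
static void example_blocking_syscall ( ThreadId tid )
{
   /* On entry we hold the_BigLock and are in VgTs_Runnable state. */
   vg_assert(VG_(is_running_thread)(tid));

   /* Drop the lock and mark ourselves as waiting in the kernel, so
      that some other thread can run while we block. */
   VG_(release_BigLock)(tid, VgTs_WaitSys, "example_blocking_syscall");

   /* ... the blocking syscall itself would be issued here; no shared
      Valgrind state may be touched while the lock is not held ... */

   /* Back from the kernel (the syscall completed, or a signal
      arrived): re-take the lock before touching shared state again. */
   VG_(acquire_BigLock)(tid, "example_blocking_syscall");
}
#endif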
59
60
61 #include "pub_core_basics.h"
62 #include "pub_core_debuglog.h"
63 #include "pub_core_vki.h"
64 #include "pub_core_vkiscnums.h" // __NR_sched_yield
65 #include "pub_core_threadstate.h"
66 #include "pub_core_clientstate.h"
67 #include "pub_core_aspacemgr.h"
68 #include "pub_core_clreq.h" // for VG_USERREQ__*
69 #include "pub_core_dispatch.h"
70 #include "pub_core_errormgr.h" // For VG_(get_n_errs_found)()
71 #include "pub_core_gdbserver.h" // for VG_(gdbserver)/VG_(gdbserver_activity)
72 #include "pub_core_libcbase.h"
73 #include "pub_core_libcassert.h"
74 #include "pub_core_libcprint.h"
75 #include "pub_core_libcproc.h"
76 #include "pub_core_libcsignal.h"
77 #if defined(VGO_darwin)
78 #include "pub_core_mach.h"
79 #endif
80 #include "pub_core_machine.h"
81 #include "pub_core_mallocfree.h"
82 #include "pub_core_options.h"
83 #include "pub_core_replacemalloc.h"
84 #include "pub_core_sbprofile.h"
85 #include "pub_core_signals.h"
86 #include "pub_core_stacks.h"
87 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
88 #include "pub_core_syscall.h"
89 #include "pub_core_syswrap.h"
90 #include "pub_core_tooliface.h"
91 #include "pub_core_translate.h" // For VG_(translate)()
92 #include "pub_core_transtab.h"
93 #include "pub_core_debuginfo.h" // VG_(di_notify_pdb_debuginfo)
94 #include "priv_sched-lock.h"
95 #include "pub_core_scheduler.h" // self
96 #include "pub_core_redir.h"
97 #include "libvex_emnote.h" // VexEmNote
98
99
100 /* ---------------------------------------------------------------------
101 Types and globals for the scheduler.
102 ------------------------------------------------------------------ */
103
104 /* ThreadId and ThreadState are defined elsewhere*/
105
106 /* Defines the thread-scheduling timeslice, in terms of the number of
107 basic blocks we attempt to run each thread for. Smaller values
108 give finer interleaving but much increased scheduling overheads. */
109 #define SCHEDULING_QUANTUM 100000
110
111 /* If False, a fault is Valgrind-internal (ie, a bug) */
112 Bool VG_(in_generated_code) = False;
113
114 /* 64-bit counter for the number of basic blocks done. */
115 static ULong bbs_done = 0;
116
117 /* Counter used to decide when to poll for vgdb activity.
118 When the nr of bbs done reaches vgdb_next_poll, the scheduler will
119 poll for gdbserver activity. VG_(force_vgdb_poll) and
120 VG_(disable_vgdb_poll) allow the valgrind core (e.g. m_gdbserver)
121 to control when the next poll will be done. */
122 static ULong vgdb_next_poll;
123
124 /* Forwards */
125 static void do_client_request ( ThreadId tid );
126 static void scheduler_sanity ( ThreadId tid );
127 static void mostly_clear_thread_record ( ThreadId tid );
128
129 /* Stats. */
130 static ULong n_scheduling_events_MINOR = 0;
131 static ULong n_scheduling_events_MAJOR = 0;
132
133 /* Stats: number of XIndirs, and number that missed in the fast
134 cache. */
135 static ULong stats__n_xindirs = 0;
136 static ULong stats__n_xindir_misses = 0;
137
138 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
139 have to do 64 bit incs on the hot path through
140 VG_(cp_disp_xindir). */
141 /*global*/ UInt VG_(stats__n_xindirs_32) = 0;
142 /*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
143
144 /* Sanity checking counts. */
145 static UInt sanity_fast_count = 0;
146 static UInt sanity_slow_count = 0;
147
148 void VG_(print_scheduler_stats)(void)
149 {
150 VG_(message)(Vg_DebugMsg,
151 "scheduler: %'llu event checks.\n", bbs_done );
152 VG_(message)(Vg_DebugMsg,
153 "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
154 stats__n_xindirs, stats__n_xindir_misses,
155 stats__n_xindirs / (stats__n_xindir_misses
156 ? stats__n_xindir_misses : 1));
157 VG_(message)(Vg_DebugMsg,
158 "scheduler: %'llu/%'llu major/minor sched events.\n",
159 n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
160 VG_(message)(Vg_DebugMsg,
161 " sanity: %d cheap, %d expensive checks.\n",
162 sanity_fast_count, sanity_slow_count );
163 }
164
165 /*
166 * Mutual exclusion object used to serialize threads.
167 */
168 static struct sched_lock *the_BigLock;
169
170
171 /* ---------------------------------------------------------------------
172 Helper functions for the scheduler.
173 ------------------------------------------------------------------ */
174
175 static
176 void print_sched_event ( ThreadId tid, const HChar* what )
177 {
178 VG_(message)(Vg_DebugMsg, " SCHED[%d]: %s\n", tid, what );
179 }
180
181 /* For showing SB profiles, if the user asks to see them. */
182 static
183 void maybe_show_sb_profile ( void )
184 {
185 /* DO NOT MAKE NON-STATIC */
186 static ULong bbs_done_lastcheck = 0;
187 /* */
188 vg_assert(VG_(clo_profyle_interval) > 0);
189 Long delta = (Long)(bbs_done - bbs_done_lastcheck);
190 vg_assert(delta >= 0);
191 if ((ULong)delta >= VG_(clo_profyle_interval)) {
192 bbs_done_lastcheck = bbs_done;
193 VG_(get_and_show_SB_profile)(bbs_done);
194 }
195 }
196
197 static
198 const HChar* name_of_sched_event ( UInt event )
199 {
200 switch (event) {
201 case VEX_TRC_JMP_INVALICACHE: return "INVALICACHE";
202 case VEX_TRC_JMP_FLUSHDCACHE: return "FLUSHDCACHE";
203 case VEX_TRC_JMP_NOREDIR: return "NOREDIR";
204 case VEX_TRC_JMP_SIGILL: return "SIGILL";
205 case VEX_TRC_JMP_SIGTRAP: return "SIGTRAP";
206 case VEX_TRC_JMP_SIGSEGV: return "SIGSEGV";
207 case VEX_TRC_JMP_SIGBUS: return "SIGBUS";
208 case VEX_TRC_JMP_SIGFPE_INTOVF:
209 case VEX_TRC_JMP_SIGFPE_INTDIV: return "SIGFPE";
210 case VEX_TRC_JMP_EMWARN: return "EMWARN";
211 case VEX_TRC_JMP_EMFAIL: return "EMFAIL";
212 case VEX_TRC_JMP_CLIENTREQ: return "CLIENTREQ";
213 case VEX_TRC_JMP_YIELD: return "YIELD";
214 case VEX_TRC_JMP_NODECODE: return "NODECODE";
215 case VEX_TRC_JMP_MAPFAIL: return "MAPFAIL";
216 case VEX_TRC_JMP_SYS_SYSCALL: return "SYSCALL";
217 case VEX_TRC_JMP_SYS_INT32: return "INT32";
218 case VEX_TRC_JMP_SYS_INT128: return "INT128";
219 case VEX_TRC_JMP_SYS_INT129: return "INT129";
220 case VEX_TRC_JMP_SYS_INT130: return "INT130";
221 case VEX_TRC_JMP_SYS_SYSENTER: return "SYSENTER";
222 case VEX_TRC_JMP_BORING: return "VEX_BORING";
223
224 case VG_TRC_BORING: return "VG_BORING";
225 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
226 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
227 case VG_TRC_FAULT_SIGNAL: return "FAULTSIGNAL";
228 case VG_TRC_INVARIANT_FAILED: return "INVFAILED";
229 case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
230 case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
231 default: return "??UNKNOWN??";
232 }
233 }
234
235 /* Allocate a completely empty ThreadState record. */
236 ThreadId VG_(alloc_ThreadState) ( void )
237 {
238 Int i;
239 for (i = 1; i < VG_N_THREADS; i++) {
240 if (VG_(threads)[i].status == VgTs_Empty) {
241 VG_(threads)[i].status = VgTs_Init;
242 VG_(threads)[i].exitreason = VgSrc_None;
243 if (VG_(threads)[i].thread_name)
244 VG_(free)(VG_(threads)[i].thread_name);
245 VG_(threads)[i].thread_name = NULL;
246 return i;
247 }
248 }
249 VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
250 "and rerun valgrind\n");
251 VG_(core_panic)("Max number of threads is too low");
252 /*NOTREACHED*/
253 }
254
255 /*
256 Mark a thread as Runnable. This will block until the_BigLock is
257 available, so that we get exclusive access to all the shared
258 structures and the CPU. Up until we get the_BigLock, we must not
259 touch any shared state.
260
261 When this returns, we'll actually be running.
262 */
263 void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
264 {
265 ThreadState *tst;
266
267 #if 0
268 if (VG_(clo_trace_sched)) {
269 HChar buf[VG_(strlen)(who) + 30];
270 VG_(sprintf)(buf, "waiting for lock (%s)", who);
271 print_sched_event(tid, buf);
272 }
273 #endif
274
275 /* First, acquire the_BigLock. We can't do anything else safely
276 prior to this point. Even doing debug printing prior to this
277 point is, technically, wrong. */
278 VG_(acquire_BigLock_LL)(NULL);
279
280 tst = VG_(get_ThreadState)(tid);
281
282 vg_assert(tst->status != VgTs_Runnable);
283
284 tst->status = VgTs_Runnable;
285
286 if (VG_(running_tid) != VG_INVALID_THREADID)
287 VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
288 vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
289 VG_(running_tid) = tid;
290
291 { Addr gsp = VG_(get_SP)(tid);
292 if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
293 VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
294 else
295 VG_(unknown_SP_update)(gsp, gsp);
296 }
297
298 if (VG_(clo_trace_sched)) {
299 HChar buf[VG_(strlen)(who) + 30];
300 VG_(sprintf)(buf, " acquired lock (%s)", who);
301 print_sched_event(tid, buf);
302 }
303 }
304
305 /*
306 Set a thread into a sleeping state, and give up exclusive access to
307 the CPU. On return, the thread must be prepared to block until it
308 is ready to run again (generally this means blocking in a syscall,
309 but it may mean that we remain in a Runnable state and we're just
310 yielding the CPU to another thread).
311 */
312 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
313 const HChar* who)
314 {
315 ThreadState *tst = VG_(get_ThreadState)(tid);
316
317 vg_assert(tst->status == VgTs_Runnable);
318
319 vg_assert(sleepstate == VgTs_WaitSys ||
320 sleepstate == VgTs_Yielding);
321
322 tst->status = sleepstate;
323
324 vg_assert(VG_(running_tid) == tid);
325 VG_(running_tid) = VG_INVALID_THREADID;
326
327 if (VG_(clo_trace_sched)) {
328 const HChar *status = VG_(name_of_ThreadStatus)(sleepstate);
329 HChar buf[VG_(strlen)(who) + VG_(strlen)(status) + 30];
330 VG_(sprintf)(buf, "releasing lock (%s) -> %s", who, status);
331 print_sched_event(tid, buf);
332 }
333
334 /* Release the_BigLock; this will reschedule any runnable
335 thread. */
336 VG_(release_BigLock_LL)(NULL);
337 }
338
339 static void init_BigLock(void)
340 {
341 vg_assert(!the_BigLock);
342 the_BigLock = ML_(create_sched_lock)();
343 }
344
345 static void deinit_BigLock(void)
346 {
347 ML_(destroy_sched_lock)(the_BigLock);
348 the_BigLock = NULL;
349 }
350
351 /* See pub_core_scheduler.h for description */
352 void VG_(acquire_BigLock_LL) ( const HChar* who )
353 {
354 ML_(acquire_sched_lock)(the_BigLock);
355 }
356
357 /* See pub_core_scheduler.h for description */
358 void VG_(release_BigLock_LL) ( const HChar* who )
359 {
360 ML_(release_sched_lock)(the_BigLock);
361 }
362
363 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
364 {
365 return (ML_(get_sched_lock_owner)(the_BigLock)
366 == VG_(threads)[tid].os_state.lwpid);
367 }
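
/* Usage note: code that manipulates shared scheduler state on behalf
   of thread 'tid' can use this as a cheap sanity check that the
   locking discipline described in the overview is being respected,
   e.g. "vg_assert(VG_(owns_BigLock_LL)(tid));". */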
368
369
370 /* Clear out the ThreadState and release the semaphore. Leaves the
371 ThreadState in VgTs_Zombie state, so that it doesn't get
372 reallocated until the caller is really ready. */
373 void VG_(exit_thread)(ThreadId tid)
374 {
375 vg_assert(VG_(is_valid_tid)(tid));
376 vg_assert(VG_(is_running_thread)(tid));
377 vg_assert(VG_(is_exiting)(tid));
378
379 mostly_clear_thread_record(tid);
380 VG_(running_tid) = VG_INVALID_THREADID;
381
382 /* There should still be a valid exitreason for this thread */
383 vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
384
385 if (VG_(clo_trace_sched))
386 print_sched_event(tid, "release lock in VG_(exit_thread)");
387
388 VG_(release_BigLock_LL)(NULL);
389 }
390
391 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
392 out of the syscall and onto doing the next thing, whatever that is.
393 If it isn't blocked in a syscall, this has no effect on the thread. */
394 void VG_(get_thread_out_of_syscall)(ThreadId tid)
395 {
396 vg_assert(VG_(is_valid_tid)(tid));
397 vg_assert(!VG_(is_running_thread)(tid));
398
399 if (VG_(threads)[tid].status == VgTs_WaitSys) {
400 if (VG_(clo_trace_signals)) {
401 VG_(message)(Vg_DebugMsg,
402 "get_thread_out_of_syscall zaps tid %d lwp %d\n",
403 tid, VG_(threads)[tid].os_state.lwpid);
404 }
405 # if defined(VGO_darwin)
406 {
407 // GrP fixme use mach primitives on darwin?
408 // GrP fixme thread_abort_safely?
409 // GrP fixme race for thread with WaitSys set but not in syscall yet?
410 extern kern_return_t thread_abort(mach_port_t);
411 thread_abort(VG_(threads)[tid].os_state.lwpid);
412 }
413 # else
414 {
415 __attribute__((unused))
416 Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
417 /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
418 I'm really not sure. Here's a race scenario which argues
419 that we shouldn't; but equally I'm not sure the scenario is
420 even possible, because of constraints caused by the question
421 of who holds the BigLock when.
422
423 Target thread tid does sys_read on a socket and blocks. This
424 function gets called, and we observe correctly that tid's
425 status is WaitSys but then for whatever reason this function
426 goes very slowly for a while. Then data arrives from
427 wherever, tid's sys_read returns, tid exits. Then we do
428 tkill on tid, but tid no longer exists; tkill returns an
429 error code and the assert fails. */
430 /* vg_assert(r == 0); */
431 }
432 # endif
433 }
434 }
435
436 /*
437 Yield the CPU for a short time to let some other thread run.
438 */
439 void VG_(vg_yield)(void)
440 {
441 ThreadId tid = VG_(running_tid);
442
443 vg_assert(tid != VG_INVALID_THREADID);
444 vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
445
446 VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
447
448 /*
449 Tell the kernel we're yielding.
450 */
451 VG_(do_syscall0)(__NR_sched_yield);
452
453 VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
454 }
455
456
457 /* Set the standard set of blocked signals, used whenever we're not
458 running a client syscall. */
459 static void block_signals(void)
460 {
461 vki_sigset_t mask;
462
463 VG_(sigfillset)(&mask);
464
465 /* Don't block these because they're synchronous */
466 VG_(sigdelset)(&mask, VKI_SIGSEGV);
467 VG_(sigdelset)(&mask, VKI_SIGBUS);
468 VG_(sigdelset)(&mask, VKI_SIGFPE);
469 VG_(sigdelset)(&mask, VKI_SIGILL);
470 VG_(sigdelset)(&mask, VKI_SIGTRAP);
471
472 /* Can't block these anyway */
473 VG_(sigdelset)(&mask, VKI_SIGSTOP);
474 VG_(sigdelset)(&mask, VKI_SIGKILL);
475
476 VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
477 }
478
479 static void os_state_clear(ThreadState *tst)
480 {
481 tst->os_state.lwpid = 0;
482 tst->os_state.threadgroup = 0;
483 # if defined(VGO_linux)
484 /* no other fields to clear */
485 # elif defined(VGO_darwin)
486 tst->os_state.post_mach_trap_fn = NULL;
487 tst->os_state.pthread = 0;
488 tst->os_state.func_arg = 0;
489 VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
490 VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
491 tst->os_state.wq_jmpbuf_valid = False;
492 tst->os_state.remote_port = 0;
493 tst->os_state.msgh_id = 0;
494 VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
495 # else
496 # error "Unknown OS"
497 # endif
498 }
499
500 static void os_state_init(ThreadState *tst)
501 {
502 tst->os_state.valgrind_stack_base = 0;
503 tst->os_state.valgrind_stack_init_SP = 0;
504 os_state_clear(tst);
505 }
506
507 static
508 void mostly_clear_thread_record ( ThreadId tid )
509 {
510 vki_sigset_t savedmask;
511
512 vg_assert(tid >= 0 && tid < VG_N_THREADS);
513 VG_(cleanup_thread)(&VG_(threads)[tid].arch);
514 VG_(threads)[tid].tid = tid;
515
516 /* Leave the thread in Zombie, so that it doesn't get reallocated
517 until the caller is finally done with the thread stack. */
518 VG_(threads)[tid].status = VgTs_Zombie;
519
520 VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
521 VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
522
523 os_state_clear(&VG_(threads)[tid]);
524
525 /* start with no altstack */
526 VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
527 VG_(threads)[tid].altstack.ss_size = 0;
528 VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
529
530 VG_(clear_out_queued_signals)(tid, &savedmask);
531
532 VG_(threads)[tid].sched_jmpbuf_valid = False;
533 }
534
535 /*
536 Called in the child after fork. If the parent has multiple
537 threads, then we've inherited a VG_(threads) array describing them,
538 but only the thread which called fork() is actually alive in the
539 child. This function needs to clean up all those other thread
540 structures.
541
542 Whichever tid in the parent called fork() becomes the
543 master_tid in the child. That's because the only living slot in
544 VG_(threads) in the child after fork is VG_(threads)[tid], and it
545 would be too hard to try to re-number the thread and relocate the
546 thread state down to VG_(threads)[1].
547
548 This function also needs to reinitialize the_BigLock, since
549 otherwise we may end up sharing its state with the parent, which
550 would be deeply confusing.
551 */
552 static void sched_fork_cleanup(ThreadId me)
553 {
554 ThreadId tid;
555 vg_assert(VG_(running_tid) == me);
556
557 # if defined(VGO_darwin)
558 // GrP fixme hack reset Mach ports
559 VG_(mach_init)();
560 # endif
561
562 VG_(threads)[me].os_state.lwpid = VG_(gettid)();
563 VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
564
565 /* clear out all the unused thread slots */
566 for (tid = 1; tid < VG_N_THREADS; tid++) {
567 if (tid != me) {
568 mostly_clear_thread_record(tid);
569 VG_(threads)[tid].status = VgTs_Empty;
570 VG_(clear_syscallInfo)(tid);
571 }
572 }
573
574 /* re-init and take the sema */
575 deinit_BigLock();
576 init_BigLock();
577 VG_(acquire_BigLock_LL)(NULL);
578 }
579
580
581 /* First phase of initialisation of the scheduler. Initialise the
582 bigLock, zeroise the VG_(threads) structure and decide on the
583 ThreadId of the root thread.
584 */
585 ThreadId VG_(scheduler_init_phase1) ( void )
586 {
587 Int i;
588 ThreadId tid_main;
589
590 VG_(debugLog)(1,"sched","sched_init_phase1\n");
591
592 if (VG_(clo_fair_sched) != disable_fair_sched
593 && !ML_(set_sched_lock_impl)(sched_lock_ticket)
594 && VG_(clo_fair_sched) == enable_fair_sched)
595 {
596 VG_(printf)("Error: fair scheduling is not supported on this system.\n");
597 VG_(exit)(1);
598 }
599
600 if (VG_(clo_verbosity) > 1) {
601 VG_(message)(Vg_DebugMsg,
602 "Scheduler: using %s scheduler lock implementation.\n",
603 ML_(get_sched_lock_name)());
604 }
605
606 init_BigLock();
607
608 for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
609 /* Paranoia .. completely zero it out. */
610 VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
611
612 VG_(threads)[i].sig_queue = NULL;
613
614 os_state_init(&VG_(threads)[i]);
615 mostly_clear_thread_record(i);
616
617 VG_(threads)[i].status = VgTs_Empty;
618 VG_(threads)[i].client_stack_szB = 0;
619 VG_(threads)[i].client_stack_highest_byte = (Addr)NULL;
620 VG_(threads)[i].err_disablement_level = 0;
621 VG_(threads)[i].thread_name = NULL;
622 }
623
624 tid_main = VG_(alloc_ThreadState)();
625
626 /* Bleh. Unfortunately there are various places in the system that
627 assume that the main thread has a ThreadId of 1.
628 - Helgrind (possibly)
629 - stack overflow message in default_action() in m_signals.c
630 - definitely a lot more places
631 */
632 vg_assert(tid_main == 1);
633
634 return tid_main;
635 }
636
637
638 /* Second phase of initialisation of the scheduler. Given the root
639 ThreadId computed by first phase of initialisation, fill in stack
640 details and acquire bigLock. Initialise the scheduler. This is
641 called at startup. The caller subsequently initialises the guest
642 state components of this main thread.
643 */
644 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
645 Addr clstack_end,
646 SizeT clstack_size )
647 {
648 VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
649 "cls_end=0x%lx, cls_sz=%ld\n",
650 tid_main, clstack_end, clstack_size);
651
652 vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
653 vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
654
655 VG_(threads)[tid_main].client_stack_highest_byte
656 = clstack_end;
657 VG_(threads)[tid_main].client_stack_szB
658 = clstack_size;
659
660 VG_(atfork)(NULL, NULL, sched_fork_cleanup);
661 }
662
663
664 /* ---------------------------------------------------------------------
665 Helpers for running translations.
666 ------------------------------------------------------------------ */
667
668 /* Use gcc's built-in setjmp/longjmp. longjmp must not restore signal
669 mask state, but does need to pass "val" through. jumped must be a
670 volatile UWord. */
671 #define SCHEDSETJMP(tid, jumped, stmt) \
672 do { \
673 ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid); \
674 \
675 (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf); \
676 if ((jumped) == ((UWord)0)) { \
677 vg_assert(!_qq_tst->sched_jmpbuf_valid); \
678 _qq_tst->sched_jmpbuf_valid = True; \
679 stmt; \
680 } else if (VG_(clo_trace_sched)) \
681 VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%ld\n", \
682 __LINE__, tid, jumped); \
683 vg_assert(_qq_tst->sched_jmpbuf_valid); \
684 _qq_tst->sched_jmpbuf_valid = False; \
685 } while(0)
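
/* A minimal usage sketch for SCHEDSETJMP (illustrative only -- the
   real uses are in handle_syscall and run_thread_for_a_while below).
   The macro runs 'stmt' with the thread's sched_jmpbuf marked valid,
   so that a signal handler can longjmp out of 'stmt'; afterwards
   'jumped' tells the caller whether that happened.
   do_something_that_might_fault is a hypothetical function. */
#if 0
static void example_schedsetjmp_use ( ThreadId tid )
{
   volatile UWord jumped = 0;
   SCHEDSETJMP(tid, jumped, do_something_that_might_fault(tid));
   if (jumped != (UWord)0) {
      /* We got back here via a longjmp from the signal handler,
         rather than by 'stmt' completing normally. */
   }
}
#endif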
686
687
688 /* Do various guest state alignment checks prior to running a thread.
689 Specifically, check that what we have matches Vex's guest state
690 layout requirements. See libvex.h for details, but in short the
691 requirements are: There must be no holes in between the primary
692 guest state, its two copies, and the spill area. In short, all 4
693 areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
694 be placed back-to-back without holes in between. */
695 static void do_pre_run_checks ( volatile ThreadState* tst )
696 {
697 Addr a_vex = (Addr) & tst->arch.vex;
698 Addr a_vexsh1 = (Addr) & tst->arch.vex_shadow1;
699 Addr a_vexsh2 = (Addr) & tst->arch.vex_shadow2;
700 Addr a_spill = (Addr) & tst->arch.vex_spill;
701 UInt sz_vex = (UInt) sizeof tst->arch.vex;
702 UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
703 UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
704 UInt sz_spill = (UInt) sizeof tst->arch.vex_spill;
705
706 if (0)
707 VG_(printf)("gst %p %d, sh1 %p %d, "
708 "sh2 %p %d, spill %p %d\n",
709 (void*)a_vex, sz_vex,
710 (void*)a_vexsh1, sz_vexsh1,
711 (void*)a_vexsh2, sz_vexsh2,
712 (void*)a_spill, sz_spill );
713
714 vg_assert(sz_vex % LibVEX_GUEST_STATE_ALIGN == 0);
715 vg_assert(sz_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
716 vg_assert(sz_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
717 vg_assert(sz_spill % LibVEX_GUEST_STATE_ALIGN == 0);
718
719 vg_assert(a_vex % LibVEX_GUEST_STATE_ALIGN == 0);
720 vg_assert(a_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
721 vg_assert(a_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
722 vg_assert(a_spill % LibVEX_GUEST_STATE_ALIGN == 0);
723
724 /* Check that the guest state and its two shadows have the same
725 size, and that there are no holes in between. The latter is
726 important because Memcheck assumes that it can reliably access
727 the shadows by indexing off a pointer to the start of the
728 primary guest state area. */
729 vg_assert(sz_vex == sz_vexsh1);
730 vg_assert(sz_vex == sz_vexsh2);
731 vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
732 vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
733 /* Also check there's no hole between the second shadow area and
734 the spill area. */
735 vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
736 vg_assert(a_vex + 3 * sz_vex == a_spill);
737
738 # if defined(VGA_x86)
739 /* x86 XMM regs must form an array, ie, have no holes in
740 between. */
741 vg_assert(
742 (offsetof(VexGuestX86State,guest_XMM7)
743 - offsetof(VexGuestX86State,guest_XMM0))
744 == (8/*#regs*/-1) * 16/*bytes per reg*/
745 );
746 vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
747 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
748 vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
749 vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
750 vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
751 # endif
752
753 # if defined(VGA_amd64)
754 /* amd64 YMM regs must form an array, ie, have no holes in
755 between. */
756 vg_assert(
757 (offsetof(VexGuestAMD64State,guest_YMM16)
758 - offsetof(VexGuestAMD64State,guest_YMM0))
759 == (17/*#regs*/-1) * 32/*bytes per reg*/
760 );
761 vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
762 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
763 vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
764 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
765 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
766 # endif
767
768 # if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
769 /* ppc guest_state vector regs must be 16 byte aligned for
770 loads/stores. This is important! */
771 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
772 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
773 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
774 /* be extra paranoid .. */
775 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
776 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
777 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
778 # endif
779
780 # if defined(VGA_arm)
781 /* arm guest_state VFP regs must be 8 byte aligned for
782 loads/stores. Let's use 16 just to be on the safe side. */
783 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
784 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
785 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
786 /* be extra paranoid .. */
787 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
788 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
789 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
790 # endif
791
792 # if defined(VGA_arm64)
793 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
794 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
795 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
796 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
797 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
798 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
799 # endif
800
801 # if defined(VGA_s390x)
802 /* no special requirements */
803 # endif
804
805 # if defined(VGA_mips32) || defined(VGA_mips64)
806 /* no special requirements */
807 # endif
808 }
809
810 // NO_VGDB_POLL value ensures vgdb is not polled, while
811 // VGDB_POLL_ASAP ensures that the next scheduler call
812 // will cause a poll.
813 #define NO_VGDB_POLL 0xffffffffffffffffULL
814 #define VGDB_POLL_ASAP 0x0ULL
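
/* Worked example with hypothetical numbers: with VG_(clo_vgdb_poll)
   == 5000 and bbs_done == 12000 at the moment a poll is taken, the
   scheduler sets vgdb_next_poll = 12000 + 5000 = 17000, so gdbserver
   activity is next checked once 17000 basic blocks have been run
   (see the update of vgdb_next_poll in run_thread_for_a_while). */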
815
816 void VG_(disable_vgdb_poll) ( void )
817 {
818 vgdb_next_poll = NO_VGDB_POLL;
819 }
820 void VG_(force_vgdb_poll) ( void )
821 {
822 vgdb_next_poll = VGDB_POLL_ASAP;
823 }
824
825 /* Run the thread tid for a while, and return a VG_TRC_* value
826 indicating why VG_(disp_run_translations) stopped, and possibly an
827 auxiliary word. Also, only allow the thread to run for at most
828 *dispatchCtrP events. If (as is the normal case) use_alt_host_addr
829 is False, we are running ordinary redir'd translations, and we
830 should therefore start by looking up the guest next IP in TT. If
831 it is True then we ignore the guest next IP and just run from
832 alt_host_addr, which presumably points at host code for a no-redir
833 translation.
834
835 Return results are placed in two_words. two_words[0] is set to the
836 TRC. In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
837 the address to patch is placed in two_words[1].
838 */
839 static
840 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
841 /*MOD*/Int* dispatchCtrP,
842 ThreadId tid,
843 HWord alt_host_addr,
844 Bool use_alt_host_addr )
845 {
846 volatile HWord jumped = 0;
847 volatile ThreadState* tst = NULL; /* stop gcc complaining */
848 volatile Int done_this_time = 0;
849 volatile HWord host_code_addr = 0;
850
851 /* Paranoia */
852 vg_assert(VG_(is_valid_tid)(tid));
853 vg_assert(VG_(is_running_thread)(tid));
854 vg_assert(!VG_(is_exiting)(tid));
855 vg_assert(*dispatchCtrP > 0);
856
857 tst = VG_(get_ThreadState)(tid);
858 do_pre_run_checks( tst );
859 /* end Paranoia */
860
861 /* Futz with the XIndir stats counters. */
862 vg_assert(VG_(stats__n_xindirs_32) == 0);
863 vg_assert(VG_(stats__n_xindir_misses_32) == 0);
864
865 /* Clear return area. */
866 two_words[0] = two_words[1] = 0;
867
868 /* Figure out where we're starting from. */
869 if (use_alt_host_addr) {
870 /* unusual case -- no-redir translation */
871 host_code_addr = alt_host_addr;
872 } else {
873 /* normal case -- redir translation */
874 UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
875 if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
876 host_code_addr = VG_(tt_fast)[cno].host;
877 else {
878 Addr res = 0;
879 /* not found in VG_(tt_fast). Searching the transtab here
880 improves the performance compared to returning directly
881 to the scheduler. */
882 Bool found = VG_(search_transtab)(&res, NULL, NULL,
883 (Addr)tst->arch.vex.VG_INSTR_PTR,
884 True/*upd cache*/
885 );
886 if (LIKELY(found)) {
887 host_code_addr = res;
888 } else {
889 /* At this point, we know that we intended to start at a
890 normal redir translation, but it was not found. In
891 which case we can return now claiming it's not
892 findable. */
893 two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
894 return;
895 }
896 }
897 }
898 /* We have either a no-redir or a redir translation. */
899 vg_assert(host_code_addr != 0); /* implausible */
900
901 /* there should be no undealt-with signals */
902 //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
903
904 /* Set up event counter stuff for the run. */
905 tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
906 tst->arch.vex.host_EvC_FAILADDR
907 = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
908
909 if (0) {
910 vki_sigset_t m;
911 Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
912 vg_assert(err == 0);
913 VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
914 for (i = 1; i <= _VKI_NSIG; i++)
915 if (!VG_(sigismember)(&m, i))
916 VG_(printf)("%d ", i);
917 VG_(printf)("\n");
918 }
919
920 /* Set up return-value area. */
921
922 // Tell the tool this thread is about to run client code
923 VG_TRACK( start_client_code, tid, bbs_done );
924
925 vg_assert(VG_(in_generated_code) == False);
926 VG_(in_generated_code) = True;
927
928 SCHEDSETJMP(
929 tid,
930 jumped,
931 VG_(disp_run_translations)(
932 two_words,
933 (volatile void*)&tst->arch.vex,
934 host_code_addr
935 )
936 );
937
938 vg_assert(VG_(in_generated_code) == True);
939 VG_(in_generated_code) = False;
940
941 if (jumped != (HWord)0) {
942 /* We get here if the client took a fault that caused our signal
943 handler to longjmp. */
944 vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
945 two_words[0] = VG_TRC_FAULT_SIGNAL;
946 two_words[1] = 0;
947 block_signals();
948 }
949
950 /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
951 and zero out the 32-bit ones in preparation for the next run of
952 generated code. */
953 stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
954 VG_(stats__n_xindirs_32) = 0;
955 stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
956 VG_(stats__n_xindir_misses_32) = 0;
957
958 /* Inspect the event counter. */
959 vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
960 vg_assert(tst->arch.vex.host_EvC_FAILADDR
961 == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
962
963 /* The number of events done this time is the difference between
964 the event counter originally and what it is now. Except -- if
965 it has gone negative (to -1) then the transition 0 to -1 doesn't
966 correspond to a real executed block, so back it out. It's like
967 this because the event checks decrement the counter first and
968 check it for negativeness second, hence the 0 to -1 transition
969 causes a bailout and the block it happens in isn't executed. */
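   /* Worked example with hypothetical numbers: if *dispatchCtrP was
      100000 on entry and host_EvC_COUNTER is now 99990, then 10
      blocks ran. If instead the counter ended at -1, the final
      0 -> -1 transition was the bailout itself rather than an
      executed block, so one is subtracted from the difference. */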
970 {
971 Int dispatchCtrAfterwards = (Int)tst->arch.vex.host_EvC_COUNTER;
972 done_this_time = *dispatchCtrP - dispatchCtrAfterwards;
973 if (dispatchCtrAfterwards == -1) {
974 done_this_time--;
975 } else {
976 /* If the generated code drives the counter below -1, something
977 is seriously wrong. */
978 vg_assert(dispatchCtrAfterwards >= 0);
979 }
980 }
981
982 vg_assert(done_this_time >= 0);
983 bbs_done += (ULong)done_this_time;
984
985 *dispatchCtrP -= done_this_time;
986 vg_assert(*dispatchCtrP >= 0);
987
988 // Tell the tool this thread has stopped running client code
989 VG_TRACK( stop_client_code, tid, bbs_done );
990
991 if (bbs_done >= vgdb_next_poll) {
992 if (VG_(clo_vgdb_poll))
993 vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
994 else
995 /* value was changed due to gdbserver invocation via ptrace */
996 vgdb_next_poll = NO_VGDB_POLL;
997 if (VG_(gdbserver_activity) (tid))
998 VG_(gdbserver) (tid);
999 }
1000
1001 /* TRC value and possible auxiliary patch-address word are already
1002 in two_words[0] and [1] respectively, as a result of the call to
1003 VG_(disp_run_translations). */
1004 /* Stay sane .. */
1005 if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
1006 || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
1007 vg_assert(two_words[1] != 0); /* we have a legit patch addr */
1008 } else {
1009 vg_assert(two_words[1] == 0); /* nobody messed with it */
1010 }
1011 }
1012
1013
1014 /* ---------------------------------------------------------------------
1015 The scheduler proper.
1016 ------------------------------------------------------------------ */
1017
1018 static void handle_tt_miss ( ThreadId tid )
1019 {
1020 Bool found;
1021 Addr ip = VG_(get_IP)(tid);
1022
1023 /* Trivial event. Miss in the fast-cache. Do a full
1024 lookup for it. */
1025 found = VG_(search_transtab)( NULL, NULL, NULL,
1026 ip, True/*upd_fast_cache*/ );
1027 if (UNLIKELY(!found)) {
1028 /* Not found; we need to request a translation. */
1029 if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1030 bbs_done, True/*allow redirection*/ )) {
1031 found = VG_(search_transtab)( NULL, NULL, NULL,
1032 ip, True );
1033 vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
1034
1035 } else {
1036 // If VG_(translate)() fails, it's because it had to throw a
1037 // signal because the client jumped to a bad address. That
1038 // means that either a signal has been set up for delivery,
1039 // or the thread has been marked for termination. Either
1040 // way, we just need to go back into the scheduler loop.
1041 }
1042 }
1043 }
1044
1045 static
1046 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
1047 {
1048 Bool found = False;
1049 Addr ip = VG_(get_IP)(tid);
1050 SECno to_sNo = INV_SNO;
1051 TTEno to_tteNo = INV_TTE;
1052
1053 found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1054 ip, False/*dont_upd_fast_cache*/ );
1055 if (!found) {
1056 /* Not found; we need to request a translation. */
1057 if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1058 bbs_done, True/*allow redirection*/ )) {
1059 found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1060 ip, False );
1061 vg_assert2(found, "handle_chain_me: missing tt_fast entry");
1062 } else {
1063 // If VG_(translate)() fails, it's because it had to throw a
1064 // signal because the client jumped to a bad address. That
1065 // means that either a signal has been set up for delivery,
1066 // or the thread has been marked for termination. Either
1067 // way, we just need to go back into the scheduler loop.
1068 return;
1069 }
1070 }
1071 vg_assert(found);
1072 vg_assert(to_sNo != INV_SNO);
1073 vg_assert(to_tteNo != INV_TTE);
1074
1075 /* So, finally we know where to patch through to. Do the patching
1076 and update the various admin tables that allow it to be undone
1077 in the case that the destination block gets deleted. */
1078 VG_(tt_tc_do_chaining)( place_to_chain,
1079 to_sNo, to_tteNo, toFastEP );
1080 }
1081
1082 static void handle_syscall(ThreadId tid, UInt trc)
1083 {
1084 ThreadState * volatile tst = VG_(get_ThreadState)(tid);
1085 volatile UWord jumped;
1086
1087 /* Syscall may or may not block; either way, it will be
1088 complete by the time this call returns, and we'll be
1089 runnable again. We could take a signal while the
1090 syscall runs. */
1091
1092 if (VG_(clo_sanity_level) >= 3) {
1093 HChar buf[50]; // large enough
1094 VG_(sprintf)(buf, "(BEFORE SYSCALL, tid %d)", tid);
1095 Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
1096 vg_assert(ok);
1097 }
1098
1099 SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
1100
1101 if (VG_(clo_sanity_level) >= 3) {
1102 HChar buf[50]; // large enough
1103 VG_(sprintf)(buf, "(AFTER SYSCALL, tid %d)", tid);
1104 Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
1105 vg_assert(ok);
1106 }
1107
1108 if (!VG_(is_running_thread)(tid))
1109 VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
1110 tid, VG_(running_tid), tid, tst->status);
1111 vg_assert(VG_(is_running_thread)(tid));
1112
1113 if (jumped != (UWord)0) {
1114 block_signals();
1115 VG_(poll_signals)(tid);
1116 }
1117 }
1118
1119 /* tid just requested a jump to the noredir version of its current
1120 program counter. So make up that translation if needed, run it,
1121 and return the resulting thread return code in two_words[]. */
1122 static
1123 void handle_noredir_jump ( /*OUT*/HWord* two_words,
1124 /*MOD*/Int* dispatchCtrP,
1125 ThreadId tid )
1126 {
1127 /* Clear return area. */
1128 two_words[0] = two_words[1] = 0;
1129
1130 Addr hcode = 0;
1131 Addr ip = VG_(get_IP)(tid);
1132
1133 Bool found = VG_(search_unredir_transtab)( &hcode, ip );
1134 if (!found) {
1135 /* Not found; we need to request a translation. */
1136 if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
1137 False/*NO REDIRECTION*/ )) {
1138
1139 found = VG_(search_unredir_transtab)( &hcode, ip );
1140 vg_assert2(found, "unredir translation missing after creation?!");
1141 } else {
1142 // If VG_(translate)() fails, it's because it had to throw a
1143 // signal because the client jumped to a bad address. That
1144 // means that either a signal has been set up for delivery,
1145 // or the thread has been marked for termination. Either
1146 // way, we just need to go back into the scheduler loop.
1147 two_words[0] = VG_TRC_BORING;
1148 return;
1149 }
1150
1151 }
1152
1153 vg_assert(found);
1154 vg_assert(hcode != 0);
1155
1156 /* Otherwise run it and return the resulting VG_TRC_* value. */
1157 vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
1158 run_thread_for_a_while( two_words, dispatchCtrP, tid,
1159 hcode, True/*use hcode*/ );
1160 }
1161
1162
1163 /*
1164 Run a thread until it wants to exit.
1165
1166 We assume that the caller has already called VG_(acquire_BigLock) for
1167 us, so we own the VCPU. Also, all signals are blocked.
1168 */
1169 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
1170 {
1171 /* Holds the remaining size of this thread's "timeslice". */
1172 Int dispatch_ctr = 0;
1173
1174 ThreadState *tst = VG_(get_ThreadState)(tid);
1175 static Bool vgdb_startup_action_done = False;
1176
1177 if (VG_(clo_trace_sched))
1178 print_sched_event(tid, "entering VG_(scheduler)");
1179
1180 /* Do vgdb initialization (but once). Only the first (main) task
1181 starting up will do the below.
1182 Initializing gdbserver earlier than at the first
1183 call to VG_(scheduler) causes problems:
1184 * at the end of VG_(scheduler_init_phase2) :
1185 The main thread is in VgTs_Init state, but in a not yet
1186 consistent state => the thread cannot be reported to gdb
1187 (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1188 back the guest registers to gdb).
1189 * at end of valgrind_main, just
1190 before VG_(main_thread_wrapper_NORETURN)(1) :
1191 The main thread is still in VgTs_Init state but in a
1192 more advanced state. However, the thread state is not yet
1193 completely initialized : a.o., the os_state is not yet fully
1194 set => the thread is then not properly reported to gdb,
1195 which is then confused (causing e.g. a duplicate thread to be
1196 shown, without thread id).
1197 * it would be possible to initialize gdbserver "lower" in the
1198 call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1199 these are platform dependent and the place at which
1200 the thread state is completely initialized is not
1201 specific anymore to the main thread (so a similar "do it only
1202 once" would be needed).
1203
1204 => a "once only" initialization here is the best compromise. */
1205 if (!vgdb_startup_action_done) {
1206 vg_assert(tid == 1); // it must be the main thread.
1207 vgdb_startup_action_done = True;
1208 if (VG_(clo_vgdb) != Vg_VgdbNo) {
1209 /* If we have to poll, ensure we do an initial poll at the first
1210 scheduler call. Otherwise, ensure no poll (unless interrupted
1211 by ptrace). */
1212 if (VG_(clo_vgdb_poll))
1213 VG_(force_vgdb_poll) ();
1214 else
1215 VG_(disable_vgdb_poll) ();
1216
1217 vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
1218 /* As we are initializing, VG_(dyn_vgdb_error) can't have been
1219 changed yet. */
1220
1221 VG_(gdbserver_prerun_action) (1);
1222 } else {
1223 VG_(disable_vgdb_poll) ();
1224 }
1225 }
1226
1227 if (SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))
1228 && tid != 1) {
1229 /* We disable the stack cache the first time we see a thread other
1230 than the main thread appearing. At this moment, we are sure the pthread
1231 lib loading is done/variable was initialised by pthread lib/... */
1232 if (VG_(client__stack_cache_actsize__addr)) {
1233 if (*VG_(client__stack_cache_actsize__addr) == 0) {
1234 VG_(debugLog)(1,"sched",
1235 "pthread stack cache size disable done"
1236 " via kludge\n");
1237 *VG_(client__stack_cache_actsize__addr) = 1000 * 1000 * 1000;
1238 /* Set a value big enough to be above the hardcoded maximum stack
1239 cache size in glibc, small enough to allow a pthread stack size
1240 to be added without risk of overflow. */
1241 }
1242 } else {
1243 VG_(debugLog)(0,"sched",
1244 "WARNING: pthread stack cache cannot be disabled!\n");
1245 VG_(clo_sim_hints) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache);
1246 /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
1247 to avoid having a msg for all following threads. */
1248 }
1249 }
1250
1251 /* set the proper running signal mask */
1252 block_signals();
1253
1254 vg_assert(VG_(is_running_thread)(tid));
1255
1256 dispatch_ctr = SCHEDULING_QUANTUM;
1257
1258 while (!VG_(is_exiting)(tid)) {
1259
1260 vg_assert(dispatch_ctr >= 0);
1261 if (dispatch_ctr == 0) {
1262
1263 /* Our slice is done, so yield the CPU to another thread. On
1264 Linux, this doesn't sleep between sleeping and running,
1265 since that would take too much time. */
1266
1267 /* 4 July 06: it seems that a zero-length nsleep is needed to
1268 cause async thread cancellation (canceller.c) to terminate
1269 in finite time; else it is in some kind of race/starvation
1270 situation and completion is arbitrarily delayed (although
1271 this is not a deadlock).
1272
1273 Unfortunately these sleeps cause MPI jobs not to terminate
1274 sometimes (some kind of livelock). So sleeping once
1275 every N opportunities appears to work. */
1276
1277 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1278 sys_yield also helps the problem, whilst not crashing apps. */
1279
1280 VG_(release_BigLock)(tid, VgTs_Yielding,
1281 "VG_(scheduler):timeslice");
1282 /* ------------ now we don't have The Lock ------------ */
1283
1284 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
1285 /* ------------ now we do have The Lock ------------ */
1286
1287 /* OK, do some relatively expensive housekeeping stuff */
1288 scheduler_sanity(tid);
1289 VG_(sanity_check_general)(False);
1290
1291 /* Look for any pending signals for this thread, and set them up
1292 for delivery */
1293 VG_(poll_signals)(tid);
1294
1295 if (VG_(is_exiting)(tid))
1296 break; /* poll_signals picked up a fatal signal */
1297
1298 /* For stats purposes only. */
1299 n_scheduling_events_MAJOR++;
1300
1301 /* Figure out how many bbs to ask the dispatcher to run. */
1302 dispatch_ctr = SCHEDULING_QUANTUM;
1303
1304 /* paranoia ... */
1305 vg_assert(tst->tid == tid);
1306 vg_assert(tst->os_state.lwpid == VG_(gettid)());
1307 }
1308
1309 /* For stats purposes only. */
1310 n_scheduling_events_MINOR++;
1311
1312 if (0)
1313 VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
1314 tid, dispatch_ctr - 1 );
1315
1316 HWord trc[2]; /* "two_words" */
1317 run_thread_for_a_while( &trc[0],
1318 &dispatch_ctr,
1319 tid, 0/*ignored*/, False );
1320
1321 if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1322 const HChar *name = name_of_sched_event(trc[0]);
1323 HChar buf[VG_(strlen)(name) + 10]; // large enough
1324 VG_(sprintf)(buf, "TRC: %s", name);
1325 print_sched_event(tid, buf);
1326 }
1327
1328 if (trc[0] == VEX_TRC_JMP_NOREDIR) {
1329 /* If we got a request to run a no-redir version of
1330 something, do so now -- handle_noredir_jump just (creates
1331 and) runs that one translation. The flip side is that the
1332 noredir translation can't itself return another noredir
1333 request -- that would be nonsensical. It can, however,
1334 return VG_TRC_BORING, which just means keep going as
1335 normal. */
1336 /* Note that the fact that we need to continue with a
1337 no-redir jump is not recorded anywhere else in this
1338 thread's state. So we *must* execute the block right now
1339 -- we can't fail to execute it and later resume with it,
1340 because by then we'll have forgotten the fact that it
1341 should be run as no-redir, but will get run as a normal
1342 potentially-redir'd, hence screwing up. This really ought
1343 to be cleaned up, by noting in the guest state that the
1344 next block to be executed should be no-redir. Then we can
1345 suspend and resume at any point, which isn't the case at
1346 the moment. */
1347 /* We can't enter a no-redir translation with the dispatch
1348 ctr set to zero, for the reasons commented just above --
1349 we need to force it to execute right now. So, if the
1350 dispatch ctr is zero, set it to one. Note that this would
1351 have the bad side effect of holding the Big Lock arbitrarily
1352 long should there be an arbitrarily long sequence of
1353 back-to-back no-redir translations to run. But we assert
1354 just below that this translation cannot request another
1355 no-redir jump, so we should be safe against that. */
1356 if (dispatch_ctr == 0) {
1357 dispatch_ctr = 1;
1358 }
1359 handle_noredir_jump( &trc[0],
1360 &dispatch_ctr,
1361 tid );
1362 vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
1363
1364 /* This can't be allowed to happen, since it means the block
1365 didn't execute, and we have no way to resume-as-noredir
1366 after we get more timeslice. But I don't think it ever
1367 can, since handle_noredir_jump will assert if the counter
1368 is zero on entry. */
1369 vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
1370 /* This asserts the same thing. */
1371 vg_assert(dispatch_ctr >= 0);
1372
1373 /* A no-redir translation can't return with a chain-me
1374 request, since chaining in the no-redir cache is too
1375 complex. */
1376 vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1377 && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
1378 }
1379
1380 switch (trc[0]) {
1381 case VEX_TRC_JMP_BORING:
1382 /* assisted dispatch, no event. Used by no-redir
1383 translations to force return to the scheduler. */
1384 case VG_TRC_BORING:
1385 /* no special event, just keep going. */
1386 break;
1387
1388 case VG_TRC_INNER_FASTMISS:
1389 vg_assert(dispatch_ctr >= 0);
1390 handle_tt_miss(tid);
1391 break;
1392
1393 case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
1394 if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
1395 handle_chain_me(tid, (void*)trc[1], False);
1396 break;
1397 }
1398
1399 case VG_TRC_CHAIN_ME_TO_FAST_EP: {
1400 if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
1401 handle_chain_me(tid, (void*)trc[1], True);
1402 break;
1403 }
1404
1405 case VEX_TRC_JMP_CLIENTREQ:
1406 do_client_request(tid);
1407 break;
1408
1409 case VEX_TRC_JMP_SYS_INT128: /* x86-linux */
1410 case VEX_TRC_JMP_SYS_INT129: /* x86-darwin */
1411 case VEX_TRC_JMP_SYS_INT130: /* x86-darwin */
1412 case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
1413 handle_syscall(tid, trc[0]);
1414 if (VG_(clo_sanity_level) > 2)
1415 VG_(sanity_check_general)(True); /* sanity-check every syscall */
1416 break;
1417
1418 case VEX_TRC_JMP_YIELD:
1419 /* Explicit yield, because this thread is in a spin-lock
1420 or something. Only let the thread run for a short while
1421 longer. Because swapping to another thread is expensive,
1422 we're prepared to let this thread eat a little more CPU
1423 before swapping to another. That means that short term
1424 spins waiting for hardware to poke memory won't cause a
1425 thread swap. */
1426 if (dispatch_ctr > 1000)
1427 dispatch_ctr = 1000;
1428 break;
1429
1430 case VG_TRC_INNER_COUNTERZERO:
1431 /* Timeslice is out. Let a new thread be scheduled. */
1432 vg_assert(dispatch_ctr == 0);
1433 break;
1434
1435 case VG_TRC_FAULT_SIGNAL:
1436 /* Everything should be set up (either we're exiting, or
1437 about to start in a signal handler). */
1438 break;
1439
1440 case VEX_TRC_JMP_MAPFAIL:
1441 /* Failure of arch-specific address translation (x86/amd64
1442 segment override use) */
1443 /* jrs 2005 03 11: is this correct? */
1444 VG_(synth_fault)(tid);
1445 break;
1446
1447 case VEX_TRC_JMP_EMWARN: {
1448 static Int counts[EmNote_NUMBER];
1449 static Bool counts_initted = False;
1450 VexEmNote ew;
1451 const HChar* what;
1452 Bool show;
1453 Int q;
1454 if (!counts_initted) {
1455 counts_initted = True;
1456 for (q = 0; q < EmNote_NUMBER; q++)
1457 counts[q] = 0;
1458 }
1459 ew = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1460 what = (ew < 0 || ew >= EmNote_NUMBER)
1461 ? "unknown (?!)"
1462 : LibVEX_EmNote_string(ew);
1463 show = (ew < 0 || ew >= EmNote_NUMBER)
1464 ? True
1465 : counts[ew]++ < 3;
1466 if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
1467 VG_(message)( Vg_UserMsg,
1468 "Emulation warning: unsupported action:\n");
1469 VG_(message)( Vg_UserMsg, " %s\n", what);
1470 VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1471 }
1472 break;
1473 }
1474
1475 case VEX_TRC_JMP_EMFAIL: {
1476 VexEmNote ew;
1477 const HChar* what;
1478 ew = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1479 what = (ew < 0 || ew >= EmNote_NUMBER)
1480 ? "unknown (?!)"
1481 : LibVEX_EmNote_string(ew);
1482 VG_(message)( Vg_UserMsg,
1483 "Emulation fatal error -- Valgrind cannot continue:\n");
1484 VG_(message)( Vg_UserMsg, " %s\n", what);
1485 VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1486 VG_(message)(Vg_UserMsg, "\n");
1487 VG_(message)(Vg_UserMsg, "Valgrind has to exit now. Sorry.\n");
1488 VG_(message)(Vg_UserMsg, "\n");
1489 VG_(exit)(1);
1490 break;
1491 }
1492
1493 case VEX_TRC_JMP_SIGILL:
1494 VG_(synth_sigill)(tid, VG_(get_IP)(tid));
1495 break;
1496
1497 case VEX_TRC_JMP_SIGTRAP:
1498 VG_(synth_sigtrap)(tid);
1499 break;
1500
1501 case VEX_TRC_JMP_SIGSEGV:
1502 VG_(synth_fault)(tid);
1503 break;
1504
1505 case VEX_TRC_JMP_SIGBUS:
1506 VG_(synth_sigbus)(tid);
1507 break;
1508
1509 case VEX_TRC_JMP_SIGFPE_INTDIV:
1510 VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
1511 break;
1512
1513 case VEX_TRC_JMP_SIGFPE_INTOVF:
1514 VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
1515 break;
1516
1517 case VEX_TRC_JMP_NODECODE: {
1518 Addr addr = VG_(get_IP)(tid);
1519
1520 if (VG_(clo_sigill_diag)) {
1521 VG_(umsg)(
1522 "valgrind: Unrecognised instruction at address %#lx.\n", addr);
1523 VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1524 # define M(a) VG_(umsg)(a "\n");
1525 M("Your program just tried to execute an instruction that Valgrind" );
1526 M("did not recognise. There are two possible reasons for this." );
1527 M("1. Your program has a bug and erroneously jumped to a non-code" );
1528 M(" location. If you are running Memcheck and you just saw a" );
1529 M(" warning about a bad jump, it's probably your program's fault.");
1530 M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1531 M(" i.e. it's Valgrind's fault. If you think this is the case or");
1532 M(" you are not sure, please let us know and we'll try to fix it.");
1533 M("Either way, Valgrind will now raise a SIGILL signal which will" );
1534 M("probably kill your program." );
1535 # undef M
1536 }
1537 # if defined(VGA_s390x)
1538 /* Now that the complaint is out we need to adjust the guest_IA. The
1539 reason is that -- after raising the exception -- execution will
1540 continue with the insn that follows the invalid insn. As the first
1541 2 bits of the invalid insn determine its length in the usual way,
1542 we can compute the address of the next insn here and adjust the
1543 guest_IA accordingly. This adjustment is essential and tested by
1544 none/tests/s390x/op_exception.c (which would loop forever
1545 otherwise) */
1546 UChar byte = ((UChar *)addr)[0];
1547 UInt insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
1548 Addr next_insn_addr = addr + insn_length;
1549 VG_(set_IP)(tid, next_insn_addr);
1550 # endif
1551 VG_(synth_sigill)(tid, addr);
1552 break;
1553 }
1554
1555 case VEX_TRC_JMP_INVALICACHE:
1556 VG_(discard_translations)(
1557 (Addr)VG_(threads)[tid].arch.vex.guest_CMSTART,
1558 VG_(threads)[tid].arch.vex.guest_CMLEN,
1559 "scheduler(VEX_TRC_JMP_INVALICACHE)"
1560 );
1561 if (0)
1562 VG_(printf)("dump translations done.\n");
1563 break;
1564
1565 case VEX_TRC_JMP_FLUSHDCACHE: {
1566 void* start = (void*)VG_(threads)[tid].arch.vex.guest_CMSTART;
1567 SizeT len = VG_(threads)[tid].arch.vex.guest_CMLEN;
1568 VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
1569 VG_(flush_dcache)(start, len);
1570 break;
1571 }
1572
1573 case VG_TRC_INVARIANT_FAILED:
1574 /* This typically happens if, after running generated code,
1575 it is detected that host CPU settings (eg, FPU/Vector
1576 control words) are not as they should be. Vex's code
1577 generation specifies the state such control words should
1578 be in on entry to Vex-generated code, and they should be
1579 unchanged on exit from it. Failure of this assertion
1580 usually means a bug in Vex's code generation. */
1581 //{ UInt xx;
1582 // __asm__ __volatile__ (
1583 // "\t.word 0xEEF12A10\n" // fmrx r2,fpscr
1584 // "\tmov %0, r2" : "=r"(xx) : : "r2" );
1585 // VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1586 //}
1587 vg_assert2(0, "VG_(scheduler), phase 3: "
1588 "run_innerloop detected host "
1589 "state invariant failure", trc);
1590
1591 case VEX_TRC_JMP_SYS_SYSENTER:
1592 /* Do whatever simulation is appropriate for an x86 sysenter
1593 instruction. Note that it is critical to set this thread's
1594 guest_EIP to point at the code to execute after the
1595 sysenter, since Vex-generated code will not have set it --
1596 vex does not know what it should be. Vex sets the next
1597 address to zero, so if you don't set guest_EIP, the thread
1598 will jump to zero afterwards and probably die as a result. */
1599 # if defined(VGP_x86_linux)
1600 vg_assert2(0, "VG_(scheduler), phase 3: "
1601 "sysenter_x86 on x86-linux is not supported");
1602 # elif defined(VGP_x86_darwin)
1603 /* return address in client edx */
1604 VG_(threads)[tid].arch.vex.guest_EIP
1605 = VG_(threads)[tid].arch.vex.guest_EDX;
1606 handle_syscall(tid, trc[0]);
1607 # else
1608 vg_assert2(0, "VG_(scheduler), phase 3: "
1609 "sysenter_x86 on non-x86 platform?!?!");
1610 # endif
1611 break;
1612
1613 default:
1614 vg_assert2(0, "VG_(scheduler), phase 3: "
1615 "unexpected thread return code (%u)", trc[0]);
1616 /* NOTREACHED */
1617 break;
1618
1619 } /* switch (trc) */
1620
1621 if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
1622 maybe_show_sb_profile();
1623 }
1624
1625 if (VG_(clo_trace_sched))
1626 print_sched_event(tid, "exiting VG_(scheduler)");
1627
1628 vg_assert(VG_(is_exiting)(tid));
1629
1630 return tst->exitreason;
1631 }
1632
1633
1634 /*
1635 This causes all threads to forcibly exit. They aren't actually
1636 dead by the time this returns; you need to call
1637 VG_(reap_threads)() to wait for them.
1638 */
1639 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
1640 {
1641 ThreadId tid;
1642
1643 vg_assert(VG_(is_running_thread)(me));
1644
1645 for (tid = 1; tid < VG_N_THREADS; tid++) {
1646 if (tid == me
1647 || VG_(threads)[tid].status == VgTs_Empty)
1648 continue;
1649 if (0)
1650 VG_(printf)(
1651 "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
1652
1653 VG_(threads)[tid].exitreason = src;
1654 if (src == VgSrc_FatalSig)
1655 VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
1656 VG_(get_thread_out_of_syscall)(tid);
1657 }
1658 }
1659
1660
1661 /* ---------------------------------------------------------------------
1662 Specifying shadow register values
1663 ------------------------------------------------------------------ */
1664
1665 #if defined(VGA_x86)
1666 # define VG_CLREQ_ARGS guest_EAX
1667 # define VG_CLREQ_RET guest_EDX
1668 #elif defined(VGA_amd64)
1669 # define VG_CLREQ_ARGS guest_RAX
1670 # define VG_CLREQ_RET guest_RDX
1671 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
1672 # define VG_CLREQ_ARGS guest_GPR4
1673 # define VG_CLREQ_RET guest_GPR3
1674 #elif defined(VGA_arm)
1675 # define VG_CLREQ_ARGS guest_R4
1676 # define VG_CLREQ_RET guest_R3
1677 #elif defined(VGA_arm64)
1678 # define VG_CLREQ_ARGS guest_X4
1679 # define VG_CLREQ_RET guest_X3
1680 #elif defined (VGA_s390x)
1681 # define VG_CLREQ_ARGS guest_r2
1682 # define VG_CLREQ_RET guest_r3
1683 #elif defined(VGA_mips32) || defined(VGA_mips64)
1684 # define VG_CLREQ_ARGS guest_r12
1685 # define VG_CLREQ_RET guest_r11
1686 #elif defined(VGA_tilegx)
1687 # define VG_CLREQ_ARGS guest_r12
1688 # define VG_CLREQ_RET guest_r11
1689 #else
1690 # error Unknown arch
1691 #endif
1692
1693 #define CLREQ_ARGS(regs) ((regs).vex.VG_CLREQ_ARGS)
1694 #define CLREQ_RET(regs) ((regs).vex.VG_CLREQ_RET)
1695 #define O_CLREQ_RET (offsetof(VexGuestArchState, VG_CLREQ_RET))
1696
1697 // These macros write a value to a client's thread register, and tell the
1698 // tool that it's happened (if necessary).
1699
1700 #define SET_CLREQ_RETVAL(zztid, zzval) \
1701 do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1702 VG_TRACK( post_reg_write, \
1703 Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1704 } while (0)
1705
1706 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1707 do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1708 VG_TRACK( post_reg_write_clientcall_return, \
1709 zztid, O_CLREQ_RET, sizeof(UWord), f); \
1710 } while (0)
1711
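// Illustrative sketch of how these macros are used in
// do_client_request() below:
//
//   SET_CLREQ_RETVAL(tid, 0);                        // plain request
//   SET_CLCALL_RETVAL(tid, f(tid, arg[2]), (Addr)f); // client call; also
//                                                    // tells the tool which
//                                                    // function returned it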
1712
1713 /* ---------------------------------------------------------------------
1714 Handle client requests.
1715 ------------------------------------------------------------------ */
1716
1717 // OS-specific(?) client requests
1718 static Bool os_client_request(ThreadId tid, UWord *args)
1719 {
1720 Bool handled = True;
1721
1722 vg_assert(VG_(is_running_thread)(tid));
1723
1724 switch(args[0]) {
1725 case VG_USERREQ__LIBC_FREERES_DONE:
1726 /* This is equivalent to an exit() syscall, but we don't set the
1727 exitcode (since it might already be set) */
1728 if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
1729 VG_(message)(Vg_DebugMsg,
1730 "__libc_freeres() done; really quitting!\n");
1731 VG_(threads)[tid].exitreason = VgSrc_ExitThread;
1732 break;
1733
1734 default:
1735 handled = False;
1736 break;
1737 }
1738
1739 return handled;
1740 }
1741
1742
1743 /* Write out a client message, possibly including a back trace. Return
1744 the number of characters written. In case of XML output, the format
1745 string as well as any arguments it requires will be XML'ified.
1746 I.e. special characters such as the angle brackets will be translated
1747 into proper escape sequences. */
1748 static
1749 Int print_client_message( ThreadId tid, const HChar *format,
1750 va_list *vargsp, Bool include_backtrace)
1751 {
1752 Int count;
1753
1754 if (VG_(clo_xml)) {
1755 /* Translate the format string as follows:
1756 < --> &lt;
1757 > --> &gt;
1758 & --> &amp;
1759 %s --> %pS
1760 Yes, yes, it's simplified but in synch with
1761 myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
1762 */
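/* For example (illustration only), a client format string of
   "<%s> saw %d errors\n" would be rewritten here as
   "&lt;%pS&gt; saw %d errors\n" before being passed to
   VG_(vprintf_xml). */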
1763
1764 /* Allocate a buffer that is for sure large enough. */
1765 HChar xml_format[VG_(strlen)(format) * 5 + 1];
1766
1767 const HChar *p;
1768 HChar *q = xml_format;
1769
1770 for (p = format; *p; ++p) {
1771 switch (*p) {
1772 case '<': VG_(strcpy)(q, "&lt;"); q += 4; break;
1773 case '>': VG_(strcpy)(q, "&gt;"); q += 4; break;
1774 case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
1775 case '%':
1776 /* Careful: make sure %%s stays %%s */
1777 *q++ = *p++;
1778 if (*p == 's') {
1779 *q++ = 'p';
1780 *q++ = 'S';
1781 } else {
1782 *q++ = *p;
1783 }
1784 break;
1785
1786 default:
1787 *q++ = *p;
1788 break;
1789 }
1790 }
1791 *q = '\0';
1792
1793 VG_(printf_xml)( "<clientmsg>\n" );
1794 VG_(printf_xml)( " <tid>%d</tid>\n", tid );
1795 VG_(printf_xml)( " <text>" );
1796 count = VG_(vprintf_xml)( xml_format, *vargsp );
1797 VG_(printf_xml)( " </text>\n" );
1798 } else {
1799 count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
1800 VG_(message_flush)();
1801 }
1802
1803 if (include_backtrace)
1804 VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1805
1806 if (VG_(clo_xml))
1807 VG_(printf_xml)( "</clientmsg>\n" );
1808
1809 return count;
1810 }
1811
1812
1813 /* Do a client request for the thread tid. After the request, tid may
1814 or may not still be runnable; if not, the scheduler will have to
1815 choose a new thread to run.
1816 */
1817 static
1818 void do_client_request ( ThreadId tid )
1819 {
1820 UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
1821 UWord req_no = arg[0];
1822
1823 if (0)
1824 VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
1825 switch (req_no) {
1826
1827 case VG_USERREQ__CLIENT_CALL0: {
1828 UWord (*f)(ThreadId) = (__typeof__(f))arg[1];
1829 if (f == NULL)
1830 VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
1831 else
1832 SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
1833 break;
1834 }
1835 case VG_USERREQ__CLIENT_CALL1: {
1836 UWord (*f)(ThreadId, UWord) = (__typeof__(f))arg[1];
1837 if (f == NULL)
1838 VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
1839 else
1840 SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
1841 break;
1842 }
1843 case VG_USERREQ__CLIENT_CALL2: {
1844 UWord (*f)(ThreadId, UWord, UWord) = (__typeof__(f))arg[1];
1845 if (f == NULL)
1846 VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
1847 else
1848 SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
1849 break;
1850 }
1851 case VG_USERREQ__CLIENT_CALL3: {
1852 UWord (*f)(ThreadId, UWord, UWord, UWord) = (__typeof__(f))arg[1];
1853 if (f == NULL)
1854 VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
1855 else
1856 SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
1857 break;
1858 }
1859
1860 // Nb: this looks like a circular definition, because it kind of is.
1861 // See comment in valgrind.h to understand what's going on.
1862 case VG_USERREQ__RUNNING_ON_VALGRIND:
1863 SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
1864 break;
1865
1866 case VG_USERREQ__PRINTF: {
1867 const HChar* format = (HChar *)arg[1];
1868 /* JRS 2010-Jan-28: this is DEPRECATED; use the
1869 _VALIST_BY_REF version instead */
1870 if (sizeof(va_list) != sizeof(UWord))
1871 goto va_list_casting_error_NORETURN;
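/* The size check above guarantees va_list is the same size as a UWord
   on this platform, so the raw value in arg[2] can be reinterpreted
   as a va_list via this union. */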
1872 union {
1873 va_list vargs;
1874 unsigned long uw;
1875 } u;
1876 u.uw = (unsigned long)arg[2];
1877 Int count =
1878 print_client_message( tid, format, &u.vargs,
1879 /* include_backtrace */ False );
1880 SET_CLREQ_RETVAL( tid, count );
1881 break;
1882 }
1883
1884 case VG_USERREQ__PRINTF_BACKTRACE: {
1885 const HChar* format = (HChar *)arg[1];
1886 /* JRS 2010-Jan-28: this is DEPRECATED; use the
1887 _VALIST_BY_REF version instead */
1888 if (sizeof(va_list) != sizeof(UWord))
1889 goto va_list_casting_error_NORETURN;
1890 union {
1891 va_list vargs;
1892 unsigned long uw;
1893 } u;
1894 u.uw = (unsigned long)arg[2];
1895 Int count =
1896 print_client_message( tid, format, &u.vargs,
1897 /* include_backtrace */ True );
1898 SET_CLREQ_RETVAL( tid, count );
1899 break;
1900 }
1901
1902 case VG_USERREQ__PRINTF_VALIST_BY_REF: {
1903 const HChar* format = (HChar *)arg[1];
1904 va_list* vargsp = (va_list*)arg[2];
1905 Int count =
1906 print_client_message( tid, format, vargsp,
1907 /* include_backtrace */ False );
1908
1909 SET_CLREQ_RETVAL( tid, count );
1910 break;
1911 }
1912
1913 case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
1914 const HChar* format = (HChar *)arg[1];
1915 va_list* vargsp = (va_list*)arg[2];
1916 Int count =
1917 print_client_message( tid, format, vargsp,
1918 /* include_backtrace */ True );
1919 SET_CLREQ_RETVAL( tid, count );
1920 break;
1921 }
1922
1923 case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
1924 va_list* vargsp = (va_list*)arg[2];
1925 Int count =
1926 VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
1927 VG_(message_flush)();
1928 SET_CLREQ_RETVAL( tid, count );
1929 break;
1930 }
1931
1932 case VG_USERREQ__ADD_IFUNC_TARGET: {
1933 VG_(redir_add_ifunc_target)( arg[1], arg[2] );
1934 SET_CLREQ_RETVAL( tid, 0);
1935 break; }
1936
1937 case VG_USERREQ__STACK_REGISTER: {
1938 UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
1939 SET_CLREQ_RETVAL( tid, sid );
1940 break; }
1941
1942 case VG_USERREQ__STACK_DEREGISTER: {
1943 VG_(deregister_stack)(arg[1]);
1944 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1945 break; }
1946
1947 case VG_USERREQ__STACK_CHANGE: {
1948 VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
1949 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1950 break; }
1951
1952 case VG_USERREQ__GET_MALLOCFUNCS: {
1953 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
1954
1955 info->tl_malloc = VG_(tdict).tool_malloc;
1956 info->tl_calloc = VG_(tdict).tool_calloc;
1957 info->tl_realloc = VG_(tdict).tool_realloc;
1958 info->tl_memalign = VG_(tdict).tool_memalign;
1959 info->tl___builtin_new = VG_(tdict).tool___builtin_new;
1960 info->tl___builtin_vec_new = VG_(tdict).tool___builtin_vec_new;
1961 info->tl_free = VG_(tdict).tool_free;
1962 info->tl___builtin_delete = VG_(tdict).tool___builtin_delete;
1963 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
1964 info->tl_malloc_usable_size = VG_(tdict).tool_malloc_usable_size;
1965
1966 info->mallinfo = VG_(mallinfo);
1967 info->clo_trace_malloc = VG_(clo_trace_malloc);
1968
1969 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1970
1971 break;
1972 }
1973
1974 /* Requests from the client program */
1975
1976 case VG_USERREQ__DISCARD_TRANSLATIONS:
1977 if (VG_(clo_verbosity) > 2)
1978 VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
1979 " addr %p, len %lu\n",
1980 (void*)arg[1], arg[2] );
1981
1982 VG_(discard_translations)(
1983 arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
1984 );
1985
1986 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1987 break;
1988
1989 case VG_USERREQ__COUNT_ERRORS:
1990 SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
1991 break;
1992
1993 case VG_USERREQ__LOAD_PDB_DEBUGINFO:
1994 VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
1995 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1996 break;
1997
1998 case VG_USERREQ__MAP_IP_TO_SRCLOC: {
1999 Addr ip = arg[1];
2000 HChar* buf64 = (HChar*)arg[2]; // points to a HChar [64] array
2001 const HChar *buf; // points to a string of unknown size
2002
2003 VG_(memset)(buf64, 0, 64);
2004 UInt linenum = 0;
2005 Bool ok = VG_(get_filename_linenum)(
2006 ip, &buf, NULL, &linenum
2007 );
2008 if (ok) {
2009 /* For backward compatibility truncate the filename to
2010 49 characters. */
2011 VG_(strncpy)(buf64, buf, 50);
2012 buf64[49] = '\0';
2013 UInt i;
2014 for (i = 0; i < 50; i++) {
2015 if (buf64[i] == 0)
2016 break;
2017 }
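/* i is at most 49 here, and ":%u" appends at most 1 + 10 digits + NUL
   = 12 more bytes, so the sprintf stays within the 64-byte buffer. */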
2018 VG_(sprintf)(buf64+i, ":%u", linenum); // safe
2019 } else {
2020 buf64[0] = 0;
2021 }
2022
2023 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2024 break;
2025 }
2026
2027 case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
2028 Word delta = arg[1];
2029 vg_assert(delta == 1 || delta == -1);
2030 ThreadState* tst = VG_(get_ThreadState)(tid);
2031 vg_assert(tst);
2032 if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
2033 tst->err_disablement_level++;
2034 }
2035 else
2036 if (delta == -1 && tst->err_disablement_level > 0) {
2037 tst->err_disablement_level--;
2038 }
2039 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2040 break;
2041 }
2042
2043 case VG_USERREQ__GDB_MONITOR_COMMAND: {
2044 UWord ret;
2045 ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
2046 SET_CLREQ_RETVAL(tid, ret);
2047 break;
2048 }
2049
2050 case VG_USERREQ__MALLOCLIKE_BLOCK:
2051 case VG_USERREQ__RESIZEINPLACE_BLOCK:
2052 case VG_USERREQ__FREELIKE_BLOCK:
2053 // Ignore them if the addr is NULL; otherwise pass onto the tool.
2054 if (!arg[1]) {
2055 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2056 break;
2057 } else {
2058 goto my_default;
2059 }
2060
2061 case VG_USERREQ__VEX_INIT_FOR_IRI:
2062 LibVEX_InitIRI ( (IRICB *)arg[1] );
2063 break;
2064
2065 default:
2066 my_default:
2067 if (os_client_request(tid, arg)) {
2068 // do nothing, os_client_request() handled it
2069 } else if (VG_(needs).client_requests) {
2070 UWord ret;
2071
2072 if (VG_(clo_verbosity) > 2)
2073 VG_(printf)("client request: code %lx, addr %p, len %lu\n",
2074 arg[0], (void*)arg[1], arg[2] );
2075
2076 if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
2077 SET_CLREQ_RETVAL(tid, ret);
2078 } else {
2079 static Bool whined = False;
2080
2081 if (!whined && VG_(clo_verbosity) > 2) {
2082 // Allow for requests in core, but defined by tools, which
2083 // have 0 and 0 in their two high bytes.
2084 HChar c1 = (arg[0] >> 24) & 0xff;
2085 HChar c2 = (arg[0] >> 16) & 0xff;
2086 if (c1 == 0) c1 = '_';
2087 if (c2 == 0) c2 = '_';
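// E.g. a request code of 0x4d430001 ('M','C' in the high bytes)
// would be reported below as "0x4d430001 (MC+0x1)".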
2088 VG_(message)(Vg_UserMsg, "Warning:\n"
2089 " unhandled client request: 0x%lx (%c%c+0x%lx). Perhaps\n"
2090 " VG_(needs).client_requests should be set?\n",
2091 arg[0], c1, c2, arg[0] & 0xffff);
2092 whined = True;
2093 }
2094 }
2095 break;
2096 }
2097 return;
2098
2099 /*NOTREACHED*/
2100 va_list_casting_error_NORETURN:
2101 VG_(umsg)(
2102 "Valgrind: fatal error - cannot continue: use of the deprecated\n"
2103 "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
2104 "on a platform where they cannot be supported. Please use the\n"
2105 "equivalent _VALIST_BY_REF versions instead.\n"
2106 "\n"
2107 "This is a binary-incompatible change in Valgrind's client request\n"
2108 "mechanism. It is unfortunate, but difficult to avoid. End-users\n"
2109 "are expected to almost never see this message. The only case in\n"
2110 "which you might see this message is if your code uses the macros\n"
2111 "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE. If so, you will need\n"
2112 "to recompile such code, using the header files from this version of\n"
2113 "Valgrind, and not any previous version.\n"
2114 "\n"
2115 "If you see this mesage in any other circumstances, it is probably\n"
2116 "a bug in Valgrind. In this case, please file a bug report at\n"
2117 "\n"
2118 " http://www.valgrind.org/support/bug_reports.html\n"
2119 "\n"
2120 "Will now abort.\n"
2121 );
2122 vg_assert(0);
2123 }
2124
2125
2126 /* ---------------------------------------------------------------------
2127 Sanity checking (permanently engaged)
2128 ------------------------------------------------------------------ */
2129
2130 /* Internal consistency checks on the sched structures. */
2131 static
2132 void scheduler_sanity ( ThreadId tid )
2133 {
2134 Bool bad = False;
2135 Int lwpid = VG_(gettid)();
2136
2137 if (!VG_(is_running_thread)(tid)) {
2138 VG_(message)(Vg_DebugMsg,
2139 "Thread %d is supposed to be running, "
2140 "but doesn't own the_BigLock (owned by %d)\n",
2141 tid, VG_(running_tid));
2142 bad = True;
2143 }
2144
2145 if (lwpid != VG_(threads)[tid].os_state.lwpid) {
2146 VG_(message)(Vg_DebugMsg,
2147 "Thread %d supposed to be in LWP %d, but we're actually %d\n",
2148 tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
2149 bad = True;
2150 }
2151
2152 if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
2153 VG_(message)(Vg_DebugMsg,
2154 "Thread (LWPID) %d doesn't own the_BigLock\n",
2155 tid);
2156 bad = True;
2157 }
2158
2159 if (0) {
2160 /* Periodically show the state of all threads, for debugging
2161 purposes. */
2162 static UInt lasttime = 0;
2163 UInt now;
2164 now = VG_(read_millisecond_timer)();
2165 if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
2166 lasttime = now;
2167 VG_(printf)("\n------------ Sched State at %d ms ------------\n",
2168 (Int)now);
2169 VG_(show_sched_status)(True, // host_stacktrace
2170 True, // stack_usage
2171 True); // exited_threads
2172 }
2173 }
2174
2175 /* core_panic also shows the sched status, which is why we don't
2176 show it above if bad==True. */
2177 if (bad)
2178 VG_(core_panic)("scheduler_sanity: failed");
2179 }
2180
2181 void VG_(sanity_check_general) ( Bool force_expensive )
2182 {
2183 ThreadId tid;
2184
2185 static UInt next_slow_check_at = 1;
2186 static UInt slow_check_interval = 25;
2187
2188 if (VG_(clo_sanity_level) < 1) return;
2189
2190 /* --- First do all the tests that we can do quickly. ---*/
2191
2192 sanity_fast_count++;
2193
2194 /* Check stuff pertaining to the memory check system. */
2195
2196 /* Check that nobody has spuriously claimed that the first or
2197 last 16 pages of memory have become accessible [...] */
2198 if (VG_(needs).sanity_checks) {
2199 vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
2200 }
2201
2202 /* --- Now some more expensive checks. ---*/
2203
2204 /* Once every now and again, check some more expensive stuff.
2205 Gradually increase the interval between such checks so as not to
2206 burden long-running programs too much. */
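/* With the initial values above (first slow check at count 1,
   interval 25), slow checks fire at fast-check counts 1, 25, 50, 76,
   103, ... -- the gap widens by one each time. */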
2207 if ( force_expensive
2208 || VG_(clo_sanity_level) > 1
2209 || (VG_(clo_sanity_level) == 1
2210 && sanity_fast_count == next_slow_check_at)) {
2211
2212 if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
2213
2214 next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
2215 slow_check_interval++;
2216 sanity_slow_count++;
2217
2218 if (VG_(needs).sanity_checks) {
2219 vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
2220 }
2221
2222 /* Look for stack overruns. Visit all threads. */
2223 for (tid = 1; tid < VG_N_THREADS; tid++) {
2224 SizeT remains;
2225 VgStack* stack;
2226
2227 if (VG_(threads)[tid].status == VgTs_Empty ||
2228 VG_(threads)[tid].status == VgTs_Zombie)
2229 continue;
2230
2231 stack
2232 = (VgStack*)
2233 VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
2234 SizeT limit
2235 = 4096; // Let's say. Checking more causes lots of L2 misses.
2236 remains
2237 = VG_(am_get_VgStack_unused_szB)(stack, limit);
2238 if (remains < limit)
2239 VG_(message)(Vg_DebugMsg,
2240 "WARNING: Thread %d is within %ld bytes "
2241 "of running out of valgrind stack!\n"
2242 "Valgrind stack size can be increased "
2243 "using --valgrind-stacksize=....\n",
2244 tid, remains);
2245 }
2246 }
2247
2248 if (VG_(clo_sanity_level) > 1) {
2249 /* Check sanity of the low-level memory manager. Note that bugs
2250 in the client's code can cause this to fail, so we don't do
2251 this check unless specially asked for. And because it's
2252 potentially very expensive. */
2253 VG_(sanity_check_malloc_all)();
2254 }
2255 }
2256
2257 /*--------------------------------------------------------------------*/
2258 /*--- end ---*/
2259 /*--------------------------------------------------------------------*/
2260