1
2 /*--------------------------------------------------------------------*/
3 /*--- Thread scheduling. scheduler.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2013 Julian Seward
11 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 /*
32 Overview
33
34 Valgrind tries to emulate the kernel's threading as closely as
35 possible. The client does all threading via the normal syscalls
36 (on Linux: clone, etc). Valgrind emulates this by creating exactly
37 the same process structure as would be created without Valgrind.
38 There are no extra threads.
39
40 The main difference is that Valgrind only allows one client thread
41 to run at once. This is controlled with the CPU Big Lock,
42 "the_BigLock". Any time a thread wants to run client code or
43 manipulate any shared state (which is anything other than its own
44 ThreadState entry), it must hold the_BigLock.
45
46 When a thread is about to block in a blocking syscall, it releases
47 the_BigLock, and re-takes it when it becomes runnable again (either
48 because the syscall finished, or we took a signal).
49
50 VG_(scheduler) therefore runs in each thread. It returns only when
51 the thread is exiting, either because it exited itself, or it was
52 told to exit by another thread.
53
54 This file is almost entirely OS-independent. The details of how
55 the OS handles threading and signalling are abstracted away and
56 implemented elsewhere. [Some of the functions have worked their
57 way back for the moment, until we do an OS port in earnest...]
58 */
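/* Illustrative sketch (not part of the build): the lock discipline
   described above, using the functions defined later in this file.
   A thread gives up the_BigLock before it might block, and re-takes
   it before touching any shared state again -- compare VG_(vg_yield)
   below, which does exactly this around sched_yield:

      VG_(release_BigLock)(tid, VgTs_WaitSys, "about to block");
      ... do the blocking operation without holding the lock ...
      VG_(acquire_BigLock)(tid, "back from blocking");

   The "who" strings here are made-up labels; they only appear in
   VG_(clo_trace_sched) output. */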
59
60
61 #include "pub_core_basics.h"
62 #include "pub_core_debuglog.h"
63 #include "pub_core_vki.h"
64 #include "pub_core_vkiscnums.h" // __NR_sched_yield
65 #include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy
66 #include "pub_core_threadstate.h"
67 #include "pub_core_aspacemgr.h"
68 #include "pub_core_clreq.h" // for VG_USERREQ__*
69 #include "pub_core_dispatch.h"
70 #include "pub_core_errormgr.h" // For VG_(get_n_errs_found)()
71 #include "pub_core_gdbserver.h" // for VG_(gdbserver) and VG_(gdbserver_activity)
72 #include "pub_core_libcbase.h"
73 #include "pub_core_libcassert.h"
74 #include "pub_core_libcprint.h"
75 #include "pub_core_libcproc.h"
76 #include "pub_core_libcsignal.h"
77 #if defined(VGO_darwin)
78 #include "pub_core_mach.h"
79 #endif
80 #include "pub_core_machine.h"
81 #include "pub_core_mallocfree.h"
82 #include "pub_core_options.h"
83 #include "pub_core_replacemalloc.h"
84 #include "pub_core_sbprofile.h"
85 #include "pub_core_signals.h"
86 #include "pub_core_stacks.h"
87 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
88 #include "pub_core_syscall.h"
89 #include "pub_core_syswrap.h"
90 #include "pub_core_tooliface.h"
91 #include "pub_core_translate.h" // For VG_(translate)()
92 #include "pub_core_transtab.h"
93 #include "pub_core_debuginfo.h" // VG_(di_notify_pdb_debuginfo)
94 #include "priv_sched-lock.h"
95 #include "pub_core_scheduler.h" // self
96 #include "pub_core_redir.h"
97 #include "libvex_emnote.h" // VexEmNote
98
99
100 /* ---------------------------------------------------------------------
101 Types and globals for the scheduler.
102 ------------------------------------------------------------------ */
103
104 /* ThreadId and ThreadState are defined elsewhere. */
105
106 /* Defines the thread-scheduling timeslice, in terms of the number of
107 basic blocks we attempt to run each thread for. Smaller values
108 give finer interleaving but much increased scheduling overheads. */
109 #define SCHEDULING_QUANTUM 100000
110
111 /* If False, a fault is Valgrind-internal (ie, a bug) */
112 Bool VG_(in_generated_code) = False;
113
114 /* 64-bit counter for the number of basic blocks done. */
115 static ULong bbs_done = 0;
116
117 /* Counter used to decide when to poll for vgdb activity.
118    When the number of bbs done reaches vgdb_next_poll, the scheduler
119    will poll for gdbserver activity.  VG_(force_vgdb_poll) and
120    VG_(disable_vgdb_poll) allow the valgrind core (e.g. m_gdbserver)
121    to control when the next poll will be done. */
122 static ULong vgdb_next_poll;
123
124 /* Forwards */
125 static void do_client_request ( ThreadId tid );
126 static void scheduler_sanity ( ThreadId tid );
127 static void mostly_clear_thread_record ( ThreadId tid );
128
129 /* Stats. */
130 static ULong n_scheduling_events_MINOR = 0;
131 static ULong n_scheduling_events_MAJOR = 0;
132
133 /* Stats: number of XIndirs, and number that missed in the fast
134 cache. */
135 static ULong stats__n_xindirs = 0;
136 static ULong stats__n_xindir_misses = 0;
137
138 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
139 have to do 64 bit incs on the hot path through
140    VG_(disp_cp_xindir). */
141 /*global*/ UInt VG_(stats__n_xindirs_32) = 0;
142 /*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
143
144 /* Sanity checking counts. */
145 static UInt sanity_fast_count = 0;
146 static UInt sanity_slow_count = 0;
147
148 void VG_(print_scheduler_stats)(void)
149 {
150 VG_(message)(Vg_DebugMsg,
151 "scheduler: %'llu event checks.\n", bbs_done );
152 VG_(message)(Vg_DebugMsg,
153 "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
154 stats__n_xindirs, stats__n_xindir_misses,
155 stats__n_xindirs / (stats__n_xindir_misses
156 ? stats__n_xindir_misses : 1));
157 VG_(message)(Vg_DebugMsg,
158 "scheduler: %'llu/%'llu major/minor sched events.\n",
159 n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
160 VG_(message)(Vg_DebugMsg,
161 " sanity: %d cheap, %d expensive checks.\n",
162 sanity_fast_count, sanity_slow_count );
163 }
164
165 /*
166 * Mutual exclusion object used to serialize threads.
167 */
168 static struct sched_lock *the_BigLock;
169
170
171 /* ---------------------------------------------------------------------
172 Helper functions for the scheduler.
173 ------------------------------------------------------------------ */
174
175 static
176 void print_sched_event ( ThreadId tid, const HChar* what )
177 {
178 VG_(message)(Vg_DebugMsg, " SCHED[%d]: %s\n", tid, what );
179 }
180
181 /* For showing SB profiles, if the user asks to see them. */
182 static
183 void maybe_show_sb_profile ( void )
184 {
185 /* DO NOT MAKE NON-STATIC */
186 static ULong bbs_done_lastcheck = 0;
187 /* */
188 vg_assert(VG_(clo_profyle_interval) > 0);
189 Long delta = (Long)(bbs_done - bbs_done_lastcheck);
190 vg_assert(delta >= 0);
191 if ((ULong)delta >= VG_(clo_profyle_interval)) {
192 bbs_done_lastcheck = bbs_done;
193 VG_(get_and_show_SB_profile)(bbs_done);
194 }
195 }
196
197 static
198 const HChar* name_of_sched_event ( UInt event )
199 {
200 switch (event) {
201 case VEX_TRC_JMP_INVALICACHE: return "INVALICACHE";
202 case VEX_TRC_JMP_FLUSHDCACHE: return "FLUSHDCACHE";
203 case VEX_TRC_JMP_NOREDIR: return "NOREDIR";
204 case VEX_TRC_JMP_SIGILL: return "SIGILL";
205 case VEX_TRC_JMP_SIGTRAP: return "SIGTRAP";
206 case VEX_TRC_JMP_SIGSEGV: return "SIGSEGV";
207 case VEX_TRC_JMP_SIGBUS: return "SIGBUS";
208 case VEX_TRC_JMP_SIGFPE_INTOVF:
209 case VEX_TRC_JMP_SIGFPE_INTDIV: return "SIGFPE";
210 case VEX_TRC_JMP_EMWARN: return "EMWARN";
211 case VEX_TRC_JMP_EMFAIL: return "EMFAIL";
212 case VEX_TRC_JMP_CLIENTREQ: return "CLIENTREQ";
213 case VEX_TRC_JMP_YIELD: return "YIELD";
214 case VEX_TRC_JMP_NODECODE: return "NODECODE";
215 case VEX_TRC_JMP_MAPFAIL: return "MAPFAIL";
216 case VEX_TRC_JMP_SYS_SYSCALL: return "SYSCALL";
217 case VEX_TRC_JMP_SYS_INT32: return "INT32";
218 case VEX_TRC_JMP_SYS_INT128: return "INT128";
219 case VEX_TRC_JMP_SYS_INT129: return "INT129";
220 case VEX_TRC_JMP_SYS_INT130: return "INT130";
221 case VEX_TRC_JMP_SYS_SYSENTER: return "SYSENTER";
222 case VEX_TRC_JMP_BORING: return "VEX_BORING";
223
224 case VG_TRC_BORING: return "VG_BORING";
225 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
226 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
227 case VG_TRC_FAULT_SIGNAL: return "FAULTSIGNAL";
228 case VG_TRC_INVARIANT_FAILED: return "INVFAILED";
229 case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
230 case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
231 default: return "??UNKNOWN??";
232 }
233 }
234
235 /* Allocate a completely empty ThreadState record. */
236 ThreadId VG_(alloc_ThreadState) ( void )
237 {
238 Int i;
239 for (i = 1; i < VG_N_THREADS; i++) {
240 if (VG_(threads)[i].status == VgTs_Empty) {
241 VG_(threads)[i].status = VgTs_Init;
242 VG_(threads)[i].exitreason = VgSrc_None;
243 if (VG_(threads)[i].thread_name)
244 VG_(arena_free)(VG_AR_CORE, VG_(threads)[i].thread_name);
245 VG_(threads)[i].thread_name = NULL;
246 return i;
247 }
248 }
249 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
250 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
251 VG_(core_panic)("VG_N_THREADS is too low");
252 /*NOTREACHED*/
253 }
254
255 /*
256 Mark a thread as Runnable. This will block until the_BigLock is
257 available, so that we get exclusive access to all the shared
258 structures and the CPU. Up until we get the_BigLock, we must not
259 touch any shared state.
260
261 When this returns, we'll actually be running.
262 */
263 void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
264 {
265 ThreadState *tst;
266
267 #if 0
268 if (VG_(clo_trace_sched)) {
269 HChar buf[100];
270 vg_assert(VG_(strlen)(who) <= 100-50);
271 VG_(sprintf)(buf, "waiting for lock (%s)", who);
272 print_sched_event(tid, buf);
273 }
274 #endif
275
276 /* First, acquire the_BigLock. We can't do anything else safely
277 prior to this point. Even doing debug printing prior to this
278 point is, technically, wrong. */
279 VG_(acquire_BigLock_LL)(NULL);
280
281 tst = VG_(get_ThreadState)(tid);
282
283 vg_assert(tst->status != VgTs_Runnable);
284
285 tst->status = VgTs_Runnable;
286
287 if (VG_(running_tid) != VG_INVALID_THREADID)
288 VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
289 vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
290 VG_(running_tid) = tid;
291
292 { Addr gsp = VG_(get_SP)(tid);
293 if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
294 VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
295 else
296 VG_(unknown_SP_update)(gsp, gsp);
297 }
298
299 if (VG_(clo_trace_sched)) {
300 HChar buf[150];
301 vg_assert(VG_(strlen)(who) <= 150-50);
302 VG_(sprintf)(buf, " acquired lock (%s)", who);
303 print_sched_event(tid, buf);
304 }
305 }
306
307 /*
308 Set a thread into a sleeping state, and give up exclusive access to
309 the CPU. On return, the thread must be prepared to block until it
310 is ready to run again (generally this means blocking in a syscall,
311 but it may mean that we remain in a Runnable state and we're just
312 yielding the CPU to another thread).
313 */
314 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
315 const HChar* who)
316 {
317 ThreadState *tst = VG_(get_ThreadState)(tid);
318
319 vg_assert(tst->status == VgTs_Runnable);
320
321 vg_assert(sleepstate == VgTs_WaitSys ||
322 sleepstate == VgTs_Yielding);
323
324 tst->status = sleepstate;
325
326 vg_assert(VG_(running_tid) == tid);
327 VG_(running_tid) = VG_INVALID_THREADID;
328
329 if (VG_(clo_trace_sched)) {
330 HChar buf[200];
331 vg_assert(VG_(strlen)(who) <= 200-100);
332 VG_(sprintf)(buf, "releasing lock (%s) -> %s",
333 who, VG_(name_of_ThreadStatus)(sleepstate));
334 print_sched_event(tid, buf);
335 }
336
337 /* Release the_BigLock; this will reschedule any runnable
338 thread. */
339 VG_(release_BigLock_LL)(NULL);
340 }
341
342 static void init_BigLock(void)
343 {
344 vg_assert(!the_BigLock);
345 the_BigLock = ML_(create_sched_lock)();
346 }
347
348 static void deinit_BigLock(void)
349 {
350 ML_(destroy_sched_lock)(the_BigLock);
351 the_BigLock = NULL;
352 }
353
354 /* See pub_core_scheduler.h for description */
355 void VG_(acquire_BigLock_LL) ( const HChar* who )
356 {
357 ML_(acquire_sched_lock)(the_BigLock);
358 }
359
360 /* See pub_core_scheduler.h for description */
361 void VG_(release_BigLock_LL) ( const HChar* who )
362 {
363 ML_(release_sched_lock)(the_BigLock);
364 }
365
366 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
367 {
368 return (ML_(get_sched_lock_owner)(the_BigLock)
369 == VG_(threads)[tid].os_state.lwpid);
370 }
371
372
373 /* Clear out the ThreadState and release the semaphore. Leaves the
374 ThreadState in VgTs_Zombie state, so that it doesn't get
375 reallocated until the caller is really ready. */
376 void VG_(exit_thread)(ThreadId tid)
377 {
378 vg_assert(VG_(is_valid_tid)(tid));
379 vg_assert(VG_(is_running_thread)(tid));
380 vg_assert(VG_(is_exiting)(tid));
381
382 mostly_clear_thread_record(tid);
383 VG_(running_tid) = VG_INVALID_THREADID;
384
385 /* There should still be a valid exitreason for this thread */
386 vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
387
388 if (VG_(clo_trace_sched))
389 print_sched_event(tid, "release lock in VG_(exit_thread)");
390
391 VG_(release_BigLock_LL)(NULL);
392 }
393
394 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
395 out of the syscall and onto doing the next thing, whatever that is.
396    If it isn't blocked in a syscall, this has no effect on the thread. */
397 void VG_(get_thread_out_of_syscall)(ThreadId tid)
398 {
399 vg_assert(VG_(is_valid_tid)(tid));
400 vg_assert(!VG_(is_running_thread)(tid));
401
402 if (VG_(threads)[tid].status == VgTs_WaitSys) {
403 if (VG_(clo_trace_signals)) {
404 VG_(message)(Vg_DebugMsg,
405 "get_thread_out_of_syscall zaps tid %d lwp %d\n",
406 tid, VG_(threads)[tid].os_state.lwpid);
407 }
408 # if defined(VGO_darwin)
409 {
410 // GrP fixme use mach primitives on darwin?
411 // GrP fixme thread_abort_safely?
412 // GrP fixme race for thread with WaitSys set but not in syscall yet?
413 extern kern_return_t thread_abort(mach_port_t);
414 thread_abort(VG_(threads)[tid].os_state.lwpid);
415 }
416 # else
417 {
418 __attribute__((unused))
419 Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
420 /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
421 I'm really not sure. Here's a race scenario which argues
422          that we shouldn't; but equally I'm not sure the scenario is
423 even possible, because of constraints caused by the question
424 of who holds the BigLock when.
425
426 Target thread tid does sys_read on a socket and blocks. This
427 function gets called, and we observe correctly that tid's
428 status is WaitSys but then for whatever reason this function
429 goes very slowly for a while. Then data arrives from
430 wherever, tid's sys_read returns, tid exits. Then we do
431 tkill on tid, but tid no longer exists; tkill returns an
432 error code and the assert fails. */
433 /* vg_assert(r == 0); */
434 }
435 # endif
436 }
437 }
438
439 /*
440 Yield the CPU for a short time to let some other thread run.
441 */
442 void VG_(vg_yield)(void)
443 {
444 ThreadId tid = VG_(running_tid);
445
446 vg_assert(tid != VG_INVALID_THREADID);
447 vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
448
449 VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
450
451 /*
452 Tell the kernel we're yielding.
453 */
454 VG_(do_syscall0)(__NR_sched_yield);
455
456 VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
457 }
458
459
460 /* Set the standard set of blocked signals, used whenever we're not
461 running a client syscall. */
462 static void block_signals(void)
463 {
464 vki_sigset_t mask;
465
466 VG_(sigfillset)(&mask);
467
468 /* Don't block these because they're synchronous */
469 VG_(sigdelset)(&mask, VKI_SIGSEGV);
470 VG_(sigdelset)(&mask, VKI_SIGBUS);
471 VG_(sigdelset)(&mask, VKI_SIGFPE);
472 VG_(sigdelset)(&mask, VKI_SIGILL);
473 VG_(sigdelset)(&mask, VKI_SIGTRAP);
474
475 /* Can't block these anyway */
476 VG_(sigdelset)(&mask, VKI_SIGSTOP);
477 VG_(sigdelset)(&mask, VKI_SIGKILL);
478
479 VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
480 }
481
482 static void os_state_clear(ThreadState *tst)
483 {
484 tst->os_state.lwpid = 0;
485 tst->os_state.threadgroup = 0;
486 # if defined(VGO_linux)
487 /* no other fields to clear */
488 # elif defined(VGO_darwin)
489 tst->os_state.post_mach_trap_fn = NULL;
490 tst->os_state.pthread = 0;
491 tst->os_state.func_arg = 0;
492 VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
493 VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
494 tst->os_state.wq_jmpbuf_valid = False;
495 tst->os_state.remote_port = 0;
496 tst->os_state.msgh_id = 0;
497 VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
498 # else
499 # error "Unknown OS"
500 # endif
501 }
502
503 static void os_state_init(ThreadState *tst)
504 {
505 tst->os_state.valgrind_stack_base = 0;
506 tst->os_state.valgrind_stack_init_SP = 0;
507 os_state_clear(tst);
508 }
509
510 static
511 void mostly_clear_thread_record ( ThreadId tid )
512 {
513 vki_sigset_t savedmask;
514
515 vg_assert(tid >= 0 && tid < VG_N_THREADS);
516 VG_(cleanup_thread)(&VG_(threads)[tid].arch);
517 VG_(threads)[tid].tid = tid;
518
519 /* Leave the thread in Zombie, so that it doesn't get reallocated
520 until the caller is finally done with the thread stack. */
521 VG_(threads)[tid].status = VgTs_Zombie;
522
523 VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
524 VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
525
526 os_state_clear(&VG_(threads)[tid]);
527
528 /* start with no altstack */
529 VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
530 VG_(threads)[tid].altstack.ss_size = 0;
531 VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
532
533 VG_(clear_out_queued_signals)(tid, &savedmask);
534
535 VG_(threads)[tid].sched_jmpbuf_valid = False;
536 }
537
538 /*
539 Called in the child after fork. If the parent has multiple
540 threads, then we've inherited a VG_(threads) array describing them,
541 but only the thread which called fork() is actually alive in the
542    child.  This function needs to clean up all those other thread
543 structures.
544
545    Whichever tid in the parent called fork() becomes the
546 master_tid in the child. That's because the only living slot in
547 VG_(threads) in the child after fork is VG_(threads)[tid], and it
548 would be too hard to try to re-number the thread and relocate the
549 thread state down to VG_(threads)[1].
550
551 This function also needs to reinitialize the_BigLock, since
552 otherwise we may end up sharing its state with the parent, which
553 would be deeply confusing.
554 */
555 static void sched_fork_cleanup(ThreadId me)
556 {
557 ThreadId tid;
558 vg_assert(VG_(running_tid) == me);
559
560 # if defined(VGO_darwin)
561 // GrP fixme hack reset Mach ports
562 VG_(mach_init)();
563 # endif
564
565 VG_(threads)[me].os_state.lwpid = VG_(gettid)();
566 VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
567
568 /* clear out all the unused thread slots */
569 for (tid = 1; tid < VG_N_THREADS; tid++) {
570 if (tid != me) {
571 mostly_clear_thread_record(tid);
572 VG_(threads)[tid].status = VgTs_Empty;
573 VG_(clear_syscallInfo)(tid);
574 }
575 }
576
577 /* re-init and take the sema */
578 deinit_BigLock();
579 init_BigLock();
580 VG_(acquire_BigLock_LL)(NULL);
581 }
582
583
584 /* First phase of initialisation of the scheduler. Initialise the
585 bigLock, zeroise the VG_(threads) structure and decide on the
586 ThreadId of the root thread.
587 */
588 ThreadId VG_(scheduler_init_phase1) ( void )
589 {
590 Int i;
591 ThreadId tid_main;
592
593 VG_(debugLog)(1,"sched","sched_init_phase1\n");
594
595 if (VG_(clo_fair_sched) != disable_fair_sched
596 && !ML_(set_sched_lock_impl)(sched_lock_ticket)
597 && VG_(clo_fair_sched) == enable_fair_sched)
598 {
599 VG_(printf)("Error: fair scheduling is not supported on this system.\n");
600 VG_(exit)(1);
601 }
602
603 if (VG_(clo_verbosity) > 1) {
604 VG_(message)(Vg_DebugMsg,
605 "Scheduler: using %s scheduler lock implementation.\n",
606 ML_(get_sched_lock_name)());
607 }
608
609 init_BigLock();
610
611 for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
612 /* Paranoia .. completely zero it out. */
613 VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
614
615 VG_(threads)[i].sig_queue = NULL;
616
617 os_state_init(&VG_(threads)[i]);
618 mostly_clear_thread_record(i);
619
620 VG_(threads)[i].status = VgTs_Empty;
621 VG_(threads)[i].client_stack_szB = 0;
622 VG_(threads)[i].client_stack_highest_word = (Addr)NULL;
623 VG_(threads)[i].err_disablement_level = 0;
624 VG_(threads)[i].thread_name = NULL;
625 }
626
627 tid_main = VG_(alloc_ThreadState)();
628
629 /* Bleh. Unfortunately there are various places in the system that
630 assume that the main thread has a ThreadId of 1.
631 - Helgrind (possibly)
632 - stack overflow message in default_action() in m_signals.c
633 - definitely a lot more places
634 */
635 vg_assert(tid_main == 1);
636
637 return tid_main;
638 }
639
640
641 /* Second phase of initialisation of the scheduler. Given the root
642 ThreadId computed by first phase of initialisation, fill in stack
643 details and acquire bigLock. Initialise the scheduler. This is
644 called at startup. The caller subsequently initialises the guest
645 state components of this main thread.
646 */
647 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
648 Addr clstack_end,
649 SizeT clstack_size )
650 {
651 VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
652 "cls_end=0x%lx, cls_sz=%ld\n",
653 tid_main, clstack_end, clstack_size);
654
655 vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
656 vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
657
658 VG_(threads)[tid_main].client_stack_highest_word
659 = clstack_end + 1 - sizeof(UWord);
660 VG_(threads)[tid_main].client_stack_szB
661 = clstack_size;
662
663 VG_(atfork)(NULL, NULL, sched_fork_cleanup);
664 }
665
666
667 /* ---------------------------------------------------------------------
668 Helpers for running translations.
669 ------------------------------------------------------------------ */
670
671 /* Use gcc's built-in setjmp/longjmp. longjmp must not restore signal
672 mask state, but does need to pass "val" through. jumped must be a
673 volatile UWord. */
674 #define SCHEDSETJMP(tid, jumped, stmt) \
675 do { \
676 ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid); \
677 \
678 (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf); \
679 if ((jumped) == ((UWord)0)) { \
680 vg_assert(!_qq_tst->sched_jmpbuf_valid); \
681 _qq_tst->sched_jmpbuf_valid = True; \
682 stmt; \
683 } else if (VG_(clo_trace_sched)) \
684 VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%ld\n", \
685 __LINE__, tid, jumped); \
686 vg_assert(_qq_tst->sched_jmpbuf_valid); \
687 _qq_tst->sched_jmpbuf_valid = False; \
688 } while(0)
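/* Usage note: SCHEDSETJMP brackets the two places below where the
   scheduler hands control to code that may longjmp back on a fault,
   namely VG_(disp_run_translations) (in run_thread_for_a_while) and
   VG_(client_syscall) (in handle_syscall).  The pattern, as used in
   handle_syscall, is:

      volatile UWord jumped;
      SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
      if (jumped != (UWord)0) { ... we came back via the longjmp ... }
*/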
689
690
691 /* Do various guest state alignment checks prior to running a thread.
692 Specifically, check that what we have matches Vex's guest state
693 layout requirements. See libvex.h for details, but in short the
694 requirements are: There must be no holes in between the primary
695 guest state, its two copies, and the spill area. In short, all 4
696 areas must have a 16-aligned size and be 16-aligned, and placed
697 back-to-back. */
698 static void do_pre_run_checks ( ThreadState* tst )
699 {
700 Addr a_vex = (Addr) & tst->arch.vex;
701 Addr a_vexsh1 = (Addr) & tst->arch.vex_shadow1;
702 Addr a_vexsh2 = (Addr) & tst->arch.vex_shadow2;
703 Addr a_spill = (Addr) & tst->arch.vex_spill;
704 UInt sz_vex = (UInt) sizeof tst->arch.vex;
705 UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
706 UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
707 UInt sz_spill = (UInt) sizeof tst->arch.vex_spill;
708
709 if (0)
710 VG_(printf)("gst %p %d, sh1 %p %d, "
711 "sh2 %p %d, spill %p %d\n",
712 (void*)a_vex, sz_vex,
713 (void*)a_vexsh1, sz_vexsh1,
714 (void*)a_vexsh2, sz_vexsh2,
715 (void*)a_spill, sz_spill );
716
717 vg_assert(VG_IS_16_ALIGNED(sz_vex));
718 vg_assert(VG_IS_16_ALIGNED(sz_vexsh1));
719 vg_assert(VG_IS_16_ALIGNED(sz_vexsh2));
720 vg_assert(VG_IS_16_ALIGNED(sz_spill));
721
722 vg_assert(VG_IS_16_ALIGNED(a_vex));
723 vg_assert(VG_IS_16_ALIGNED(a_vexsh1));
724 vg_assert(VG_IS_16_ALIGNED(a_vexsh2));
725 vg_assert(VG_IS_16_ALIGNED(a_spill));
726
727 /* Check that the guest state and its two shadows have the same
728 size, and that there are no holes in between. The latter is
729 important because Memcheck assumes that it can reliably access
730 the shadows by indexing off a pointer to the start of the
731 primary guest state area. */
732 vg_assert(sz_vex == sz_vexsh1);
733 vg_assert(sz_vex == sz_vexsh2);
734 vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
735 vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
736 /* Also check there's no hole between the second shadow area and
737 the spill area. */
738 vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
739 vg_assert(a_vex + 3 * sz_vex == a_spill);
740
741 # if defined(VGA_x86)
742 /* x86 XMM regs must form an array, ie, have no holes in
743 between. */
744 vg_assert(
745 (offsetof(VexGuestX86State,guest_XMM7)
746 - offsetof(VexGuestX86State,guest_XMM0))
747 == (8/*#regs*/-1) * 16/*bytes per reg*/
748 );
749 vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
750 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
751 vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
752 vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
753 vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
754 # endif
755
756 # if defined(VGA_amd64)
757 /* amd64 YMM regs must form an array, ie, have no holes in
758 between. */
759 vg_assert(
760 (offsetof(VexGuestAMD64State,guest_YMM16)
761 - offsetof(VexGuestAMD64State,guest_YMM0))
762 == (17/*#regs*/-1) * 32/*bytes per reg*/
763 );
764 vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
765 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
766 vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
767 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
768 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
769 # endif
770
771 # if defined(VGA_ppc32) || defined(VGA_ppc64)
772 /* ppc guest_state vector regs must be 16 byte aligned for
773 loads/stores. This is important! */
774 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
775 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
776 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
777 /* be extra paranoid .. */
778 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
779 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
780 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
781 # endif
782
783 # if defined(VGA_arm)
784 /* arm guest_state VFP regs must be 8 byte aligned for
785 loads/stores. Let's use 16 just to be on the safe side. */
786 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
787 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
788 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
789 /* be extra paranoid .. */
790 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
791 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
792 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
793 # endif
794
795 # if defined(VGA_arm64)
796 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
797 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
798 vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
799 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
800 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
801 vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
802 # endif
803
804 # if defined(VGA_s390x)
805 /* no special requirements */
806 # endif
807
808 # if defined(VGA_mips32) || defined(VGA_mips64)
809 /* no special requirements */
810 # endif
811 }
812
813 // NO_VGDB_POLL value ensures vgdb is not polled, while
814 // VGDB_POLL_ASAP ensures that the next scheduler call
815 // will cause a poll.
816 #define NO_VGDB_POLL 0xffffffffffffffffULL
817 #define VGDB_POLL_ASAP 0x0ULL
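/* The vgdb_next_poll value set from these constants is checked at the
   end of run_thread_for_a_while: once bbs_done >= vgdb_next_poll, the
   next poll point is recomputed from VG_(clo_vgdb_poll) (or set back
   to NO_VGDB_POLL if polling is off), and VG_(gdbserver) is invoked
   if VG_(gdbserver_activity)(tid) reports pending activity. */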
818
819 void VG_(disable_vgdb_poll) ( void )
820 {
821 vgdb_next_poll = NO_VGDB_POLL;
822 }
823 void VG_(force_vgdb_poll) ( void )
824 {
825 vgdb_next_poll = VGDB_POLL_ASAP;
826 }
827
828 /* Run the thread tid for a while, and return a VG_TRC_* value
829 indicating why VG_(disp_run_translations) stopped, and possibly an
830 auxiliary word. Also, only allow the thread to run for at most
831 *dispatchCtrP events. If (as is the normal case) use_alt_host_addr
832 is False, we are running ordinary redir'd translations, and we
833 should therefore start by looking up the guest next IP in TT. If
834 it is True then we ignore the guest next IP and just run from
835 alt_host_addr, which presumably points at host code for a no-redir
836 translation.
837
838 Return results are placed in two_words. two_words[0] is set to the
839 TRC. In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
840 the address to patch is placed in two_words[1].
841 */
842 static
843 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
844 /*MOD*/Int* dispatchCtrP,
845 ThreadId tid,
846 HWord alt_host_addr,
847 Bool use_alt_host_addr )
848 {
849 volatile HWord jumped = 0;
850 volatile ThreadState* tst = NULL; /* stop gcc complaining */
851 volatile Int done_this_time = 0;
852 volatile HWord host_code_addr = 0;
853
854 /* Paranoia */
855 vg_assert(VG_(is_valid_tid)(tid));
856 vg_assert(VG_(is_running_thread)(tid));
857 vg_assert(!VG_(is_exiting)(tid));
858 vg_assert(*dispatchCtrP > 0);
859
860 tst = VG_(get_ThreadState)(tid);
861 do_pre_run_checks( (ThreadState*)tst );
862 /* end Paranoia */
863
864 /* Futz with the XIndir stats counters. */
865 vg_assert(VG_(stats__n_xindirs_32) == 0);
866 vg_assert(VG_(stats__n_xindir_misses_32) == 0);
867
868 /* Clear return area. */
869 two_words[0] = two_words[1] = 0;
870
871 /* Figure out where we're starting from. */
872 if (use_alt_host_addr) {
873 /* unusual case -- no-redir translation */
874 host_code_addr = alt_host_addr;
875 } else {
876 /* normal case -- redir translation */
877 UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
878 if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
879 host_code_addr = VG_(tt_fast)[cno].host;
880 else {
881 AddrH res = 0;
882          /* not found in VG_(tt_fast).  Searching the transtab here
883 improves the performance compared to returning directly
884 to the scheduler. */
885 Bool found = VG_(search_transtab)(&res, NULL, NULL,
886 (Addr)tst->arch.vex.VG_INSTR_PTR,
887 True/*upd cache*/
888 );
889 if (LIKELY(found)) {
890 host_code_addr = res;
891 } else {
892 /* At this point, we know that we intended to start at a
893 normal redir translation, but it was not found. In
894 which case we can return now claiming it's not
895 findable. */
896 two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
897 return;
898 }
899 }
900 }
901 /* We have either a no-redir or a redir translation. */
902 vg_assert(host_code_addr != 0); /* implausible */
903
904 /* there should be no undealt-with signals */
905 //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
906
907 /* Set up event counter stuff for the run. */
908 tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
909 tst->arch.vex.host_EvC_FAILADDR
910 = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
911
912 if (0) {
913 vki_sigset_t m;
914 Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
915 vg_assert(err == 0);
916 VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
917 for (i = 1; i <= _VKI_NSIG; i++)
918 if (!VG_(sigismember)(&m, i))
919 VG_(printf)("%d ", i);
920 VG_(printf)("\n");
921 }
922
923 /* Set up return-value area. */
924
925 // Tell the tool this thread is about to run client code
926 VG_TRACK( start_client_code, tid, bbs_done );
927
928 vg_assert(VG_(in_generated_code) == False);
929 VG_(in_generated_code) = True;
930
931 SCHEDSETJMP(
932 tid,
933 jumped,
934 VG_(disp_run_translations)(
935 two_words,
936 (void*)&tst->arch.vex,
937 host_code_addr
938 )
939 );
940
941 vg_assert(VG_(in_generated_code) == True);
942 VG_(in_generated_code) = False;
943
944 if (jumped != (HWord)0) {
945 /* We get here if the client took a fault that caused our signal
946 handler to longjmp. */
947 vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
948 two_words[0] = VG_TRC_FAULT_SIGNAL;
949 two_words[1] = 0;
950 block_signals();
951 }
952
953 /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
954 and zero out the 32-bit ones in preparation for the next run of
955 generated code. */
956 stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
957 VG_(stats__n_xindirs_32) = 0;
958 stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
959 VG_(stats__n_xindir_misses_32) = 0;
960
961 /* Inspect the event counter. */
962 vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
963 vg_assert(tst->arch.vex.host_EvC_FAILADDR
964 == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
965
966 done_this_time = *dispatchCtrP - ((Int)tst->arch.vex.host_EvC_COUNTER + 1);
967
968 vg_assert(done_this_time >= 0);
969 bbs_done += (ULong)done_this_time;
970
971 *dispatchCtrP -= done_this_time;
972 vg_assert(*dispatchCtrP >= 0);
973
974 // Tell the tool this thread has stopped running client code
975 VG_TRACK( stop_client_code, tid, bbs_done );
976
977 if (bbs_done >= vgdb_next_poll) {
978 if (VG_(clo_vgdb_poll))
979 vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
980 else
981 /* value was changed due to gdbserver invocation via ptrace */
982 vgdb_next_poll = NO_VGDB_POLL;
983 if (VG_(gdbserver_activity) (tid))
984 VG_(gdbserver) (tid);
985 }
986
987 /* TRC value and possible auxiliary patch-address word are already
988 in two_words[0] and [1] respectively, as a result of the call to
989       VG_(disp_run_translations). */
990 /* Stay sane .. */
991 if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
992 || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
993 vg_assert(two_words[1] != 0); /* we have a legit patch addr */
994 } else {
995 vg_assert(two_words[1] == 0); /* nobody messed with it */
996 }
997 }
998
999
1000 /* ---------------------------------------------------------------------
1001 The scheduler proper.
1002 ------------------------------------------------------------------ */
1003
1004 static void handle_tt_miss ( ThreadId tid )
1005 {
1006 Bool found;
1007 Addr ip = VG_(get_IP)(tid);
1008
1009 /* Trivial event. Miss in the fast-cache. Do a full
1010 lookup for it. */
1011 found = VG_(search_transtab)( NULL, NULL, NULL,
1012 ip, True/*upd_fast_cache*/ );
1013 if (UNLIKELY(!found)) {
1014 /* Not found; we need to request a translation. */
1015 if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1016 bbs_done, True/*allow redirection*/ )) {
1017 found = VG_(search_transtab)( NULL, NULL, NULL,
1018 ip, True );
1019 vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
1020
1021 } else {
1022 // If VG_(translate)() fails, it's because it had to throw a
1023 // signal because the client jumped to a bad address. That
1024 // means that either a signal has been set up for delivery,
1025 // or the thread has been marked for termination. Either
1026 // way, we just need to go back into the scheduler loop.
1027 }
1028 }
1029 }
1030
1031 static
1032 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
1033 {
1034 Bool found = False;
1035 Addr ip = VG_(get_IP)(tid);
1036 UInt to_sNo = (UInt)-1;
1037 UInt to_tteNo = (UInt)-1;
1038
1039 found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1040 ip, False/*dont_upd_fast_cache*/ );
1041 if (!found) {
1042 /* Not found; we need to request a translation. */
1043 if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1044 bbs_done, True/*allow redirection*/ )) {
1045 found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1046 ip, False );
1047 vg_assert2(found, "handle_chain_me: missing tt_fast entry");
1048 } else {
1049 // If VG_(translate)() fails, it's because it had to throw a
1050 // signal because the client jumped to a bad address. That
1051 // means that either a signal has been set up for delivery,
1052 // or the thread has been marked for termination. Either
1053 // way, we just need to go back into the scheduler loop.
1054 return;
1055 }
1056 }
1057 vg_assert(found);
1058 vg_assert(to_sNo != -1);
1059 vg_assert(to_tteNo != -1);
1060
1061 /* So, finally we know where to patch through to. Do the patching
1062 and update the various admin tables that allow it to be undone
1063 in the case that the destination block gets deleted. */
1064 VG_(tt_tc_do_chaining)( place_to_chain,
1065 to_sNo, to_tteNo, toFastEP );
1066 }
1067
1068 static void handle_syscall(ThreadId tid, UInt trc)
1069 {
1070 ThreadState * volatile tst = VG_(get_ThreadState)(tid);
1071 volatile UWord jumped;
1072
1073 /* Syscall may or may not block; either way, it will be
1074 complete by the time this call returns, and we'll be
1075 runnable again. We could take a signal while the
1076 syscall runs. */
1077
1078    if (VG_(clo_sanity_level) >= 3)
1079 VG_(am_do_sync_check)("(BEFORE SYSCALL)",__FILE__,__LINE__);
1080
1081 SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
1082
1083    if (VG_(clo_sanity_level) >= 3)
1084 VG_(am_do_sync_check)("(AFTER SYSCALL)",__FILE__,__LINE__);
1085
1086 if (!VG_(is_running_thread)(tid))
1087 VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
1088 tid, VG_(running_tid), tid, tst->status);
1089 vg_assert(VG_(is_running_thread)(tid));
1090
1091 if (jumped != (UWord)0) {
1092 block_signals();
1093 VG_(poll_signals)(tid);
1094 }
1095 }
1096
1097 /* tid just requested a jump to the noredir version of its current
1098 program counter. So make up that translation if needed, run it,
1099 and return the resulting thread return code in two_words[]. */
1100 static
1101 void handle_noredir_jump ( /*OUT*/HWord* two_words,
1102 /*MOD*/Int* dispatchCtrP,
1103 ThreadId tid )
1104 {
1105 /* Clear return area. */
1106 two_words[0] = two_words[1] = 0;
1107
1108 AddrH hcode = 0;
1109 Addr ip = VG_(get_IP)(tid);
1110
1111 Bool found = VG_(search_unredir_transtab)( &hcode, ip );
1112 if (!found) {
1113 /* Not found; we need to request a translation. */
1114 if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
1115 False/*NO REDIRECTION*/ )) {
1116
1117 found = VG_(search_unredir_transtab)( &hcode, ip );
1118 vg_assert2(found, "unredir translation missing after creation?!");
1119 } else {
1120 // If VG_(translate)() fails, it's because it had to throw a
1121 // signal because the client jumped to a bad address. That
1122 // means that either a signal has been set up for delivery,
1123 // or the thread has been marked for termination. Either
1124 // way, we just need to go back into the scheduler loop.
1125 two_words[0] = VG_TRC_BORING;
1126 return;
1127 }
1128
1129 }
1130
1131 vg_assert(found);
1132 vg_assert(hcode != 0);
1133
1134 /* Otherwise run it and return the resulting VG_TRC_* value. */
1135 vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
1136 run_thread_for_a_while( two_words, dispatchCtrP, tid,
1137 hcode, True/*use hcode*/ );
1138 }
1139
1140
1141 /*
1142 Run a thread until it wants to exit.
1143
1144 We assume that the caller has already called VG_(acquire_BigLock) for
1145 us, so we own the VCPU. Also, all signals are blocked.
1146 */
1147 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
1148 {
1149 /* Holds the remaining size of this thread's "timeslice". */
1150 Int dispatch_ctr = 0;
1151
1152 ThreadState *tst = VG_(get_ThreadState)(tid);
1153 static Bool vgdb_startup_action_done = False;
1154
1155 if (VG_(clo_trace_sched))
1156 print_sched_event(tid, "entering VG_(scheduler)");
1157
1158    /* Do vgdb initialization (but once).  Only the first (main) task
1159       starting up will do the below.
1160       Initializing gdbserver earlier than at the first call to
1161       VG_(scheduler) causes problems:
1162 * at the end of VG_(scheduler_init_phase2) :
1163         The main thread is in VgTs_Init state, but not yet in a
1164 consistent state => the thread cannot be reported to gdb
1165 (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1166 back the guest registers to gdb).
1167 * at end of valgrind_main, just
1168 before VG_(main_thread_wrapper_NORETURN)(1) :
1169 The main thread is still in VgTs_Init state but in a
1170 more advanced state. However, the thread state is not yet
1171         completely initialized: among other things, the os_state is not yet fully
1172 set => the thread is then not properly reported to gdb,
1173         which is then confused (causing e.g. a duplicate thread to be
1174 shown, without thread id).
1175 * it would be possible to initialize gdbserver "lower" in the
1176 call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1177 these are platform dependent and the place at which
1178 the thread state is completely initialized is not
1179 specific anymore to the main thread (so a similar "do it only
1180 once" would be needed).
1181
1182 => a "once only" initialization here is the best compromise. */
1183 if (!vgdb_startup_action_done) {
1184 vg_assert(tid == 1); // it must be the main thread.
1185 vgdb_startup_action_done = True;
1186 if (VG_(clo_vgdb) != Vg_VgdbNo) {
1187          /* If we have to poll, ensure we do an initial poll at the first
1188 scheduler call. Otherwise, ensure no poll (unless interrupted
1189 by ptrace). */
1190 if (VG_(clo_vgdb_poll))
1191 VG_(force_vgdb_poll) ();
1192 else
1193 VG_(disable_vgdb_poll) ();
1194
1195 vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
1196 /* As we are initializing, VG_(dyn_vgdb_error) can't have been
1197 changed yet. */
1198
1199 VG_(gdbserver_prerun_action) (1);
1200 } else {
1201 VG_(disable_vgdb_poll) ();
1202 }
1203 }
1204
1205 /* set the proper running signal mask */
1206 block_signals();
1207
1208 vg_assert(VG_(is_running_thread)(tid));
1209
1210 dispatch_ctr = SCHEDULING_QUANTUM;
1211
1212 while (!VG_(is_exiting)(tid)) {
1213
1214 vg_assert(dispatch_ctr >= 0);
1215 if (dispatch_ctr == 0) {
1216
1217 /* Our slice is done, so yield the CPU to another thread. On
1218             Linux, this doesn't actually sleep between giving up the CPU
1219             and running again, since that would take too much time. */
1220
1221 /* 4 July 06: it seems that a zero-length nsleep is needed to
1222 cause async thread cancellation (canceller.c) to terminate
1223 in finite time; else it is in some kind of race/starvation
1224 situation and completion is arbitrarily delayed (although
1225 this is not a deadlock).
1226
1227 Unfortunately these sleeps cause MPI jobs not to terminate
1228 sometimes (some kind of livelock). So sleeping once
1229 every N opportunities appears to work. */
1230
1231 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1232 sys_yield also helps the problem, whilst not crashing apps. */
1233
1234 VG_(release_BigLock)(tid, VgTs_Yielding,
1235 "VG_(scheduler):timeslice");
1236 /* ------------ now we don't have The Lock ------------ */
1237
1238 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
1239 /* ------------ now we do have The Lock ------------ */
1240
1241 /* OK, do some relatively expensive housekeeping stuff */
1242 scheduler_sanity(tid);
1243 VG_(sanity_check_general)(False);
1244
1245 /* Look for any pending signals for this thread, and set them up
1246 for delivery */
1247 VG_(poll_signals)(tid);
1248
1249 if (VG_(is_exiting)(tid))
1250 break; /* poll_signals picked up a fatal signal */
1251
1252 /* For stats purposes only. */
1253 n_scheduling_events_MAJOR++;
1254
1255          /* Figure out how many bbs to ask the dispatcher to do.  Note
1256             that it decrements the counter before testing it for zero, so
1257             that if dispatch_ctr is set to N you get at most N-1
1258             iterations.  Also this means that dispatch_ctr must
1259             exceed zero before entering the innerloop.  Also also, the
1260 decrement is done before the bb is actually run, so you
1261 always get at least one decrement even if nothing happens. */
1262 // FIXME is this right?
1263 dispatch_ctr = SCHEDULING_QUANTUM;
1264
1265 /* paranoia ... */
1266 vg_assert(tst->tid == tid);
1267 vg_assert(tst->os_state.lwpid == VG_(gettid)());
1268 }
1269
1270 /* For stats purposes only. */
1271 n_scheduling_events_MINOR++;
1272
1273 if (0)
1274 VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
1275 tid, dispatch_ctr - 1 );
1276
1277 HWord trc[2]; /* "two_words" */
1278 run_thread_for_a_while( &trc[0],
1279 &dispatch_ctr,
1280 tid, 0/*ignored*/, False );
1281
1282 if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1283 HChar buf[50];
1284 VG_(sprintf)(buf, "TRC: %s", name_of_sched_event(trc[0]));
1285 print_sched_event(tid, buf);
1286 }
1287
1288 if (trc[0] == VEX_TRC_JMP_NOREDIR) {
1289 /* If we got a request to run a no-redir version of
1290 something, do so now -- handle_noredir_jump just (creates
1291 and) runs that one translation. The flip side is that the
1292 noredir translation can't itself return another noredir
1293 request -- that would be nonsensical. It can, however,
1294 return VG_TRC_BORING, which just means keep going as
1295 normal. */
1296 /* Note that the fact that we need to continue with a
1297 no-redir jump is not recorded anywhere else in this
1298 thread's state. So we *must* execute the block right now
1299 -- we can't fail to execute it and later resume with it,
1300 because by then we'll have forgotten the fact that it
1301 should be run as no-redir, but will get run as a normal
1302 potentially-redir'd, hence screwing up. This really ought
1303 to be cleaned up, by noting in the guest state that the
1304 next block to be executed should be no-redir. Then we can
1305 suspend and resume at any point, which isn't the case at
1306 the moment. */
1307 handle_noredir_jump( &trc[0],
1308 &dispatch_ctr,
1309 tid );
1310 vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
1311
1312 /* This can't be allowed to happen, since it means the block
1313 didn't execute, and we have no way to resume-as-noredir
1314 after we get more timeslice. But I don't think it ever
1315 can, since handle_noredir_jump will assert if the counter
1316 is zero on entry. */
1317 vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
1318
1319 /* A no-redir translation can't return with a chain-me
1320 request, since chaining in the no-redir cache is too
1321 complex. */
1322 vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1323 && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
1324 }
1325
1326 switch (trc[0]) {
1327 case VEX_TRC_JMP_BORING:
1328 /* assisted dispatch, no event. Used by no-redir
1329 translations to force return to the scheduler. */
1330 case VG_TRC_BORING:
1331 /* no special event, just keep going. */
1332 break;
1333
1334 case VG_TRC_INNER_FASTMISS:
1335 vg_assert(dispatch_ctr > 0);
1336 handle_tt_miss(tid);
1337 break;
1338
1339 case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
1340 if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
1341 handle_chain_me(tid, (void*)trc[1], False);
1342 break;
1343 }
1344
1345 case VG_TRC_CHAIN_ME_TO_FAST_EP: {
1346 if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
1347 handle_chain_me(tid, (void*)trc[1], True);
1348 break;
1349 }
1350
1351 case VEX_TRC_JMP_CLIENTREQ:
1352 do_client_request(tid);
1353 break;
1354
1355 case VEX_TRC_JMP_SYS_INT128: /* x86-linux */
1356 case VEX_TRC_JMP_SYS_INT129: /* x86-darwin */
1357 case VEX_TRC_JMP_SYS_INT130: /* x86-darwin */
1358 case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
1359 handle_syscall(tid, trc[0]);
1360 if (VG_(clo_sanity_level) > 2)
1361 VG_(sanity_check_general)(True); /* sanity-check every syscall */
1362 break;
1363
1364 case VEX_TRC_JMP_YIELD:
1365 /* Explicit yield, because this thread is in a spin-lock
1366 or something. Only let the thread run for a short while
1367 longer. Because swapping to another thread is expensive,
1368 we're prepared to let this thread eat a little more CPU
1369 before swapping to another. That means that short term
1370 spins waiting for hardware to poke memory won't cause a
1371 thread swap. */
1372 if (dispatch_ctr > 1000)
1373 dispatch_ctr = 1000;
1374 break;
1375
1376 case VG_TRC_INNER_COUNTERZERO:
1377 /* Timeslice is out. Let a new thread be scheduled. */
1378 vg_assert(dispatch_ctr == 0);
1379 break;
1380
1381 case VG_TRC_FAULT_SIGNAL:
1382 /* Everything should be set up (either we're exiting, or
1383 about to start in a signal handler). */
1384 break;
1385
1386 case VEX_TRC_JMP_MAPFAIL:
1387 /* Failure of arch-specific address translation (x86/amd64
1388 segment override use) */
1389 /* jrs 2005 03 11: is this correct? */
1390 VG_(synth_fault)(tid);
1391 break;
1392
1393 case VEX_TRC_JMP_EMWARN: {
1394 static Int counts[EmNote_NUMBER];
1395 static Bool counts_initted = False;
1396 VexEmNote ew;
1397 const HChar* what;
1398 Bool show;
1399 Int q;
1400 if (!counts_initted) {
1401 counts_initted = True;
1402 for (q = 0; q < EmNote_NUMBER; q++)
1403 counts[q] = 0;
1404 }
1405 ew = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1406 what = (ew < 0 || ew >= EmNote_NUMBER)
1407 ? "unknown (?!)"
1408 : LibVEX_EmNote_string(ew);
1409 show = (ew < 0 || ew >= EmNote_NUMBER)
1410 ? True
1411 : counts[ew]++ < 3;
1412 if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
1413 VG_(message)( Vg_UserMsg,
1414 "Emulation warning: unsupported action:\n");
1415 VG_(message)( Vg_UserMsg, " %s\n", what);
1416 VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1417 }
1418 break;
1419 }
1420
1421 case VEX_TRC_JMP_EMFAIL: {
1422 VexEmNote ew;
1423 const HChar* what;
1424 ew = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1425 what = (ew < 0 || ew >= EmNote_NUMBER)
1426 ? "unknown (?!)"
1427 : LibVEX_EmNote_string(ew);
1428 VG_(message)( Vg_UserMsg,
1429 "Emulation fatal error -- Valgrind cannot continue:\n");
1430 VG_(message)( Vg_UserMsg, " %s\n", what);
1431 VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1432 VG_(message)(Vg_UserMsg, "\n");
1433 VG_(message)(Vg_UserMsg, "Valgrind has to exit now. Sorry.\n");
1434 VG_(message)(Vg_UserMsg, "\n");
1435 VG_(exit)(1);
1436 break;
1437 }
1438
1439 case VEX_TRC_JMP_SIGILL:
1440 VG_(synth_sigill)(tid, VG_(get_IP)(tid));
1441 break;
1442
1443 case VEX_TRC_JMP_SIGTRAP:
1444 VG_(synth_sigtrap)(tid);
1445 break;
1446
1447 case VEX_TRC_JMP_SIGSEGV:
1448 VG_(synth_fault)(tid);
1449 break;
1450
1451 case VEX_TRC_JMP_SIGBUS:
1452 VG_(synth_sigbus)(tid);
1453 break;
1454
1455 case VEX_TRC_JMP_SIGFPE_INTDIV:
1456 VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
1457 break;
1458
1459 case VEX_TRC_JMP_SIGFPE_INTOVF:
1460 VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
1461 break;
1462
1463 case VEX_TRC_JMP_NODECODE: {
1464 Addr addr = VG_(get_IP)(tid);
1465
1466 if (VG_(clo_sigill_diag)) {
1467 VG_(umsg)(
1468 "valgrind: Unrecognised instruction at address %#lx.\n", addr);
1469 VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1470 # define M(a) VG_(umsg)(a "\n");
1471 M("Your program just tried to execute an instruction that Valgrind" );
1472 M("did not recognise. There are two possible reasons for this." );
1473 M("1. Your program has a bug and erroneously jumped to a non-code" );
1474 M(" location. If you are running Memcheck and you just saw a" );
1475 M(" warning about a bad jump, it's probably your program's fault.");
1476 M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1477 M(" i.e. it's Valgrind's fault. If you think this is the case or");
1478 M(" you are not sure, please let us know and we'll try to fix it.");
1479 M("Either way, Valgrind will now raise a SIGILL signal which will" );
1480 M("probably kill your program." );
1481 # undef M
1482 }
1483 # if defined(VGA_s390x)
1484 /* Now that the complaint is out we need to adjust the guest_IA. The
1485 reason is that -- after raising the exception -- execution will
1486 continue with the insn that follows the invalid insn. As the first
1487 2 bits of the invalid insn determine its length in the usual way,
1488 we can compute the address of the next insn here and adjust the
1489 guest_IA accordingly. This adjustment is essential and tested by
1490 none/tests/s390x/op_exception.c (which would loop forever
1491 otherwise) */
1492 UChar byte = ((UChar *)addr)[0];
1493 UInt insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
1494 Addr next_insn_addr = addr + insn_length;
1495 VG_(set_IP)(tid, next_insn_addr);
1496 # endif
1497 VG_(synth_sigill)(tid, addr);
1498 break;
1499 }
1500
1501 case VEX_TRC_JMP_INVALICACHE:
1502 VG_(discard_translations)(
1503 (Addr64)VG_(threads)[tid].arch.vex.guest_CMSTART,
1504 VG_(threads)[tid].arch.vex.guest_CMLEN,
1505 "scheduler(VEX_TRC_JMP_INVALICACHE)"
1506 );
1507 if (0)
1508 VG_(printf)("dump translations done.\n");
1509 break;
1510
1511 case VEX_TRC_JMP_FLUSHDCACHE: {
1512 void* start = (void*)VG_(threads)[tid].arch.vex.guest_CMSTART;
1513 SizeT len = VG_(threads)[tid].arch.vex.guest_CMLEN;
1514 VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
1515 VG_(flush_dcache)(start, len);
1516 break;
1517 }
1518
1519 case VG_TRC_INVARIANT_FAILED:
1520 /* This typically happens if, after running generated code,
1521 it is detected that host CPU settings (eg, FPU/Vector
1522 control words) are not as they should be. Vex's code
1523 generation specifies the state such control words should
1524 be in on entry to Vex-generated code, and they should be
1525 unchanged on exit from it. Failure of this assertion
1526 usually means a bug in Vex's code generation. */
1527 //{ UInt xx;
1528 // __asm__ __volatile__ (
1529 // "\t.word 0xEEF12A10\n" // fmrx r2,fpscr
1530 // "\tmov %0, r2" : "=r"(xx) : : "r2" );
1531 // VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1532 //}
1533 vg_assert2(0, "VG_(scheduler), phase 3: "
1534 "run_innerloop detected host "
1535 "state invariant failure", trc);
1536
1537 case VEX_TRC_JMP_SYS_SYSENTER:
1538 /* Do whatever simulation is appropriate for an x86 sysenter
1539 instruction. Note that it is critical to set this thread's
1540 guest_EIP to point at the code to execute after the
1541 sysenter, since Vex-generated code will not have set it --
1542 vex does not know what it should be. Vex sets the next
1543 address to zero, so if you don't set guest_EIP, the thread
1544 will jump to zero afterwards and probably die as a result. */
1545 # if defined(VGP_x86_linux)
1546 vg_assert2(0, "VG_(scheduler), phase 3: "
1547 "sysenter_x86 on x86-linux is not supported");
1548 # elif defined(VGP_x86_darwin)
1549 /* return address in client edx */
1550 VG_(threads)[tid].arch.vex.guest_EIP
1551 = VG_(threads)[tid].arch.vex.guest_EDX;
1552 handle_syscall(tid, trc[0]);
1553 # else
1554 vg_assert2(0, "VG_(scheduler), phase 3: "
1555 "sysenter_x86 on non-x86 platform?!?!");
1556 # endif
1557 break;
1558
1559 default:
1560 vg_assert2(0, "VG_(scheduler), phase 3: "
1561 "unexpected thread return code (%u)", trc[0]);
1562 /* NOTREACHED */
1563 break;
1564
1565 } /* switch (trc) */
1566
1567 if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
1568 maybe_show_sb_profile();
1569 }
1570
1571 if (VG_(clo_trace_sched))
1572 print_sched_event(tid, "exiting VG_(scheduler)");
1573
1574 vg_assert(VG_(is_exiting)(tid));
1575
1576 return tst->exitreason;
1577 }
1578
1579
1580 /*
1581    This causes all threads to forcibly exit.  They aren't actually
1582 dead by the time this returns; you need to call
1583 VG_(reap_threads)() to wait for them.
1584 */
1585 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
1586 {
1587 ThreadId tid;
1588
1589 vg_assert(VG_(is_running_thread)(me));
1590
1591 for (tid = 1; tid < VG_N_THREADS; tid++) {
1592 if (tid == me
1593 || VG_(threads)[tid].status == VgTs_Empty)
1594 continue;
1595 if (0)
1596 VG_(printf)(
1597 "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
1598
1599 VG_(threads)[tid].exitreason = src;
1600 if (src == VgSrc_FatalSig)
1601 VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
1602 VG_(get_thread_out_of_syscall)(tid);
1603 }
1604 }
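/* Illustrative use only (a sketch, not a prescription): on a fatal signal
   the core does roughly

      VG_(nuke_all_threads_except)(tid, VgSrc_FatalSig);
      VG_(reap_threads)(tid);

   that is, ask every other thread to exit, then wait for them all to die. */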
1605
1606
1607 /* ---------------------------------------------------------------------
1608 Specifying shadow register values
1609 ------------------------------------------------------------------ */
1610
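// Per architecture, VG_CLREQ_ARGS names the guest register in which the
// client passes the address of its request-argument block, and VG_CLREQ_RET
// names the guest register into which the request's result is written back.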
1611 #if defined(VGA_x86)
1612 # define VG_CLREQ_ARGS guest_EAX
1613 # define VG_CLREQ_RET guest_EDX
1614 #elif defined(VGA_amd64)
1615 # define VG_CLREQ_ARGS guest_RAX
1616 # define VG_CLREQ_RET guest_RDX
1617 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
1618 # define VG_CLREQ_ARGS guest_GPR4
1619 # define VG_CLREQ_RET guest_GPR3
1620 #elif defined(VGA_arm)
1621 # define VG_CLREQ_ARGS guest_R4
1622 # define VG_CLREQ_RET guest_R3
1623 #elif defined(VGA_arm64)
1624 # define VG_CLREQ_ARGS guest_X4
1625 # define VG_CLREQ_RET guest_X3
1626 #elif defined (VGA_s390x)
1627 # define VG_CLREQ_ARGS guest_r2
1628 # define VG_CLREQ_RET guest_r3
1629 #elif defined(VGA_mips32) || defined(VGA_mips64)
1630 # define VG_CLREQ_ARGS guest_r12
1631 # define VG_CLREQ_RET guest_r11
1632 #else
1633 # error Unknown arch
1634 #endif
1635
1636 #define CLREQ_ARGS(regs) ((regs).vex.VG_CLREQ_ARGS)
1637 #define CLREQ_RET(regs) ((regs).vex.VG_CLREQ_RET)
1638 #define O_CLREQ_RET (offsetof(VexGuestArchState, VG_CLREQ_RET))
1639
1640 // These macros write a value to a client's thread register, and tell the
1641 // tool that it's happened (if necessary).
1642
1643 #define SET_CLREQ_RETVAL(zztid, zzval) \
1644 do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1645 VG_TRACK( post_reg_write, \
1646 Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1647 } while (0)
1648
1649 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1650 do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1651 VG_TRACK( post_reg_write_clientcall_return, \
1652 zztid, O_CLREQ_RET, sizeof(UWord), f); \
1653 } while (0)
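// For instance (illustrative), a request handler that wants to hand the
// value 1 back to the client would do SET_CLREQ_RETVAL(tid, 1).
// SET_CLCALL_RETVAL does the same, but additionally tells the tool which
// client function (f) produced the value, so the register write can be
// attributed to that call.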
1654
1655
1656 /* ---------------------------------------------------------------------
1657 Handle client requests.
1658 ------------------------------------------------------------------ */
1659
1660 // OS-specific(?) client requests
1661 static Bool os_client_request(ThreadId tid, UWord *args)
1662 {
1663 Bool handled = True;
1664
1665 vg_assert(VG_(is_running_thread)(tid));
1666
1667 switch(args[0]) {
1668 case VG_USERREQ__LIBC_FREERES_DONE:
1669 /* This is equivalent to an exit() syscall, but we don't set the
1670 exitcode (since it might already be set) */
1671 if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
1672 VG_(message)(Vg_DebugMsg,
1673 "__libc_freeres() done; really quitting!\n");
1674 VG_(threads)[tid].exitreason = VgSrc_ExitThread;
1675 break;
1676
1677 default:
1678 handled = False;
1679 break;
1680 }
1681
1682 return handled;
1683 }
1684
1685
1686 /* Write out a client message, possibly including a back trace. Return
1687 the number of characters written. In case of XML output, the format
1688 string as well as any arguments it requires will be XML'ified.
1689 I.e. special characters such as the angle brackets will be translated
1690 into proper escape sequences. */
1691 static
1692 Int print_client_message( ThreadId tid, const HChar *format,
1693 va_list *vargsp, Bool include_backtrace)
1694 {
1695 Int count;
1696
1697 if (VG_(clo_xml)) {
1698 /* Translate the format string as follows:
1699          <  --> &lt;
1700          >  --> &gt;
1701          &  --> &amp;
1702 %s --> %pS
1703 Yes, yes, it's simplified but in synch with
1704 myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
1705 */
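      /* For example (illustrative only): a format of "value <%s>" is
         rewritten below as "value &lt;%pS&gt;" before being handed to
         VG_(vprintf_xml). */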
1706
1707 /* Allocate a buffer that is for sure large enough. */
1708 HChar xml_format[VG_(strlen)(format) * 5 + 1];
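      /* (Each input character expands to at most 5 output characters,
         the worst case being '&' -> "&amp;"; hence the factor of 5.) */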
1709
1710 const HChar *p;
1711 HChar *q = xml_format;
1712
1713 for (p = format; *p; ++p) {
1714 switch (*p) {
1715          case '<': VG_(strcpy)(q, "&lt;");  q += 4; break;
1716          case '>': VG_(strcpy)(q, "&gt;");  q += 4; break;
1717          case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
1718 case '%':
1719 /* Careful: make sure %%s stays %%s */
1720 *q++ = *p++;
1721 if (*p == 's') {
1722 *q++ = 'p';
1723 *q++ = 'S';
1724 } else {
1725 *q++ = *p;
1726 }
1727 break;
1728
1729 default:
1730 *q++ = *p;
1731 break;
1732 }
1733 }
1734 *q = '\0';
1735
1736 VG_(printf_xml)( "<clientmsg>\n" );
1737 VG_(printf_xml)( " <tid>%d</tid>\n", tid );
1738 VG_(printf_xml)( " <text>" );
1739 count = VG_(vprintf_xml)( xml_format, *vargsp );
1740 VG_(printf_xml)( " </text>\n" );
1741 } else {
1742 count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
1743 VG_(message_flush)();
1744 }
1745
1746 if (include_backtrace)
1747 VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1748
1749 if (VG_(clo_xml))
1750 VG_(printf_xml)( "</clientmsg>\n" );
1751
1752 return count;
1753 }
1754
1755
1756 /* Do a client request for the thread tid. After the request, tid may
1757 or may not still be runnable; if not, the scheduler will have to
1758 choose a new thread to run.
1759 */
1760 static
1761 void do_client_request ( ThreadId tid )
1762 {
1763 UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
1764 UWord req_no = arg[0];
1765
1766 if (0)
1767 VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
1768 switch (req_no) {
1769
1770 case VG_USERREQ__CLIENT_CALL0: {
1771 UWord (*f)(ThreadId) = (void*)arg[1];
1772 if (f == NULL)
1773 VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
1774 else
1775 SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
1776 break;
1777 }
1778 case VG_USERREQ__CLIENT_CALL1: {
1779 UWord (*f)(ThreadId, UWord) = (void*)arg[1];
1780 if (f == NULL)
1781 VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
1782 else
1783 SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
1784 break;
1785 }
1786 case VG_USERREQ__CLIENT_CALL2: {
1787 UWord (*f)(ThreadId, UWord, UWord) = (void*)arg[1];
1788 if (f == NULL)
1789 VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
1790 else
1791 SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
1792 break;
1793 }
1794 case VG_USERREQ__CLIENT_CALL3: {
1795 UWord (*f)(ThreadId, UWord, UWord, UWord) = (void*)arg[1];
1796 if (f == NULL)
1797 VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
1798 else
1799 SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
1800 break;
1801 }
1802
1803 // Nb: this looks like a circular definition, because it kind of is.
1804 // See comment in valgrind.h to understand what's going on.
1805 case VG_USERREQ__RUNNING_ON_VALGRIND:
1806 SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
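         // (The +1 reports one more level of nesting than this Valgrind
         //  itself observes, which is normally 0, so a client under a
         //  single Valgrind sees 1, and more if Valgrind runs on Valgrind.)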
1807 break;
1808
1809 case VG_USERREQ__PRINTF: {
1810 const HChar* format = (HChar *)arg[1];
1811 /* JRS 2010-Jan-28: this is DEPRECATED; use the
1812 _VALIST_BY_REF version instead */
1813 if (sizeof(va_list) != sizeof(UWord))
1814 goto va_list_casting_error_NORETURN;
1815 union {
1816 va_list vargs;
1817 unsigned long uw;
1818 } u;
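         /* Reinterpret the guest word as a va_list through the union;
            this only makes sense when the two types have the same size,
            which is what the check above guarantees. */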
1819 u.uw = (unsigned long)arg[2];
1820 Int count =
1821 print_client_message( tid, format, &u.vargs,
1822 /* include_backtrace */ False );
1823 SET_CLREQ_RETVAL( tid, count );
1824 break;
1825 }
1826
1827 case VG_USERREQ__PRINTF_BACKTRACE: {
1828 const HChar* format = (HChar *)arg[1];
1829 /* JRS 2010-Jan-28: this is DEPRECATED; use the
1830 _VALIST_BY_REF version instead */
1831 if (sizeof(va_list) != sizeof(UWord))
1832 goto va_list_casting_error_NORETURN;
1833 union {
1834 va_list vargs;
1835 unsigned long uw;
1836 } u;
1837 u.uw = (unsigned long)arg[2];
1838 Int count =
1839 print_client_message( tid, format, &u.vargs,
1840 /* include_backtrace */ True );
1841 SET_CLREQ_RETVAL( tid, count );
1842 break;
1843 }
1844
1845 case VG_USERREQ__PRINTF_VALIST_BY_REF: {
1846 const HChar* format = (HChar *)arg[1];
1847 va_list* vargsp = (va_list*)arg[2];
1848 Int count =
1849 print_client_message( tid, format, vargsp,
1850 /* include_backtrace */ False );
1851
1852 SET_CLREQ_RETVAL( tid, count );
1853 break;
1854 }
1855
1856 case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
1857 const HChar* format = (HChar *)arg[1];
1858 va_list* vargsp = (va_list*)arg[2];
1859 Int count =
1860 print_client_message( tid, format, vargsp,
1861 /* include_backtrace */ True );
1862 SET_CLREQ_RETVAL( tid, count );
1863 break;
1864 }
1865
1866 case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
1867 va_list* vargsp = (va_list*)arg[2];
1868 Int count =
1869 VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
1870 VG_(message_flush)();
1871 SET_CLREQ_RETVAL( tid, count );
1872 break;
1873 }
1874
1875 case VG_USERREQ__ADD_IFUNC_TARGET: {
1876 VG_(redir_add_ifunc_target)( arg[1], arg[2] );
1877 SET_CLREQ_RETVAL( tid, 0);
1878 break; }
1879
1880 case VG_USERREQ__STACK_REGISTER: {
1881 UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
1882 SET_CLREQ_RETVAL( tid, sid );
1883 break; }
1884
1885 case VG_USERREQ__STACK_DEREGISTER: {
1886 VG_(deregister_stack)(arg[1]);
1887 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1888 break; }
1889
1890 case VG_USERREQ__STACK_CHANGE: {
1891 VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
1892 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1893 break; }
1894
1895 case VG_USERREQ__GET_MALLOCFUNCS: {
1896 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
1897
1898 info->tl_malloc = VG_(tdict).tool_malloc;
1899 info->tl_calloc = VG_(tdict).tool_calloc;
1900 info->tl_realloc = VG_(tdict).tool_realloc;
1901 info->tl_memalign = VG_(tdict).tool_memalign;
1902 info->tl___builtin_new = VG_(tdict).tool___builtin_new;
1903 info->tl___builtin_vec_new = VG_(tdict).tool___builtin_vec_new;
1904 info->tl_free = VG_(tdict).tool_free;
1905 info->tl___builtin_delete = VG_(tdict).tool___builtin_delete;
1906 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
1907 info->tl_malloc_usable_size = VG_(tdict).tool_malloc_usable_size;
1908
1909 info->mallinfo = VG_(mallinfo);
1910 info->clo_trace_malloc = VG_(clo_trace_malloc);
1911
1912 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1913
1914 break;
1915 }
1916
1917 /* Requests from the client program */
1918
1919 case VG_USERREQ__DISCARD_TRANSLATIONS:
1920 if (VG_(clo_verbosity) > 2)
1921 VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
1922 " addr %p, len %lu\n",
1923 (void*)arg[1], arg[2] );
1924
1925 VG_(discard_translations)(
1926 arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
1927 );
1928
1929 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1930 break;
1931
1932 case VG_USERREQ__COUNT_ERRORS:
1933 SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
1934 break;
1935
1936 case VG_USERREQ__LOAD_PDB_DEBUGINFO:
1937 VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
1938 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1939 break;
1940
1941 case VG_USERREQ__MAP_IP_TO_SRCLOC: {
1942 Addr ip = arg[1];
1943 HChar* buf64 = (HChar*)arg[2];
1944
1945 VG_(memset)(buf64, 0, 64);
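         /* buf64 is treated as a 64-byte buffer: up to 50 bytes of
            NUL-terminated filename, with ":<linenum>" appended at the
            terminator (at most 12 more bytes for a UInt), which fits. */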
1946 UInt linenum = 0;
1947 Bool ok = VG_(get_filename_linenum)(
1948 ip, &buf64[0], 50, NULL, 0, NULL, &linenum
1949 );
1950 if (ok) {
1951 /* Find the terminating zero in the first 50 bytes. */
1952 UInt i;
1953 for (i = 0; i < 50; i++) {
1954 if (buf64[i] == 0)
1955 break;
1956 }
1957 /* We must find a zero somewhere in 0 .. 49. Else
1958 VG_(get_filename_linenum) is not properly zero
1959 terminating. */
1960 vg_assert(i < 50);
1961 VG_(sprintf)(&buf64[i], ":%u", linenum);
1962 } else {
1963 buf64[0] = 0;
1964 }
1965
1966 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1967 break;
1968 }
1969
1970 case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
1971 Word delta = arg[1];
1972 vg_assert(delta == 1 || delta == -1);
1973 ThreadState* tst = VG_(get_ThreadState)(tid);
1974 vg_assert(tst);
1975 if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
1976 tst->err_disablement_level++;
1977 }
1978 else
1979 if (delta == -1 && tst->err_disablement_level > 0) {
1980 tst->err_disablement_level--;
1981 }
1982 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1983 break;
1984 }
1985
1986 case VG_USERREQ__GDB_MONITOR_COMMAND: {
1987 UWord ret;
1988 ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
1989 SET_CLREQ_RETVAL(tid, ret);
1990 break;
1991 }
1992
1993 case VG_USERREQ__MALLOCLIKE_BLOCK:
1994 case VG_USERREQ__RESIZEINPLACE_BLOCK:
1995 case VG_USERREQ__FREELIKE_BLOCK:
1996       // Ignore them if the addr is NULL; otherwise pass them on to the tool.
1997 if (!arg[1]) {
1998 SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1999 break;
2000 } else {
2001 goto my_default;
2002 }
2003
2004 case VG_USERREQ__VEX_INIT_FOR_IRI:
2005 LibVEX_InitIRI ( (IRICB *)arg[1] );
2006 break;
2007
2008 default:
2009 my_default:
2010 if (os_client_request(tid, arg)) {
2011 // do nothing, os_client_request() handled it
2012 } else if (VG_(needs).client_requests) {
2013 UWord ret;
2014
2015 if (VG_(clo_verbosity) > 2)
2016 VG_(printf)("client request: code %lx, addr %p, len %lu\n",
2017 arg[0], (void*)arg[1], arg[2] );
2018
2019 if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
2020 SET_CLREQ_RETVAL(tid, ret);
2021 } else {
2022 static Bool whined = False;
2023
2024 if (!whined && VG_(clo_verbosity) > 2) {
2025 // Allow for requests in core, but defined by tools, which
2026 // have 0 and 0 in their two high bytes.
2027 HChar c1 = (arg[0] >> 24) & 0xff;
2028 HChar c2 = (arg[0] >> 16) & 0xff;
2029 if (c1 == 0) c1 = '_';
2030 if (c2 == 0) c2 = '_';
2031 VG_(message)(Vg_UserMsg, "Warning:\n"
2032 " unhandled client request: 0x%lx (%c%c+0x%lx). Perhaps\n"
2033 " VG_(needs).client_requests should be set?\n",
2034 arg[0], c1, c2, arg[0] & 0xffff);
2035 whined = True;
2036 }
2037 }
2038 break;
2039 }
2040 return;
2041
2042 /*NOTREACHED*/
2043 va_list_casting_error_NORETURN:
2044 VG_(umsg)(
2045 "Valgrind: fatal error - cannot continue: use of the deprecated\n"
2046 "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
2047 "on a platform where they cannot be supported. Please use the\n"
2048 "equivalent _VALIST_BY_REF versions instead.\n"
2049 "\n"
2050 "This is a binary-incompatible change in Valgrind's client request\n"
2051 "mechanism. It is unfortunate, but difficult to avoid. End-users\n"
2052 "are expected to almost never see this message. The only case in\n"
2053 "which you might see this message is if your code uses the macros\n"
2054 "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE. If so, you will need\n"
2055 "to recompile such code, using the header files from this version of\n"
2056 "Valgrind, and not any previous version.\n"
2057 "\n"
2058       "If you see this message in any other circumstances, it is probably\n"
2059 "a bug in Valgrind. In this case, please file a bug report at\n"
2060 "\n"
2061 " http://www.valgrind.org/support/bug_reports.html\n"
2062 "\n"
2063 "Will now abort.\n"
2064 );
2065 vg_assert(0);
2066 }
2067
2068
2069 /* ---------------------------------------------------------------------
2070 Sanity checking (permanently engaged)
2071 ------------------------------------------------------------------ */
2072
2073 /* Internal consistency checks on the sched structures. */
2074 static
2075 void scheduler_sanity ( ThreadId tid )
2076 {
2077 Bool bad = False;
2078 Int lwpid = VG_(gettid)();
2079
2080 if (!VG_(is_running_thread)(tid)) {
2081 VG_(message)(Vg_DebugMsg,
2082 "Thread %d is supposed to be running, "
2083 "but doesn't own the_BigLock (owned by %d)\n",
2084 tid, VG_(running_tid));
2085 bad = True;
2086 }
2087
2088 if (lwpid != VG_(threads)[tid].os_state.lwpid) {
2089 VG_(message)(Vg_DebugMsg,
2090 "Thread %d supposed to be in LWP %d, but we're actually %d\n",
2091 tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
2092 bad = True;
2093 }
2094
2095 if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
2096 VG_(message)(Vg_DebugMsg,
2097 "Thread (LWPID) %d doesn't own the_BigLock\n",
2098 tid);
2099 bad = True;
2100 }
2101
2102 if (0) {
2103 /* Periodically show the state of all threads, for debugging
2104 purposes. */
2105 static UInt lasttime = 0;
2106 UInt now;
2107 now = VG_(read_millisecond_timer)();
2108 if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
2109 lasttime = now;
2110 VG_(printf)("\n------------ Sched State at %d ms ------------\n",
2111 (Int)now);
2112 VG_(show_sched_status)(True, // host_stacktrace
2113 True, // valgrind_stack_usage
2114                               True); // exited_threads
2115 }
2116 }
2117
2118 /* core_panic also shows the sched status, which is why we don't
2119 show it above if bad==True. */
2120 if (bad)
2121 VG_(core_panic)("scheduler_sanity: failed");
2122 }
2123
2124 void VG_(sanity_check_general) ( Bool force_expensive )
2125 {
2126 ThreadId tid;
2127
2128 static UInt next_slow_check_at = 1;
2129 static UInt slow_check_interval = 25;
2130
2131 if (VG_(clo_sanity_level) < 1) return;
2132
2133 /* --- First do all the tests that we can do quickly. ---*/
2134
2135 sanity_fast_count++;
2136
2137 /* Check stuff pertaining to the memory check system. */
2138
2139 /* Check that nobody has spuriously claimed that the first or
2140 last 16 pages of memory have become accessible [...] */
2141 if (VG_(needs).sanity_checks) {
2142 vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
2143 }
2144
2145 /* --- Now some more expensive checks. ---*/
2146
2147 /* Once every now and again, check some more expensive stuff.
2148 Gradually increase the interval between such checks so as not to
2149 burden long-running programs too much. */
2150 if ( force_expensive
2151 || VG_(clo_sanity_level) > 1
2152 || (VG_(clo_sanity_level) == 1
2153 && sanity_fast_count == next_slow_check_at)) {
2154
2155 if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
2156
2157 next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
2158 slow_check_interval++;
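      /* (With VG_(clo_sanity_level) == 1, the gap between successive slow
         checks therefore grows by one fast check each time (24, 25, 26, ...),
         so their cost amortises away on long-running programs.) */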
2159 sanity_slow_count++;
2160
2161 if (VG_(needs).sanity_checks) {
2162 vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
2163 }
2164
2165 /* Look for stack overruns. Visit all threads. */
2166 for (tid = 1; tid < VG_N_THREADS; tid++) {
2167 SizeT remains;
2168 VgStack* stack;
2169
2170 if (VG_(threads)[tid].status == VgTs_Empty ||
2171 VG_(threads)[tid].status == VgTs_Zombie)
2172 continue;
2173
2174 stack
2175 = (VgStack*)
2176 VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
2177 SizeT limit
2178 = 4096; // Let's say. Checking more causes lots of L2 misses.
2179 remains
2180 = VG_(am_get_VgStack_unused_szB)(stack, limit);
2181 if (remains < limit)
2182 VG_(message)(Vg_DebugMsg,
2183 "WARNING: Thread %d is within %ld bytes "
2184 "of running out of stack!\n",
2185 tid, remains);
2186 }
2187 }
2188
2189 if (VG_(clo_sanity_level) > 1) {
2190 /* Check sanity of the low-level memory manager. Note that bugs
2191 in the client's code can cause this to fail, so we don't do
2192 this check unless specially asked for. And because it's
2193 potentially very expensive. */
2194 VG_(sanity_check_malloc_all)();
2195 }
2196 }
2197
2198 /*--------------------------------------------------------------------*/
2199 /*--- end ---*/
2200 /*--------------------------------------------------------------------*/
2201