1
2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
4 /*--- accessibility (A) and validity (V) status of each byte. ---*/
5 /*--- mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of MemCheck, a heavyweight Valgrind tool for
10 detecting memory errors.
11
12 Copyright (C) 2000-2013 Julian Seward
13 jseward@acm.org
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31 */
32
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_gdbserver.h"
36 #include "pub_tool_poolalloc.h"
37 #include "pub_tool_hashtable.h" // For mc_include.h
38 #include "pub_tool_libcbase.h"
39 #include "pub_tool_libcassert.h"
40 #include "pub_tool_libcprint.h"
41 #include "pub_tool_machine.h"
42 #include "pub_tool_mallocfree.h"
43 #include "pub_tool_options.h"
44 #include "pub_tool_oset.h"
45 #include "pub_tool_rangemap.h"
46 #include "pub_tool_replacemalloc.h"
47 #include "pub_tool_tooliface.h"
48 #include "pub_tool_threadstate.h"
49
50 #include "mc_include.h"
51 #include "memcheck.h" /* for client requests */
52
53
54 /* Set to 1 to do a little more sanity checking */
55 #define VG_DEBUG_MEMORY 0
56
57 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
58
59 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
60 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
61
62
63 /*------------------------------------------------------------*/
64 /*--- Fast-case knobs ---*/
65 /*------------------------------------------------------------*/
66
67 // Comment these out to disable the fast cases (don't just set them to zero).
68
69 #define PERF_FAST_LOADV 1
70 #define PERF_FAST_STOREV 1
71
72 #define PERF_FAST_SARP 1
73
74 #define PERF_FAST_STACK 1
75 #define PERF_FAST_STACK2 1
76
77 /* Change this to 1 to enable assertions on origin tracking cache fast
78 paths */
79 #define OC_ENABLE_ASSERTIONS 0
80
81
82 /*------------------------------------------------------------*/
83 /*--- Comments on the origin tracking implementation ---*/
84 /*------------------------------------------------------------*/
85
86 /* See detailed comment entitled
87 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
88 which is contained further on in this file. */
89
90
91 /*------------------------------------------------------------*/
92 /*--- V bits and A bits ---*/
93 /*------------------------------------------------------------*/
94
95 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
96 thinks the corresponding value bit is defined. And every memory byte
97 has an A bit, which tracks whether Memcheck thinks the program can access
98 it safely (ie. it's mapped, and has at least one of the RWX permission bits
99 set). So every N-bit register is shadowed with N V bits, and every memory
100 byte is shadowed with 8 V bits and one A bit.
101
102 In the implementation, we use two forms of compression (compressed V bits
103 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
104 for memory.
105
106 Memcheck also tracks extra information about each heap block that is
107 allocated, for detecting memory leaks and other purposes.
108 */
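/* A concrete illustration (not part of the original source): after

      UInt* p = malloc(sizeof(UInt));  // bytes addressable, V bits Undefined
      *(UChar*)p = 1;                  // first byte's V bits become Defined

   the 4-byte word at p is partially defined.  Copying it around is not an
   error; Memcheck only complains when undefined bits are actually used,
   eg. to decide a conditional branch or to form an address. */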
109
110 /*------------------------------------------------------------*/
111 /*--- Basic A/V bitmap representation. ---*/
112 /*------------------------------------------------------------*/
113
114 /* All reads and writes are checked against a memory map (a.k.a. shadow
115 memory), which records the state of all memory in the process.
116
117 On 32-bit machines the memory map is organised as follows.
118 The top 16 bits of an address are used to index into a top-level
119 map table, containing 65536 entries. Each entry is a pointer to a
120 second-level map, which records the accessibility and validity
121 permissions for the 65536 bytes indexed by the lower 16 bits of the
122 address. Each byte is represented by two bits (details are below). So
123 each second-level map contains 16384 bytes. This two-level arrangement
124 conveniently divides the 4G address space into 64k lumps, each of size 64k
125 bytes.
126
127 All entries in the primary (top-level) map must point to a valid
128 secondary (second-level) map. Since many of the 64kB chunks will
129 have the same status for every bit -- ie. noaccess (for unused
130 address space) or entirely addressable and defined (for code segments) --
131 there are three distinguished secondary maps, which indicate 'noaccess',
132 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
133 map entry points to the relevant distinguished map. In practice,
134 typically more than half of the addressable memory is represented with
135 the 'undefined' or 'defined' distinguished secondary map, so it gives a
136 good saving. It also lets us set the V+A bits of large address regions
137 quickly in set_address_range_perms().
138
139 On 64-bit machines it's more complicated. If we followed the same basic
140 scheme we'd have a four-level table which would require too many memory
141 accesses. So instead the top-level map table has 2^20 entries (indexed
142 using bits 16..35 of the address); this covers the bottom 64GB. Any
143 accesses above 64GB are handled with a slow, sparse auxiliary table.
144 Valgrind's address space manager tries very hard to keep things below
145 this 64GB barrier so that performance doesn't suffer too much.
146
147 Note that this file has a lot of different functions for reading and
148 writing shadow memory. Only a couple are strictly necessary (eg.
149 get_vabits2 and set_vabits2), most are just specialised for specific
150 common cases to improve performance.
151
152 Aside: the V+A bits are less precise than they could be -- we have no way
153 of marking memory as read-only. It would be great if we could add an
154 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
155 which requires 2.3 bits to hold, and there's no way to do that elegantly
156 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
157 seem worth it.
158 */
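/* Illustrative sketch (not part of the build) of the 32-bit decomposition
   described above, for an assumed address:

      Addr  a       = 0x08049f13;
      UWord pm_idx  = a >> 16;       // 0x0804 -> primary_map[0x0804]
      UWord sm_byte = a & 0xFFFF;    // 0x9f13 -> byte within the 64kB chunk
      UWord sm_off  = sm_byte >> 2;  // 0x27c4 -> vabits8[] slot (4 bytes/slot)

   Each secondary map therefore needs 65536/4 == 16384 vabits8 bytes, which
   is the SM_CHUNKS value defined further down. */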
159
160 /* --------------- Basic configuration --------------- */
161
162 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
163
164 #if VG_WORDSIZE == 4
165
166 /* cover the entire address space */
167 # define N_PRIMARY_BITS 16
168
169 #else
170
171 /* Just handle the first 64G fast and the rest via auxiliary
172 primaries. If you change this, Memcheck will assert at startup.
173 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
174 # define N_PRIMARY_BITS 20
175
176 #endif
177
178
179 /* Do not change this. */
180 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
181
182 /* Do not change this. */
183 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
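/* Worked example of the arithmetic above for a 64-bit build
   (N_PRIMARY_BITS == 20):

      N_PRIMARY_MAP       == 1 << 20               == 1048576 entries
      each entry covers      one 64kB SecMap
      fast-path coverage  == 1048576 * 65536 bytes == 64GB
      MAX_PRIMARY_ADDRESS == 0xFFFFFFFFF           == 64GB - 1

   Anything above MAX_PRIMARY_ADDRESS is handled by the auxiliary primary
   map (auxmap_L1 / auxmap_L2, defined below). */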
184
185
186 /* --------------- Secondary maps --------------- */
187
188 // Each byte of memory conceptually has an A bit, which indicates its
189 // addressability, and 8 V bits, which indicate its definedness.
190 //
191 // But because very few bytes are partially defined, we can use a nice
192 // compression scheme to reduce the size of shadow memory. Each byte of
193 // memory has 2 bits which indicate its state (ie. V+A bits):
194 //
195 // 00: noaccess (unaddressable but treated as fully defined)
196 // 01: undefined (addressable and fully undefined)
197 // 10: defined (addressable and fully defined)
198 // 11: partdefined (addressable and partially defined)
199 //
200 // In the "partdefined" case, we use a secondary table to store the V bits.
201 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
202 // bits.
203 //
204 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
205 // four bytes (32 bits) of memory are in each chunk. Hence the name
206 // "vabits8". This lets us get the V+A bits for four bytes at a time
207 // easily (without having to do any shifting and/or masking), and that is a
208 // very common operation. (Note that although each vabits8 chunk
209 // is 8 bits in size, it represents 32 bits of memory.)
210 //
211 // The representation is "inverse" little-endian... each 4 bytes of
212 // memory is represented by a 1 byte value, where:
213 //
214 // - the status of byte (a+0) is held in bits [1..0]
215 // - the status of byte (a+1) is held in bits [3..2]
216 // - the status of byte (a+2) is held in bits [5..4]
217 // - the status of byte (a+3) is held in bits [7..6]
218 //
219 // It's "inverse" because endianness normally describes a mapping from
220 // value bits to memory addresses; in this case the mapping is inverted.
221 // Ie. instead of particular value bits being held in certain addresses, in
222 // this case certain addresses are represented by particular value bits.
223 // See insert_vabits2_into_vabits8() for an example.
224 //
225 // But note that we don't compress the V bits stored in registers; they
226 // need to be explicit to make the shadow operations possible. Therefore
227 // when moving values between registers and memory we need to convert
228 // between the expanded in-register format and the compressed in-memory
229 // format. This isn't so difficult, it just requires careful attention in a
230 // few places.
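// Worked example of the "inverse" layout above (assumed states, for
// illustration only).  Suppose the four bytes at a 4-aligned address a are
//
//    a+0 defined (10b), a+1 undefined (01b), a+2 noaccess (00b),
//    a+3 partdefined (11b)
//
// Then the vabits8 byte shadowing them, reading bits [7..6] down to [1..0],
// is 11_00_01_10b == 0xC6, and extract_vabits2_from_vabits8(a+1, 0xC6)
// computes (0xC6 >> 2) & 0x3 == 01b, ie. undefined, as expected.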
231
232 // These represent eight bits of memory.
233 #define VA_BITS2_NOACCESS 0x0 // 00b
234 #define VA_BITS2_UNDEFINED 0x1 // 01b
235 #define VA_BITS2_DEFINED 0x2 // 10b
236 #define VA_BITS2_PARTDEFINED 0x3 // 11b
237
238 // These represent 16 bits of memory.
239 #define VA_BITS4_NOACCESS 0x0 // 00_00b
240 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
241 #define VA_BITS4_DEFINED 0xa // 10_10b
242
243 // These represent 32 bits of memory.
244 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
245 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
246 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
247
248 // These represent 64 bits of memory.
249 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
250 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
251 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
252
253
254 #define SM_CHUNKS 16384
255 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
256 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
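// Worked example (illustrative): for a == 0x08049f13,
//   SM_OFF(a)    == 0x9f13 >> 2 == 0x27c4  // vabits8[] index; that byte
//                                          // shadows the 4 bytes a&~3 .. a|3
//   SM_OFF_16(a) == 0x9f13 >> 3 == 0x13e2  // index when the secondary is
//                                          // read as UShorts, each shadowing
//                                          // 8 bytes of memory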
257
258 // Paranoia: it's critical for performance that the requested inlining
259 // occurs. So try extra hard.
260 #define INLINE inline __attribute__((always_inline))
261
262 static INLINE Addr start_of_this_sm ( Addr a ) {
263 return (a & (~SM_MASK));
264 }
265 static INLINE Bool is_start_of_sm ( Addr a ) {
266 return (start_of_this_sm(a) == a);
267 }
268
269 typedef
270 struct {
271 UChar vabits8[SM_CHUNKS];
272 }
273 SecMap;
274
275 // 3 distinguished secondary maps, one for no-access, one for
276 // accessible but undefined, and one for accessible and defined.
277 // Distinguished secondaries may never be modified.
278 #define SM_DIST_NOACCESS 0
279 #define SM_DIST_UNDEFINED 1
280 #define SM_DIST_DEFINED 2
281
282 static SecMap sm_distinguished[3];
283
284 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
285 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
286 }
287
288 // Forward declaration
289 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
290
291 /* dist_sm points to one of our three distinguished secondaries. Make
292 a copy of it so that we can write to it.
293 */
294 static SecMap* copy_for_writing ( SecMap* dist_sm )
295 {
296 SecMap* new_sm;
297 tl_assert(dist_sm == &sm_distinguished[0]
298 || dist_sm == &sm_distinguished[1]
299 || dist_sm == &sm_distinguished[2]);
300
301 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
302 if (new_sm == NULL)
303 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
304 sizeof(SecMap) );
305 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
306 update_SM_counts(dist_sm, new_sm);
307 return new_sm;
308 }
309
310 /* --------------- Stats --------------- */
311
312 static Int n_issued_SMs = 0;
313 static Int n_deissued_SMs = 0;
314 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
315 static Int n_undefined_SMs = 0;
316 static Int n_defined_SMs = 0;
317 static Int n_non_DSM_SMs = 0;
318 static Int max_noaccess_SMs = 0;
319 static Int max_undefined_SMs = 0;
320 static Int max_defined_SMs = 0;
321 static Int max_non_DSM_SMs = 0;
322
323 /* # searches initiated in auxmap_L1, and # base cmps required */
324 static ULong n_auxmap_L1_searches = 0;
325 static ULong n_auxmap_L1_cmps = 0;
326 /* # of searches that missed in auxmap_L1 and therefore had to
327 be handed to auxmap_L2. And the number of nodes inserted. */
328 static ULong n_auxmap_L2_searches = 0;
329 static ULong n_auxmap_L2_nodes = 0;
330
331 static Int n_sanity_cheap = 0;
332 static Int n_sanity_expensive = 0;
333
334 static Int n_secVBit_nodes = 0;
335 static Int max_secVBit_nodes = 0;
336
337 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
338 {
339 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
340 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
341 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
342 else { n_non_DSM_SMs --;
343 n_deissued_SMs ++; }
344
345 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
346 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
347 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
348 else { n_non_DSM_SMs ++;
349 n_issued_SMs ++; }
350
351 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
352 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
353 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
354 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
355 }
356
357 /* --------------- Primary maps --------------- */
358
359 /* The main primary map. This covers some initial part of the address
360 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
361 handled using the auxiliary primary map.
362 */
363 static SecMap* primary_map[N_PRIMARY_MAP];
364
365
366 /* An entry in the auxiliary primary map. base must be a 64k-aligned
367 value, and sm points at the relevant secondary map. As with the
368 main primary map, the secondary may be either a real secondary, or
369 one of the three distinguished secondaries. DO NOT CHANGE THIS
370 LAYOUT: the first word has to be the key for OSet fast lookups.
371 */
372 typedef
373 struct {
374 Addr base;
375 SecMap* sm;
376 }
377 AuxMapEnt;
378
379 /* Tunable parameter: How big is the L1 queue? */
380 #define N_AUXMAP_L1 24
381
382 /* Tunable parameter: How far along the L1 queue to insert
383 entries resulting from L2 lookups? */
384 #define AUXMAP_L1_INSERT_IX 12
385
386 static struct {
387 Addr base;
388 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
389 }
390 auxmap_L1[N_AUXMAP_L1];
391
392 static OSet* auxmap_L2 = NULL;
393
394 static void init_auxmap_L1_L2 ( void )
395 {
396 Int i;
397 for (i = 0; i < N_AUXMAP_L1; i++) {
398 auxmap_L1[i].base = 0;
399 auxmap_L1[i].ent = NULL;
400 }
401
402 tl_assert(0 == offsetof(AuxMapEnt,base));
403 tl_assert(sizeof(Addr) == sizeof(void*));
404 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
405 /*fastCmp*/ NULL,
406 VG_(malloc), "mc.iaLL.1", VG_(free) );
407 }
408
409 /* Check representation invariants; if OK return NULL; else a
410 descriptive bit of text. Also return the number of
411 non-distinguished secondary maps referred to from the auxiliary
412 primary maps. */
413
414 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
415 {
416 Word i, j;
417 /* On a 32-bit platform, the L2 and L1 tables should
418 both remain empty forever.
419
420 On a 64-bit platform:
421 In the L2 table:
422 all .base & 0xFFFF == 0
423 all .base > MAX_PRIMARY_ADDRESS
424 In the L1 table:
425 all .base & 0xFFFF == 0
426 all (.base > MAX_PRIMARY_ADDRESS
427 .base & 0xFFFF == 0
428 and .ent points to an AuxMapEnt with the same .base)
429 or
430 (.base == 0 and .ent == NULL)
431 */
432 *n_secmaps_found = 0;
433 if (sizeof(void*) == 4) {
434 /* 32-bit platform */
435 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
436 return "32-bit: auxmap_L2 is non-empty";
437 for (i = 0; i < N_AUXMAP_L1; i++)
438 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
439 return "32-bit: auxmap_L1 is non-empty";
440 } else {
441 /* 64-bit platform */
442 UWord elems_seen = 0;
443 AuxMapEnt *elem, *res;
444 AuxMapEnt key;
445 /* L2 table */
446 VG_(OSetGen_ResetIter)(auxmap_L2);
447 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
448 elems_seen++;
449 if (0 != (elem->base & (Addr)0xFFFF))
450 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
451 if (elem->base <= MAX_PRIMARY_ADDRESS)
452 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
453 if (elem->sm == NULL)
454 return "64-bit: .sm in _L2 is NULL";
455 if (!is_distinguished_sm(elem->sm))
456 (*n_secmaps_found)++;
457 }
458 if (elems_seen != n_auxmap_L2_nodes)
459 return "64-bit: disagreement on number of elems in _L2";
460 /* Check L1-L2 correspondence */
461 for (i = 0; i < N_AUXMAP_L1; i++) {
462 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
463 continue;
464 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
465 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
466 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
467 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
468 if (auxmap_L1[i].ent == NULL)
469 return "64-bit: .ent is NULL in auxmap_L1";
470 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
471 return "64-bit: _L1 and _L2 bases are inconsistent";
472 /* Look it up in auxmap_L2. */
473 key.base = auxmap_L1[i].base;
474 key.sm = 0;
475 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
476 if (res == NULL)
477 return "64-bit: _L1 .base not found in _L2";
478 if (res != auxmap_L1[i].ent)
479 return "64-bit: _L1 .ent disagrees with _L2 entry";
480 }
481 /* Check L1 contains no duplicates */
482 for (i = 0; i < N_AUXMAP_L1; i++) {
483 if (auxmap_L1[i].base == 0)
484 continue;
485 for (j = i+1; j < N_AUXMAP_L1; j++) {
486 if (auxmap_L1[j].base == 0)
487 continue;
488 if (auxmap_L1[j].base == auxmap_L1[i].base)
489 return "64-bit: duplicate _L1 .base entries";
490 }
491 }
492 }
493 return NULL; /* ok */
494 }
495
496 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
497 {
498 Word i;
499 tl_assert(ent);
500 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
501 for (i = N_AUXMAP_L1-1; i > rank; i--)
502 auxmap_L1[i] = auxmap_L1[i-1];
503 auxmap_L1[rank].base = ent->base;
504 auxmap_L1[rank].ent = ent;
505 }
506
507 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
508 {
509 AuxMapEnt key;
510 AuxMapEnt* res;
511 Word i;
512
513 tl_assert(a > MAX_PRIMARY_ADDRESS);
514 a &= ~(Addr)0xFFFF;
515
516 /* First search the front-cache, which is a self-organising
517 list containing the most popular entries. */
518
519 if (LIKELY(auxmap_L1[0].base == a))
520 return auxmap_L1[0].ent;
521 if (LIKELY(auxmap_L1[1].base == a)) {
522 Addr t_base = auxmap_L1[0].base;
523 AuxMapEnt* t_ent = auxmap_L1[0].ent;
524 auxmap_L1[0].base = auxmap_L1[1].base;
525 auxmap_L1[0].ent = auxmap_L1[1].ent;
526 auxmap_L1[1].base = t_base;
527 auxmap_L1[1].ent = t_ent;
528 return auxmap_L1[0].ent;
529 }
530
531 n_auxmap_L1_searches++;
532
533 for (i = 0; i < N_AUXMAP_L1; i++) {
534 if (auxmap_L1[i].base == a) {
535 break;
536 }
537 }
538 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
539
540 n_auxmap_L1_cmps += (ULong)(i+1);
541
542 if (i < N_AUXMAP_L1) {
543 if (i > 0) {
544 Addr t_base = auxmap_L1[i-1].base;
545 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
546 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
547 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
548 auxmap_L1[i-0].base = t_base;
549 auxmap_L1[i-0].ent = t_ent;
550 i--;
551 }
552 return auxmap_L1[i].ent;
553 }
554
555 n_auxmap_L2_searches++;
556
557 /* First see if we already have it. */
558 key.base = a;
559 key.sm = 0;
560
561 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
562 if (res)
563 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
564 return res;
565 }
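/* Illustrative trace (assumed workload) of the self-organising L1 above:
   an address that misses L1 but hits L2 is inserted at slot
   AUXMAP_L1_INSERT_IX (12); every later L1 hit swaps it one slot towards
   the front, so after a dozen further lookups it reaches slots 0/1, which
   are served by the two LIKELY checks at the top without scanning. */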
566
567 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
568 {
569 AuxMapEnt *nyu, *res;
570
571 /* First see if we already have it. */
572 res = maybe_find_in_auxmap( a );
573 if (LIKELY(res))
574 return res;
575
576 /* Ok, there's no entry in the secondary map, so we'll have
577 to allocate one. */
578 a &= ~(Addr)0xFFFF;
579
580 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
581 nyu->base = a;
582 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
583 VG_(OSetGen_Insert)( auxmap_L2, nyu );
584 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
585 n_auxmap_L2_nodes++;
586 return nyu;
587 }
588
589 /* --------------- SecMap fundamentals --------------- */
590
591 // In all these, 'low' means it's definitely in the main primary map,
592 // 'high' means it's definitely in the auxiliary table.
593
594 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
595 {
596 UWord pm_off = a >> 16;
597 # if VG_DEBUG_MEMORY >= 1
598 tl_assert(pm_off < N_PRIMARY_MAP);
599 # endif
600 return &primary_map[ pm_off ];
601 }
602
603 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
604 {
605 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
606 return &am->sm;
607 }
608
609 static INLINE SecMap** get_secmap_ptr ( Addr a )
610 {
611 return ( a <= MAX_PRIMARY_ADDRESS
612 ? get_secmap_low_ptr(a)
613 : get_secmap_high_ptr(a));
614 }
615
616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
617 {
618 return *get_secmap_low_ptr(a);
619 }
620
621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
622 {
623 return *get_secmap_high_ptr(a);
624 }
625
626 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
627 {
628 SecMap** p = get_secmap_low_ptr(a);
629 if (UNLIKELY(is_distinguished_sm(*p)))
630 *p = copy_for_writing(*p);
631 return *p;
632 }
633
634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
635 {
636 SecMap** p = get_secmap_high_ptr(a);
637 if (UNLIKELY(is_distinguished_sm(*p)))
638 *p = copy_for_writing(*p);
639 return *p;
640 }
641
642 /* Produce the secmap for 'a', either from the primary map or by
643 ensuring there is an entry for it in the aux primary map. The
644 secmap may be a distinguished one as the caller will only want to
645 be able to read it.
646 */
647 static INLINE SecMap* get_secmap_for_reading ( Addr a )
648 {
649 return ( a <= MAX_PRIMARY_ADDRESS
650 ? get_secmap_for_reading_low (a)
651 : get_secmap_for_reading_high(a) );
652 }
653
654 /* Produce the secmap for 'a', either from the primary map or by
655 ensuring there is an entry for it in the aux primary map. The
656 secmap may not be a distinguished one, since the caller will want
657 to be able to write it. If it is a distinguished secondary, make a
658 writable copy of it, install it, and return the copy instead. (COW
659 semantics).
660 */
661 static INLINE SecMap* get_secmap_for_writing ( Addr a )
662 {
663 return ( a <= MAX_PRIMARY_ADDRESS
664 ? get_secmap_for_writing_low (a)
665 : get_secmap_for_writing_high(a) );
666 }
667
668 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
669 allocate one if one doesn't already exist. This is used by the
670 leak checker.
671 */
672 static SecMap* maybe_get_secmap_for ( Addr a )
673 {
674 if (a <= MAX_PRIMARY_ADDRESS) {
675 return get_secmap_for_reading_low(a);
676 } else {
677 AuxMapEnt* am = maybe_find_in_auxmap(a);
678 return am ? am->sm : NULL;
679 }
680 }
681
682 /* --------------- Fundamental functions --------------- */
683
684 static INLINE
685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
686 {
687 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
688 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
689 *vabits8 |= (vabits2 << shift); // mask in the two new bits
690 }
691
692 static INLINE
693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
694 {
695 UInt shift;
696 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
697 shift = (a & 2) << 1; // shift by 0 or 4
698 *vabits8 &= ~(0xf << shift); // mask out the four old bits
699 *vabits8 |= (vabits4 << shift); // mask in the four new bits
700 }
701
702 static INLINE
703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
704 {
705 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
706 vabits8 >>= shift; // shift the two bits to the bottom
707 return 0x3 & vabits8; // mask out the rest
708 }
709
710 static INLINE
711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
712 {
713 UInt shift;
714 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
715 shift = (a & 2) << 1; // shift by 0 or 4
716 vabits8 >>= shift; // shift the four bits to the bottom
717 return 0xf & vabits8; // mask out the rest
718 }
719
720 // Note that these four are only used in slow cases. The fast cases do
721 // clever things like combine the auxmap check (in
722 // get_secmap_{read,writ}able) with alignment checks.
723
724 // *** WARNING! ***
725 // Any time this function is called, if it is possible that vabits2
726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
727 // sec-V-bits table must also be set!
728 static INLINE
729 void set_vabits2 ( Addr a, UChar vabits2 )
730 {
731 SecMap* sm = get_secmap_for_writing(a);
732 UWord sm_off = SM_OFF(a);
733 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
734 }
735
736 static INLINE
737 UChar get_vabits2 ( Addr a )
738 {
739 SecMap* sm = get_secmap_for_reading(a);
740 UWord sm_off = SM_OFF(a);
741 UChar vabits8 = sm->vabits8[sm_off];
742 return extract_vabits2_from_vabits8(a, vabits8);
743 }
744
745 // *** WARNING! ***
746 // Any time this function is called, if it is possible that any of the
747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
748 // corresponding entry(s) in the sec-V-bits table must also be set!
749 static INLINE
750 UChar get_vabits8_for_aligned_word32 ( Addr a )
751 {
752 SecMap* sm = get_secmap_for_reading(a);
753 UWord sm_off = SM_OFF(a);
754 UChar vabits8 = sm->vabits8[sm_off];
755 return vabits8;
756 }
757
758 static INLINE
759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
760 {
761 SecMap* sm = get_secmap_for_writing(a);
762 UWord sm_off = SM_OFF(a);
763 sm->vabits8[sm_off] = vabits8;
764 }
765
766
767 // Forward declarations
768 static UWord get_sec_vbits8(Addr a);
769 static void set_sec_vbits8(Addr a, UWord vbits8);
770
771 // Returns False if there was an addressability error.
772 static INLINE
773 Bool set_vbits8 ( Addr a, UChar vbits8 )
774 {
775 Bool ok = True;
776 UChar vabits2 = get_vabits2(a);
777 if ( VA_BITS2_NOACCESS != vabits2 ) {
778 // Addressable. Convert in-register format to in-memory format.
779 // Also remove any existing sec V bit entry for the byte if no
780 // longer necessary.
781 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
782 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
783 else { vabits2 = VA_BITS2_PARTDEFINED;
784 set_sec_vbits8(a, vbits8); }
785 set_vabits2(a, vabits2);
786
787 } else {
788 // Unaddressable! Do nothing -- when writing to unaddressable
789 // memory it acts as a black hole, and the V bits can never be seen
790 // again. So we don't have to write them at all.
791 ok = False;
792 }
793 return ok;
794 }
795
796 // Returns False if there was an addressability error. In that case, we put
797 // all defined bits into vbits8.
798 static INLINE
799 Bool get_vbits8 ( Addr a, UChar* vbits8 )
800 {
801 Bool ok = True;
802 UChar vabits2 = get_vabits2(a);
803
804 // Convert the in-memory format to in-register format.
805 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
806 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
807 else if ( VA_BITS2_NOACCESS == vabits2 ) {
808 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
809 ok = False;
810 } else {
811 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
812 *vbits8 = get_sec_vbits8(a);
813 }
814 return ok;
815 }
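/* Summary of the conversions performed by set_vbits8/get_vbits8 above:

      VA_BITS2_DEFINED     <->  V_BITS8_DEFINED   (0x00)
      VA_BITS2_UNDEFINED   <->  V_BITS8_UNDEFINED (0xFF)
      VA_BITS2_PARTDEFINED <->  the byte's entry in the sec-V-bits table
      VA_BITS2_NOACCESS    :    reads hand back V_BITS8_DEFINED and signal
                                failure; writes are dropped entirely. */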
816
817
818 /* --------------- Secondary V bit table ------------ */
819
820 // This table holds the full V bit pattern for partially-defined bytes
821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
822 // memory.
823 //
824 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
825 // then overwrite the same address with a fully defined byte, the sec-V-bit
826 // node will not necessarily be removed. This is because checking for
827 // whether removal is necessary would slow down the fast paths.
828 //
829 // To avoid the stale nodes building up too much, we periodically (once the
830 // table reaches a certain size) garbage collect (GC) the table by
831 // traversing it and evicting any nodes that no longer hold any PDBs.
832 // If more than a certain proportion of nodes survived, we increase the
833 // table size so that GCs occur less often.
834 //
835 // This policy is designed to avoid bad table bloat in the worst case where
836 // a program creates huge numbers of stale PDBs -- we would get this bloat
837 // if we had no GC -- while handling well the case where a node becomes
838 // stale but shortly afterwards is rewritten with a PDB and so becomes
839 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
840 // remove all stale nodes as soon as possible, we just end up re-adding a
841 // lot of them in later again. The "sufficiently stale" approach avoids
842 // this. (If a program has many live PDBs, performance will just suck,
843 // there's no way around that.)
844 //
845 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
846 // holding on to stale entries for 2 GCs before discarding them can lead
847 // to massive space leaks. So we're changing to an arrangement where
848 // lines are evicted as soon as they are observed to be stale during a
849 // GC. This also has a side benefit of allowing the sufficiently_stale
850 // field to be removed from the SecVBitNode struct, reducing its size by
851 // 8 bytes, which is a substantial space saving considering that the
852 // struct was previously 32 or so bytes, on a 64 bit target.
853 //
854 // In order to try and mitigate the problem that the "sufficiently stale"
855 // heuristic was designed to avoid, the table size is allowed to drift
856 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
857 // means that nodes will exist in the table longer on average, and hopefully
858 // will be deleted and re-added less frequently.
859 //
860 // The previous scaling up mechanism (now called STEPUP) is retained:
861 // if residency exceeds 50%, the table is scaled up, although by a
862 // factor sqrt(2) rather than 2 as before. This effectively doubles the
863 // frequency of GCs when there are many PDBs and reduces the tendency of
864 // stale PDBs to reside for long periods in the table.
865
866 static OSet* secVBitTable;
867
868 // Stats
869 static ULong sec_vbits_new_nodes = 0;
870 static ULong sec_vbits_updates = 0;
871
872 // This must be a power of two; this is checked in mc_pre_clo_init().
873 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
874 // a larger address range) they take more space but we can get multiple
875 // partially-defined bytes in one if they are close to each other, reducing
876 // the number of total nodes. In practice sometimes they are clustered (eg.
877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
878 // row), but often not. So we choose something intermediate.
879 #define BYTES_PER_SEC_VBIT_NODE 16
880
881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
882 // more than this many nodes survive a GC.
883 #define STEPUP_SURVIVOR_PROPORTION 0.5
884 #define STEPUP_GROWTH_FACTOR 1.414213562
885
886 // If the above heuristic doesn't apply, then we may make the table
887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
888 // this many nodes survive a GC, _and_ the total table size does
889 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
890 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
891 // effectively, although gradually, reduces residency and increases time
892 // between GCs for programs with small numbers of PDBs. The 80000 limit
893 // effectively limits the table size to around 2MB for programs with
894 // small numbers of PDBs, whilst giving a reasonably long lifetime to
895 // entries, to try and reduce the costs resulting from deleting and
896 // re-adding of entries.
897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
898 #define DRIFTUP_GROWTH_FACTOR 1.015
899 #define DRIFTUP_MAX_SIZE 80000
900
901 // We GC the table when it gets this many nodes in it, ie. it's effectively
902 // the table size. It can change.
903 static Int secVBitLimit = 1000;
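// Worked example (illustrative) of how the limit evolves: starting at 1000,
// a GC in which more than 500 nodes survive (STEPUP) raises the limit to
// about 1414; a GC in which between 150 and 500 nodes survive (DRIFTUP)
// raises it only to 1015.  DRIFTUP stops applying once the limit reaches
// 80000; STEPUP has no such cap.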
904
905 // The number of GCs done, used to age sec-V-bit nodes for eviction.
906 // Because it's unsigned, wrapping doesn't matter -- the right answer will
907 // come out anyway.
908 static UInt GCs_done = 0;
909
910 typedef
911 struct {
912 Addr a;
913 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
914 }
915 SecVBitNode;
916
917 static OSet* createSecVBitTable(void)
918 {
919 OSet* newSecVBitTable;
920 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
921 ( offsetof(SecVBitNode, a),
922 NULL, // use fast comparisons
923 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
924 VG_(free),
925 1000,
926 sizeof(SecVBitNode));
927 return newSecVBitTable;
928 }
929
930 static void gcSecVBitTable(void)
931 {
932 OSet* secVBitTable2;
933 SecVBitNode* n;
934 Int i, n_nodes = 0, n_survivors = 0;
935
936 GCs_done++;
937
938 // Create the new table.
939 secVBitTable2 = createSecVBitTable();
940
941 // Traverse the table, moving fresh nodes into the new table.
942 VG_(OSetGen_ResetIter)(secVBitTable);
943 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
944 // Keep node if any of its bytes are non-stale. Using
945 // get_vabits2() for the lookup is not very efficient, but I don't
946 // think it matters.
947 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
948 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
949 // Found a non-stale byte, so keep =>
950 // Insert a copy of the node into the new table.
951 SecVBitNode* n2 =
952 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
953 *n2 = *n;
954 VG_(OSetGen_Insert)(secVBitTable2, n2);
955 break;
956 }
957 }
958 }
959
960 // Get the before and after sizes.
961 n_nodes = VG_(OSetGen_Size)(secVBitTable);
962 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
963
964 // Destroy the old table, and put the new one in its place.
965 VG_(OSetGen_Destroy)(secVBitTable);
966 secVBitTable = secVBitTable2;
967
968 if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
969 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
970 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
971 }
972
973 // Increase table size if necessary.
974 if ((Double)n_survivors
975 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
976 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
977 if (VG_(clo_verbosity) > 1)
978 VG_(message)(Vg_DebugMsg,
979 "memcheck GC: %d new table size (stepup)\n",
980 secVBitLimit);
981 }
982 else
983 if (secVBitLimit < DRIFTUP_MAX_SIZE
984 && (Double)n_survivors
985 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
986 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
987 if (VG_(clo_verbosity) > 1)
988 VG_(message)(Vg_DebugMsg,
989 "memcheck GC: %d new table size (driftup)\n",
990 secVBitLimit);
991 }
992 }
993
994 static UWord get_sec_vbits8(Addr a)
995 {
996 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
997 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
998 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
999 UChar vbits8;
1000 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1001 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1002 // make it to the secondary V bits table.
1003 vbits8 = n->vbits8[amod];
1004 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1005 return vbits8;
1006 }
1007
1008 static void set_sec_vbits8(Addr a, UWord vbits8)
1009 {
1010 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1011 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1012 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1013 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1014 // make it to the secondary V bits table.
1015 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1016 if (n) {
1017 n->vbits8[amod] = vbits8; // update
1018 sec_vbits_updates++;
1019 } else {
1020 // Do a table GC if necessary. Nb: do this before creating and
1021 // inserting the new node, to avoid erroneously GC'ing the new node.
1022 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1023 gcSecVBitTable();
1024 }
1025
1026 // New node: assign the specific byte, make the rest invalid (they
1027 // should never be read as-is, but be cautious).
1028 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1029 n->a = aAligned;
1030 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1031 n->vbits8[i] = V_BITS8_UNDEFINED;
1032 }
1033 n->vbits8[amod] = vbits8;
1034
1035 // Insert the new node.
1036 VG_(OSetGen_Insert)(secVBitTable, n);
1037 sec_vbits_new_nodes++;
1038
1039 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1040 if (n_secVBit_nodes > max_secVBit_nodes)
1041 max_secVBit_nodes = n_secVBit_nodes;
1042 }
1043 }
1044
1045 /* --------------- Endianness helpers --------------- */
1046
1047 /* Returns the offset in memory of the byteno-th least significant byte
1048 in a wordszB-sized word, given the specified endianness. */
1049 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1050 UWord byteno ) {
1051 return bigendian ? (wordszB-1-byteno) : byteno;
1052 }
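/* Illustrative: for a 4-byte word at address a, with byteno counting from
   the least significant byte,

      byteno         :  0    1    2    3
      little-endian  :  a+0  a+1  a+2  a+3
      big-endian     :  a+3  a+2  a+1  a+0

   which lets the slow-path load/store routines below walk a value byte by
   byte in significance order, independent of guest endianness. */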
1053
1054
1055 /* --------------- Ignored address ranges --------------- */
1056
1057 /* Denotes the address-error-reportability status for address ranges:
1058 IAR_NotIgnored: the usual case -- report errors in this range
1059 IAR_CommandLine: don't report errors -- from command line setting
1060 IAR_ClientReq: don't report errors -- from client request
1061 */
1062 typedef
1063 enum { IAR_INVALID=99,
1064 IAR_NotIgnored,
1065 IAR_CommandLine,
1066 IAR_ClientReq }
1067 IARKind;
1068
1069 static const HChar* showIARKind ( IARKind iark )
1070 {
1071 switch (iark) {
1072 case IAR_INVALID: return "INVALID";
1073 case IAR_NotIgnored: return "NotIgnored";
1074 case IAR_CommandLine: return "CommandLine";
1075 case IAR_ClientReq: return "ClientReq";
1076 default: return "???";
1077 }
1078 }
1079
1080 // RangeMap<IARKind>
1081 static RangeMap* gIgnoredAddressRanges = NULL;
1082
1083 static void init_gIgnoredAddressRanges ( void )
1084 {
1085 if (LIKELY(gIgnoredAddressRanges != NULL))
1086 return;
1087 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1088 VG_(free), IAR_NotIgnored );
1089 }
1090
1091 Bool MC_(in_ignored_range) ( Addr a )
1092 {
1093 if (LIKELY(gIgnoredAddressRanges == NULL))
1094 return False;
1095 UWord how = IAR_INVALID;
1096 UWord key_min = ~(UWord)0;
1097 UWord key_max = (UWord)0;
1098 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1099 tl_assert(key_min <= a && a <= key_max);
1100 switch (how) {
1101 case IAR_NotIgnored: return False;
1102 case IAR_CommandLine: return True;
1103 case IAR_ClientReq: return True;
1104 default: break; /* invalid */
1105 }
1106 VG_(tool_panic)("MC_(in_ignored_range)");
1107 /*NOTREACHED*/
1108 }
1109
1110 /* Parse two Addr separated by a dash, or fail. */
1111
1112 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
1113 {
1114 Bool ok = VG_(parse_Addr) (ppc, result1);
1115 if (!ok)
1116 return False;
1117 if (**ppc != '-')
1118 return False;
1119 (*ppc)++;
1120 ok = VG_(parse_Addr) (ppc, result2);
1121 if (!ok)
1122 return False;
1123 return True;
1124 }
1125
1126 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1127 fail. If they are valid, add them to the global set of ignored
1128 ranges. */
1129 static Bool parse_ignore_ranges ( const HChar* str0 )
1130 {
1131 init_gIgnoredAddressRanges();
1132 const HChar* str = str0;
1133 const HChar** ppc = &str;
1134 while (1) {
1135 Addr start = ~(Addr)0;
1136 Addr end = (Addr)0;
1137 Bool ok = parse_range(ppc, &start, &end);
1138 if (!ok)
1139 return False;
1140 if (start > end)
1141 return False;
1142 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1143 if (**ppc == 0)
1144 return True;
1145 if (**ppc != ',')
1146 return False;
1147 (*ppc)++;
1148 }
1149 /*NOTREACHED*/
1150 return False;
1151 }
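/* Example of the syntax accepted above (this is what memcheck's
   --ignore-ranges command line option passes in): a comma-separated list
   of hex start-end pairs, eg.

      0x1000-0x1fff,0xa0000000-0xa000ffff

   Each pair is parsed by parse_range() and bound to IAR_CommandLine in
   gIgnoredAddressRanges. */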
1152
1153 /* Add or remove [start, +len) from the set of ignored ranges. */
1154 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1155 {
1156 init_gIgnoredAddressRanges();
1157 const Bool verbose = (VG_(clo_verbosity) > 1);
1158 if (len == 0) {
1159 return False;
1160 }
1161 if (addRange) {
1162 VG_(bindRangeMap)(gIgnoredAddressRanges,
1163 start, start+len-1, IAR_ClientReq);
1164 if (verbose)
1165 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1166 (void*)start, (void*)(start+len-1));
1167 } else {
1168 VG_(bindRangeMap)(gIgnoredAddressRanges,
1169 start, start+len-1, IAR_NotIgnored);
1170 if (verbose)
1171 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1172 (void*)start, (void*)(start+len-1));
1173 }
1174 if (verbose) {
1175 VG_(dmsg)("memcheck: now have %ld ranges:\n",
1176 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1177 Word i;
1178 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1179 UWord val = IAR_INVALID;
1180 UWord key_min = ~(UWord)0;
1181 UWord key_max = (UWord)0;
1182 VG_(indexRangeMap)( &key_min, &key_max, &val,
1183 gIgnoredAddressRanges, i );
1184 VG_(dmsg)("memcheck: [%ld] %016llx-%016llx %s\n",
1185 i, (ULong)key_min, (ULong)key_max, showIARKind(val));
1186 }
1187 }
1188 return True;
1189 }
1190
1191
1192 /* --------------- Load/store slow cases. --------------- */
1193
1194 static
1195 __attribute__((noinline))
1196 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1197 Addr a, SizeT nBits, Bool bigendian )
1198 {
1199 ULong pessim[4]; /* only used when p-l-ok=yes */
1200 SSizeT szB = nBits / 8;
1201 SSizeT szL = szB / 8; /* Size in Longs (64-bit units) */
1202 SSizeT i, j; /* Must be signed. */
1203 SizeT n_addrs_bad = 0;
1204 Addr ai;
1205 UChar vbits8;
1206 Bool ok;
1207
1208 /* Code below assumes load size is a power of two and at least 64
1209 bits. */
1210 tl_assert((szB & (szB-1)) == 0 && szL > 0);
1211
1212 /* If this triggers, you probably just need to increase the size of
1213 the pessim array. */
1214 tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1215
1216 for (j = 0; j < szL; j++) {
1217 pessim[j] = V_BITS64_DEFINED;
1218 res[j] = V_BITS64_UNDEFINED;
1219 }
1220
1221 /* Make up a result V word, which contains the loaded data for
1222 valid addresses and Defined for invalid addresses. Iterate over
1223 the bytes in the word, from the most significant down to the
1224 least. The vbits to return are calculated into vbits128. Also
1225 compute the pessimising value to be used when
1226 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1227 info can be gleaned from the pessim array) but is used as a
1228 cross-check. */
1229 for (j = szL-1; j >= 0; j--) {
1230 ULong vbits64 = V_BITS64_UNDEFINED;
1231 ULong pessim64 = V_BITS64_DEFINED;
1232 UWord long_index = byte_offset_w(szL, bigendian, j);
1233 for (i = 8-1; i >= 0; i--) {
1234 PROF_EVENT(29, "mc_LOADV_128_or_256_slow(loop)");
1235 ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1236 ok = get_vbits8(ai, &vbits8);
1237 vbits64 <<= 8;
1238 vbits64 |= vbits8;
1239 if (!ok) n_addrs_bad++;
1240 pessim64 <<= 8;
1241 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1242 }
1243 res[long_index] = vbits64;
1244 pessim[long_index] = pessim64;
1245 }
1246
1247 /* In the common case, all the addresses involved are valid, so we
1248 just return the computed V bits and have done. */
1249 if (LIKELY(n_addrs_bad == 0))
1250 return;
1251
1252 /* If there's no possibility of getting a partial-loads-ok
1253 exemption, report the error and quit. */
1254 if (!MC_(clo_partial_loads_ok)) {
1255 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1256 return;
1257 }
1258
1259 /* The partial-loads-ok exemption might apply. Find out if it
1260 does. If so, don't report an addressing error, but do return
1261 Undefined for the bytes that are out of range, so as to avoid
1262 false negatives. If it doesn't apply, just report an addressing
1263 error in the usual way. */
1264
1265 /* Some code steps along byte strings in aligned chunks
1266 even when there is only a partially defined word at the end (eg,
1267 optimised strlen). This is allowed by the memory model of
1268 modern machines, since an aligned load cannot span two pages and
1269 thus cannot "partially fault".
1270
1271 Therefore, a load from a partially-addressable place is allowed
1272 if all of the following hold:
1273 - the command-line flag is set [by default, it isn't]
1274 - it's an aligned load
1275 - at least one of the addresses in the word *is* valid
1276
1277 Since this suppresses the addressing error, we avoid false
1278 negatives by marking bytes undefined when they come from an
1279 invalid address.
1280 */
1281
1282 /* "at least one of the addresses is invalid" */
1283 ok = False;
1284 for (j = 0; j < szL; j++)
1285 ok |= pessim[j] != V_BITS64_DEFINED;
1286 tl_assert(ok);
1287
1288 if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1289 /* Exemption applies. Use the previously computed pessimising
1290 value and return the combined result, but don't flag an
1291 addressing error. The pessimising value is Defined for valid
1292 addresses and Undefined for invalid addresses. */
1293 /* for assumption that doing bitwise or implements UifU */
1294 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1295 /* (really need "UifU" here...)
1296 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1297 for (j = szL-1; j >= 0; j--)
1298 res[j] |= pessim[j];
1299 return;
1300 }
1301
1302 /* Exemption doesn't apply. Flag an addressing error in the normal
1303 way. */
1304 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1305 }
1306
1307
1308 static
1309 __attribute__((noinline))
1310 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1311 {
1312 PROF_EVENT(30, "mc_LOADVn_slow");
1313
1314 /* ------------ BEGIN semi-fast cases ------------ */
1315 /* These deal quickly-ish with the common auxiliary primary map
1316 cases on 64-bit platforms. Are merely a speedup hack; can be
1317 omitted without loss of correctness/functionality. Note that in
1318 both cases the "sizeof(void*) == 8" causes these cases to be
1319 folded out by compilers on 32-bit platforms. These are derived
1320 from LOADV64 and LOADV32.
1321 */
1322 if (LIKELY(sizeof(void*) == 8
1323 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1324 SecMap* sm = get_secmap_for_reading(a);
1325 UWord sm_off16 = SM_OFF_16(a);
1326 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1327 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1328 return V_BITS64_DEFINED;
1329 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1330 return V_BITS64_UNDEFINED;
1331 /* else fall into the slow case */
1332 }
1333 if (LIKELY(sizeof(void*) == 8
1334 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1335 SecMap* sm = get_secmap_for_reading(a);
1336 UWord sm_off = SM_OFF(a);
1337 UWord vabits8 = sm->vabits8[sm_off];
1338 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1339 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1340 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1341 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1342 /* else fall into slow case */
1343 }
1344 /* ------------ END semi-fast cases ------------ */
1345
1346 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1347 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1348 SSizeT szB = nBits / 8;
1349 SSizeT i; /* Must be signed. */
1350 SizeT n_addrs_bad = 0;
1351 Addr ai;
1352 UChar vbits8;
1353 Bool ok;
1354
1355 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1356
1357 /* Make up a 64-bit result V word, which contains the loaded data
1358 for valid addresses and Defined for invalid addresses. Iterate
1359 over the bytes in the word, from the most significant down to
1360 the least. The vbits to return are calculated into vbits64.
1361 Also compute the pessimising value to be used when
1362 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1363 info can be gleaned from pessim64) but is used as a
1364 cross-check. */
1365 for (i = szB-1; i >= 0; i--) {
1366 PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1367 ai = a + byte_offset_w(szB, bigendian, i);
1368 ok = get_vbits8(ai, &vbits8);
1369 vbits64 <<= 8;
1370 vbits64 |= vbits8;
1371 if (!ok) n_addrs_bad++;
1372 pessim64 <<= 8;
1373 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1374 }
1375
1376 /* In the common case, all the addresses involved are valid, so we
1377 just return the computed V bits and have done. */
1378 if (LIKELY(n_addrs_bad == 0))
1379 return vbits64;
1380
1381 /* If there's no possibility of getting a partial-loads-ok
1382 exemption, report the error and quit. */
1383 if (!MC_(clo_partial_loads_ok)) {
1384 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1385 return vbits64;
1386 }
1387
1388 /* The partial-loads-ok exemption might apply. Find out if it
1389 does. If so, don't report an addressing error, but do return
1390 Undefined for the bytes that are out of range, so as to avoid
1391 false negatives. If it doesn't apply, just report an addressing
1392 error in the usual way. */
1393
1394 /* Some code steps along byte strings in aligned word-sized chunks
1395 even when there is only a partially defined word at the end (eg,
1396 optimised strlen). This is allowed by the memory model of
1397 modern machines, since an aligned load cannot span two pages and
1398 thus cannot "partially fault". Despite such behaviour being
1399 declared undefined by ANSI C/C++.
1400
1401 Therefore, a load from a partially-addressable place is allowed
1402 if all of the following hold:
1403 - the command-line flag is set [by default, it isn't]
1404 - it's a word-sized, word-aligned load
1405 - at least one of the addresses in the word *is* valid
1406
1407 Since this suppresses the addressing error, we avoid false
1408 negatives by marking bytes undefined when they come from an
1409 invalid address.
1410 */
1411
1412 /* "at least one of the addresses is invalid" */
1413 tl_assert(pessim64 != V_BITS64_DEFINED);
1414
1415 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1416 && n_addrs_bad < VG_WORDSIZE) {
1417 /* Exemption applies. Use the previously computed pessimising
1418 value for vbits64 and return the combined result, but don't
1419 flag an addressing error. The pessimising value is Defined
1420 for valid addresses and Undefined for invalid addresses. */
1421 /* for assumption that doing bitwise or implements UifU */
1422 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1423 /* (really need "UifU" here...)
1424 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1425 vbits64 |= pessim64;
1426 return vbits64;
1427 }
1428
1429 /* Also, it appears that gcc generates string-stepping code in
1430 32-bit chunks on 64 bit platforms. So, also grant an exception
1431 for this case. Note that the first clause of the conditional
1432 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1433 will get folded out in 32 bit builds. */
1434 if (VG_WORDSIZE == 8
1435 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1436 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1437 /* (really need "UifU" here...)
1438 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1439 vbits64 |= pessim64;
1440 /* Mark the upper 32 bits as undefined, just to be on the safe
1441 side. */
1442 vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1443 return vbits64;
1444 }
1445
1446 /* Exemption doesn't apply. Flag an addressing error in the normal
1447 way. */
1448 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1449
1450 return vbits64;
1451 }
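/* Illustrative example (assumed values) of the partial-loads-ok merge
   above.  Suppose an aligned 8-byte little-endian load runs past the end
   of an addressable region, so that the top 3 bytes are unaddressable:

      vbits64  = 0x000000xxxxxxxxxx  // bad bytes read back as Defined (0x00)
      pessim64 = 0xFFFFFF0000000000  // bad bytes marked Undefined (0xFF)

   vbits64 |= pessim64 leaves the 5 valid bytes' V bits untouched and
   forces the 3 out-of-range bytes to Undefined, so the load is allowed
   but any later use of those bytes is still reported. */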
1452
1453
1454 static
1455 __attribute__((noinline))
1456 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1457 {
1458 SizeT szB = nBits / 8;
1459 SizeT i, n_addrs_bad = 0;
1460 UChar vbits8;
1461 Addr ai;
1462 Bool ok;
1463
1464 PROF_EVENT(35, "mc_STOREVn_slow");
1465
1466 /* ------------ BEGIN semi-fast cases ------------ */
1467 /* These deal quickly-ish with the common auxiliary primary map
1468 cases on 64-bit platforms. Are merely a speedup hack; can be
1469 omitted without loss of correctness/functionality. Note that in
1470 both cases the "sizeof(void*) == 8" causes these cases to be
1471 folded out by compilers on 32-bit platforms. The logic below
1472 is somewhat similar to some cases extensively commented in
1473 MC_(helperc_STOREV8).
1474 */
1475 if (LIKELY(sizeof(void*) == 8
1476 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1477 SecMap* sm = get_secmap_for_reading(a);
1478 UWord sm_off16 = SM_OFF_16(a);
1479 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1480 if (LIKELY( !is_distinguished_sm(sm) &&
1481 (VA_BITS16_DEFINED == vabits16 ||
1482 VA_BITS16_UNDEFINED == vabits16) )) {
1483 /* Handle common case quickly: a is suitably aligned, */
1484 /* is mapped, and is addressible. */
1485 // Convert full V-bits in register to compact 2-bit form.
1486 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1487 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1488 return;
1489 } else if (V_BITS64_UNDEFINED == vbytes) {
1490 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1491 return;
1492 }
1493 /* else fall into the slow case */
1494 }
1495 /* else fall into the slow case */
1496 }
1497 if (LIKELY(sizeof(void*) == 8
1498 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1499 SecMap* sm = get_secmap_for_reading(a);
1500 UWord sm_off = SM_OFF(a);
1501 UWord vabits8 = sm->vabits8[sm_off];
1502 if (LIKELY( !is_distinguished_sm(sm) &&
1503 (VA_BITS8_DEFINED == vabits8 ||
1504 VA_BITS8_UNDEFINED == vabits8) )) {
1505 /* Handle common case quickly: a is suitably aligned, */
1506          /* is mapped, and is addressable. */
1507 // Convert full V-bits in register to compact 2-bit form.
1508 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1509 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1510 return;
1511 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1512 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1513 return;
1514 }
1515 /* else fall into the slow case */
1516 }
1517 /* else fall into the slow case */
1518 }
1519 /* ------------ END semi-fast cases ------------ */
1520
1521 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1522
1523 /* Dump vbytes in memory, iterating from least to most significant
1524       byte. At the same time establish addressability of the location. */
1525 for (i = 0; i < szB; i++) {
1526 PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1527 ai = a + byte_offset_w(szB, bigendian, i);
1528 vbits8 = vbytes & 0xff;
1529 ok = set_vbits8(ai, vbits8);
1530 if (!ok) n_addrs_bad++;
1531 vbytes >>= 8;
1532 }
1533
1534 /* If an address error has happened, report it. */
1535 if (n_addrs_bad > 0)
1536 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1537 }
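
/* An illustrative example (not part of the build) of the byte loop
   above, assuming byte_offset_w(szB, bigendian, i) maps i to i on
   little-endian targets and to szB-1-i on big-endian targets.  For a
   32-bit store whose V bits are 0xFF00FF00 (bytes 1 and 3 undefined)
   at address a:

      little-endian: set_vbits8(a+0, 0x00), set_vbits8(a+1, 0xFF),
                     set_vbits8(a+2, 0x00), set_vbits8(a+3, 0xFF)
      big-endian:    set_vbits8(a+3, 0x00), set_vbits8(a+2, 0xFF),
                     set_vbits8(a+1, 0x00), set_vbits8(a+0, 0xFF)

   so each shadow byte lands at the same address as the corresponding
   data byte of the real store. */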
1538
1539
1540 /*------------------------------------------------------------*/
1541 /*--- Setting permissions over address ranges. ---*/
1542 /*------------------------------------------------------------*/
1543
1544 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1545 UWord dsm_num )
1546 {
1547 UWord sm_off, sm_off16;
1548 UWord vabits2 = vabits16 & 0x3;
1549 SizeT lenA, lenB, len_to_next_secmap;
1550 Addr aNext;
1551 SecMap* sm;
1552 SecMap** sm_ptr;
1553 SecMap* example_dsm;
1554
1555 PROF_EVENT(150, "set_address_range_perms");
1556
1557 /* Check the V+A bits make sense. */
1558 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1559 VA_BITS16_UNDEFINED == vabits16 ||
1560 VA_BITS16_DEFINED == vabits16);
1561
1562 // This code should never write PDBs; ensure this. (See comment above
1563 // set_vabits2().)
1564 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1565
1566 if (lenT == 0)
1567 return;
1568
1569 if (lenT > 256 * 1024 * 1024) {
1570 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1571 const HChar* s = "unknown???";
1572 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1573 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1574 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1575 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1576 "large range [0x%lx, 0x%lx) (%s)\n",
1577 a, a + lenT, s);
1578 }
1579 }
1580
1581 #ifndef PERF_FAST_SARP
1582 /*------------------ debug-only case ------------------ */
1583 {
1584 // Endianness doesn't matter here because all bytes are being set to
1585 // the same value.
1586 // Nb: We don't have to worry about updating the sec-V-bits table
1587 // after these set_vabits2() calls because this code never writes
1588 // VA_BITS2_PARTDEFINED values.
1589 SizeT i;
1590 for (i = 0; i < lenT; i++) {
1591 set_vabits2(a + i, vabits2);
1592 }
1593 return;
1594 }
1595 #endif
1596
1597 /*------------------ standard handling ------------------ */
1598
1599 /* Get the distinguished secondary that we might want
1600 to use (part of the space-compression scheme). */
1601 example_dsm = &sm_distinguished[dsm_num];
1602
1603 // We have to handle ranges covering various combinations of partial and
1604 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1605 // Cases marked with a '*' are common.
1606 //
1607 // TYPE PARTS USED
1608 // ---- ----------
1609 // * one partial sec-map (p) 1
1610 // - one whole sec-map (P) 2
1611 //
1612 // * two partial sec-maps (pp) 1,3
1613 // - one partial, one whole sec-map (pP) 1,2
1614 // - one whole, one partial sec-map (Pp) 2,3
1615 // - two whole sec-maps (PP) 2,2
1616 //
1617 // * one partial, one whole, one partial (pPp) 1,2,3
1618 // - one partial, two whole (pPP) 1,2,2
1619 // - two whole, one partial (PPp) 2,2,3
1620 // - three whole (PPP) 2,2,2
1621 //
1622 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1623 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1624 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1625 //   - N whole (PP...PP) 2,2...2,2
1626
1627 // Break up total length (lenT) into two parts: length in the first
1628 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1629 aNext = start_of_this_sm(a) + SM_SIZE;
1630 len_to_next_secmap = aNext - a;
1631 if ( lenT <= len_to_next_secmap ) {
1632 // Range entirely within one sec-map. Covers almost all cases.
1633 PROF_EVENT(151, "set_address_range_perms-single-secmap");
1634 lenA = lenT;
1635 lenB = 0;
1636 } else if (is_start_of_sm(a)) {
1637 // Range spans at least one whole sec-map, and starts at the beginning
1638 // of a sec-map; skip to Part 2.
1639 PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1640 lenA = 0;
1641 lenB = lenT;
1642 goto part2;
1643 } else {
1644 // Range spans two or more sec-maps, first one is partial.
1645 PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1646 lenA = len_to_next_secmap;
1647 lenB = lenT - lenA;
1648 }
1649
1650 //------------------------------------------------------------------------
1651 // Part 1: Deal with the first sec_map. Most of the time the range will be
1652 // entirely within a sec_map and this part alone will suffice. Also,
1653 // doing it this way lets us avoid repeatedly testing for the crossing of
1654 // a sec-map boundary within these loops.
1655 //------------------------------------------------------------------------
1656
1657 // If it's distinguished, make it undistinguished if necessary.
1658 sm_ptr = get_secmap_ptr(a);
1659 if (is_distinguished_sm(*sm_ptr)) {
1660 if (*sm_ptr == example_dsm) {
1661 // Sec-map already has the V+A bits that we want, so skip.
1662 PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1663 a = aNext;
1664 lenA = 0;
1665 } else {
1666 PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1667 *sm_ptr = copy_for_writing(*sm_ptr);
1668 }
1669 }
1670 sm = *sm_ptr;
1671
1672 // 1 byte steps
1673 while (True) {
1674 if (VG_IS_8_ALIGNED(a)) break;
1675 if (lenA < 1) break;
1676 PROF_EVENT(156, "set_address_range_perms-loop1a");
1677 sm_off = SM_OFF(a);
1678 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1679 a += 1;
1680 lenA -= 1;
1681 }
1682 // 8-aligned, 8 byte steps
1683 while (True) {
1684 if (lenA < 8) break;
1685 PROF_EVENT(157, "set_address_range_perms-loop8a");
1686 sm_off16 = SM_OFF_16(a);
1687 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1688 a += 8;
1689 lenA -= 8;
1690 }
1691 // 1 byte steps
1692 while (True) {
1693 if (lenA < 1) break;
1694 PROF_EVENT(158, "set_address_range_perms-loop1b");
1695 sm_off = SM_OFF(a);
1696 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1697 a += 1;
1698 lenA -= 1;
1699 }
1700
1701 // We've finished the first sec-map. Is that it?
1702 if (lenB == 0)
1703 return;
1704
1705 //------------------------------------------------------------------------
1706 // Part 2: Fast-set entire sec-maps at a time.
1707 //------------------------------------------------------------------------
1708 part2:
1709 // 64KB-aligned, 64KB steps.
1710 // Nb: we can reach here with lenB < SM_SIZE
1711 tl_assert(0 == lenA);
1712 while (True) {
1713 if (lenB < SM_SIZE) break;
1714 tl_assert(is_start_of_sm(a));
1715 PROF_EVENT(159, "set_address_range_perms-loop64K");
1716 sm_ptr = get_secmap_ptr(a);
1717 if (!is_distinguished_sm(*sm_ptr)) {
1718 PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1719 // Free the non-distinguished sec-map that we're replacing. This
1720 // case happens moderately often, enough to be worthwhile.
1721 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1722 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1723 }
1724 update_SM_counts(*sm_ptr, example_dsm);
1725 // Make the sec-map entry point to the example DSM
1726 *sm_ptr = example_dsm;
1727 lenB -= SM_SIZE;
1728 a += SM_SIZE;
1729 }
1730
1731 // We've finished the whole sec-maps. Is that it?
1732 if (lenB == 0)
1733 return;
1734
1735 //------------------------------------------------------------------------
1736 // Part 3: Finish off the final partial sec-map, if necessary.
1737 //------------------------------------------------------------------------
1738
1739 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1740
1741 // If it's distinguished, make it undistinguished if necessary.
1742 sm_ptr = get_secmap_ptr(a);
1743 if (is_distinguished_sm(*sm_ptr)) {
1744 if (*sm_ptr == example_dsm) {
1745 // Sec-map already has the V+A bits that we want, so stop.
1746 PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1747 return;
1748 } else {
1749 PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1750 *sm_ptr = copy_for_writing(*sm_ptr);
1751 }
1752 }
1753 sm = *sm_ptr;
1754
1755 // 8-aligned, 8 byte steps
1756 while (True) {
1757 if (lenB < 8) break;
1758 PROF_EVENT(163, "set_address_range_perms-loop8b");
1759 sm_off16 = SM_OFF_16(a);
1760 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1761 a += 8;
1762 lenB -= 8;
1763 }
1764 // 1 byte steps
1765 while (True) {
1766 if (lenB < 1) return;
1767 PROF_EVENT(164, "set_address_range_perms-loop1c");
1768 sm_off = SM_OFF(a);
1769 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1770 a += 1;
1771 lenB -= 1;
1772 }
1773 }
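
/* A worked example (illustrative only) of the lenA/lenB split above,
   using the 64KB sec-map size the comments assume.  For a = 0x2FFF0
   and lenT = 0x30000:

      aNext              = 0x30000    (start of the next sec-map)
      len_to_next_secmap = 0x10
      => lenA = 0x10     Part 1 finishes the first, partial sec-map
         lenB = 0x2FFF0  Part 2 swaps in whole sec-maps at 0x30000 and
                         0x40000; Part 3 then writes the remaining
                         0xFFF0 bytes in the sec-map at 0x50000
*/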
1774
1775
1776 /* --- Set permissions for arbitrary address ranges --- */
1777
1778 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1779 {
1780 PROF_EVENT(40, "MC_(make_mem_noaccess)");
1781 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1782 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1783 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1784 ocache_sarp_Clear_Origins ( a, len );
1785 }
1786
1787 static void make_mem_undefined ( Addr a, SizeT len )
1788 {
1789 PROF_EVENT(41, "make_mem_undefined");
1790 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1791 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1792 }
1793
1794 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1795 {
1796 PROF_EVENT(43, "MC_(make_mem_undefined)");
1797 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1798 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1799 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1800 ocache_sarp_Set_Origins ( a, len, otag );
1801 }
1802
1803 static
1804 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1805 ThreadId tid, UInt okind )
1806 {
1807 UInt ecu;
1808 ExeContext* here;
1809 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1810 if it is invalid. So no need to do it here. */
1811 tl_assert(okind <= 3);
1812 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1813 tl_assert(here);
1814 ecu = VG_(get_ECU_from_ExeContext)(here);
1815 tl_assert(VG_(is_plausible_ECU)(ecu));
1816 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1817 }
1818
1819 static
1820 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1821 {
1822 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1823 }
1824
1825 static
1826 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1827 {
1828 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1829 }
1830
1831 void MC_(make_mem_defined) ( Addr a, SizeT len )
1832 {
1833 PROF_EVENT(42, "MC_(make_mem_defined)");
1834 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1835 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1836 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1837 ocache_sarp_Clear_Origins ( a, len );
1838 }
1839
1840 /* For each byte in [a,a+len), if the byte is addressable, make it be
1841    defined, but if it isn't addressable, leave it alone. In other
1842 words a version of MC_(make_mem_defined) that doesn't mess with
1843    addressability. Low-performance implementation. */
1844 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1845 {
1846 SizeT i;
1847 UChar vabits2;
1848 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1849 for (i = 0; i < len; i++) {
1850 vabits2 = get_vabits2( a+i );
1851 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1852 set_vabits2(a+i, VA_BITS2_DEFINED);
1853 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1854 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1855 }
1856 }
1857 }
1858 }
1859
1860 /* Similarly (needed for mprotect handling ..) */
1861 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1862 {
1863 SizeT i;
1864 UChar vabits2;
1865 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1866 for (i = 0; i < len; i++) {
1867 vabits2 = get_vabits2( a+i );
1868 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1869 set_vabits2(a+i, VA_BITS2_DEFINED);
1870 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1871 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1872 }
1873 }
1874 }
1875 }
1876
1877 /* --- Block-copy permissions (needed for implementing realloc() and
1878 sys_mremap). --- */
1879
1880 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1881 {
1882 SizeT i, j;
1883 UChar vabits2, vabits8;
1884 Bool aligned, nooverlap;
1885
1886 DEBUG("MC_(copy_address_range_state)\n");
1887 PROF_EVENT(50, "MC_(copy_address_range_state)");
1888
1889 if (len == 0 || src == dst)
1890 return;
1891
1892 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1893 nooverlap = src+len <= dst || dst+len <= src;
1894
1895 if (nooverlap && aligned) {
1896
1897 /* Vectorised fast case, when no overlap and suitably aligned */
1898 /* vector loop */
1899 i = 0;
1900 while (len >= 4) {
1901 vabits8 = get_vabits8_for_aligned_word32( src+i );
1902 set_vabits8_for_aligned_word32( dst+i, vabits8 );
1903 if (LIKELY(VA_BITS8_DEFINED == vabits8
1904 || VA_BITS8_UNDEFINED == vabits8
1905 || VA_BITS8_NOACCESS == vabits8)) {
1906 /* do nothing */
1907 } else {
1908 /* have to copy secondary map info */
1909 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1910 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1911 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1912 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1913 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1914 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1915 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1916 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1917 }
1918 i += 4;
1919 len -= 4;
1920 }
1921 /* fixup loop */
1922 while (len >= 1) {
1923 vabits2 = get_vabits2( src+i );
1924 set_vabits2( dst+i, vabits2 );
1925 if (VA_BITS2_PARTDEFINED == vabits2) {
1926 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1927 }
1928 i++;
1929 len--;
1930 }
1931
1932 } else {
1933
1934 /* We have to do things the slow way */
1935 if (src < dst) {
1936 for (i = 0, j = len-1; i < len; i++, j--) {
1937 PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1938 vabits2 = get_vabits2( src+j );
1939 set_vabits2( dst+j, vabits2 );
1940 if (VA_BITS2_PARTDEFINED == vabits2) {
1941 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1942 }
1943 }
1944 }
1945
1946 if (src > dst) {
1947 for (i = 0; i < len; i++) {
1948 PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1949 vabits2 = get_vabits2( src+i );
1950 set_vabits2( dst+i, vabits2 );
1951 if (VA_BITS2_PARTDEFINED == vabits2) {
1952 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1953 }
1954 }
1955 }
1956 }
1957
1958 }
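
/* Illustrative note on the slow-path copy directions above: with
   src = 0x1000, dst = 0x1002, len = 4 the ranges overlap and
   src < dst, so the loop runs backwards:

      dst+3 <- src+3, dst+2 <- src+2, dst+1 <- src+1, dst+0 <- src+0

   which reads the state at src+2 and src+3 before the writes to
   dst+0/dst+1 (the same addresses) can clobber it.  When src > dst the
   overlap is at the other end, so the forwards loop is safe -- the
   same reasoning memmove() uses for the data itself. */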
1959
1960
1961 /*------------------------------------------------------------*/
1962 /*--- Origin tracking stuff - cache basics ---*/
1963 /*------------------------------------------------------------*/
1964
1965 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1966 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1967
1968 Note that this implementation draws inspiration from the "origin
1969 tracking by value piggybacking" scheme described in "Tracking Bad
1970 Apples: Reporting the Origin of Null and Undefined Value Errors"
1971 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1972 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1973 implemented completely differently.
1974
1975 Origin tags and ECUs -- about the shadow values
1976 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1977
1978 This implementation tracks the defining point of all uninitialised
1979 values using so called "origin tags", which are 32-bit integers,
1980 rather than using the values themselves to encode the origins. The
1981    latter, so-called "value piggybacking", is what the OOPSLA07 paper
1982 describes.
1983
1984 Origin tags, as tracked by the machinery below, are 32-bit unsigned
1985 ints (UInts), regardless of the machine's word size. Each tag
1986 comprises an upper 30-bit ECU field and a lower 2-bit
1987 'kind' field. The ECU field is a number given out by m_execontext
1988 and has a 1-1 mapping with ExeContext*s. An ECU can be used
1989    directly as an origin tag (otag), but in fact we also put
1990    additional information in the 'kind' field to indicate roughly where
1991    the tag came from. This helps print more understandable error messages
1992 for the user -- it has no other purpose. In summary:
1993
1994 * Both ECUs and origin tags are represented as 32-bit words
1995
1996 * m_execontext and the core-tool interface deal purely in ECUs.
1997 They have no knowledge of origin tags - that is a purely
1998 Memcheck-internal matter.
1999
2000 * all valid ECUs have the lowest 2 bits zero and at least
2001 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2002
2003 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2004 constants defined in mc_include.h.
2005
2006 * to convert an otag back to an ECU, AND it with ~3
2007
2008 One important fact is that no valid otag is zero. A zero otag is
2009 used by the implementation to indicate "no origin", which could
2010 mean that either the value is defined, or it is undefined but the
2011 implementation somehow managed to lose the origin.
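
   As a concrete illustration (the ECU value is made up): for an ECU of
   0x00001230, whose low 2 bits are clear,

      otag = 0x00001230 | MC_OKIND_STACK    (tag it as a stack origin)
      ecu  = otag & ~3                       (recovers 0x00001230)

   and because no valid ECU is zero, no valid otag is zero either.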
2012
2013 The ECU used for memory created by malloc etc is derived from the
2014 stack trace at the time the malloc etc happens. This means the
2015 mechanism can show the exact allocation point for heap-created
2016 uninitialised values.
2017
2018 In contrast, it is simply too expensive to create a complete
2019 backtrace for each stack allocation. Therefore we merely use a
2020 depth-1 backtrace for stack allocations, which can be done once at
2021 translation time, rather than N times at run time. The result of
2022 this is that, for stack created uninitialised values, Memcheck can
2023 only show the allocating function, and not what called it.
2024 Furthermore, compilers tend to move the stack pointer just once at
2025 the start of the function, to allocate all locals, and so in fact
2026 the stack origin almost always simply points to the opening brace
2027 of the function. Net result is, for stack origins, the mechanism
2028 can tell you in which function the undefined value was created, but
2029 that's all. Users will need to carefully check all locals in the
2030 specified function.
2031
2032 Shadowing registers and memory
2033 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2034
2035 Memory is shadowed using a two level cache structure (ocacheL1 and
2036 ocacheL2). Memory references are first directed to ocacheL1. This
2037 is a traditional 2-way set associative cache with 32-byte lines and
2038 approximate LRU replacement within each set.
2039
2040 A naive implementation would require storing one 32 bit otag for
2041 each byte of memory covered, a 4:1 space overhead. Instead, there
2042 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2043 that shows which of the 4 bytes have that shadow value and which
2044 have a shadow value of zero (indicating no origin). Hence a lot of
2045 space is saved, but the cost is that only one different origin per
2046 4 bytes of address space can be represented. This is a source of
2047 imprecision, but how much of a problem it really is remains to be
2048 seen.
2049
2050 A cache line that contains all zeroes ("no origins") contains no
2051 useful information, and can be ejected from the L1 cache "for
2052 free", in the sense that a read miss on the L1 causes a line of
2053 zeroes to be installed. However, ejecting a line containing
2054 nonzeroes risks losing origin information permanently. In order to
2055 prevent such lossage, ejected nonzero lines are placed in a
2056 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2057 lines. This can grow arbitrarily large, and so should ensure that
2058 Memcheck runs out of memory in preference to losing useful origin
2059 info due to cache size limitations.
2060
2061 Shadowing registers is a bit tricky, because the shadow values are
2062 32 bits, regardless of the size of the register. That gives a
2063 problem for registers smaller than 32 bits. The solution is to
2064 find spaces in the guest state that are unused, and use those to
2065 shadow guest state fragments smaller than 32 bits. For example, on
2066 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2067 shadow are allocated for the register's otag, then there are still
2068 12 bytes left over which could be used to shadow 3 other values.
2069
2070 This implies there is some non-obvious mapping from guest state
2071 (start,length) pairs to the relevant shadow offset (for the origin
2072 tags). And it is unfortunately guest-architecture specific. The
2073 mapping is contained in mc_machine.c, which is quite lengthy but
2074 straightforward.
2075
2076 Instrumenting the IR
2077 ~~~~~~~~~~~~~~~~~~~~
2078
2079 Instrumentation is largely straightforward, and done by the
2080 functions schemeE and schemeS in mc_translate.c. These generate
2081 code for handling the origin tags of expressions (E) and statements
2082 (S) respectively. The rather strange names are a reference to the
2083 "compilation schemes" shown in Simon Peyton Jones' book "The
2084 Implementation of Functional Programming Languages" (Prentice Hall,
2085 1987, see
2086 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2087
2088 schemeS merely arranges to move shadow values around the guest
2089 state to track the incoming IR. schemeE is largely trivial too.
2090 The only significant point is how to compute the otag corresponding
2091 to binary (or ternary, quaternary, etc) operator applications. The
2092 rule is simple: just take whichever value is larger (32-bit
2093 unsigned max). Constants get the special value zero. Hence this
2094 rule always propagates a nonzero (known) otag in preference to a
2095 zero (unknown, or more likely, value-is-defined) tag, as we want.
2096 If two different undefined values are inputs to a binary operator
2097 application, then which is propagated is arbitrary, but that
2098 doesn't matter, since the program is erroneous in using either of
2099 the values, and so there's no point in attempting to propagate
2100 both.
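
   A small illustration (the expressions are made up): for
   t = Add32(x, Const(5)), schemeE emits otag(t) = Max32U(otag(x), 0),
   which the post-instrumentation cleanup described below folds to
   otag(t) = otag(x); for t = Add32(x, y) it emits
   otag(t) = Max32U(otag(x), otag(y)), which survives only if neither
   operand's tag is statically known to be zero.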
2101
2102 Since constants are abstracted to (otag) zero, much of the
2103 instrumentation code can be folded out without difficulty by the
2104 generic post-instrumentation IR cleanup pass, using these rules:
2105    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
2106    are constants is evaluated at JIT time; the resulting dead code is
2107    then removed. In practice this causes surprisingly few Max32Us to
2108    survive through to backend code generation.
2109
2110 Integration with the V-bits machinery
2111 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2112
2113 This is again largely straightforward. Mostly the otag and V bits
2114 stuff are independent. The only point of interaction is when the V
2115 bits instrumenter creates a call to a helper function to report an
2116 uninitialised value error -- in that case it must first use schemeE
2117 to get hold of the origin tag expression for the value, and pass
2118 that to the helper too.
2119
2120 There is the usual stuff to do with setting address range
2121 permissions. When memory is painted undefined, we must also know
2122 the origin tag to paint with, which involves some tedious plumbing,
2123 particularly to do with the fast case stack handlers. When memory
2124 is painted defined or noaccess then the origin tags must be forced
2125 to zero.
2126
2127 One of the goals of the implementation was to ensure that the
2128 non-origin tracking mode isn't slowed down at all. To do this,
2129 various functions to do with memory permissions setting (again,
2130 mostly pertaining to the stack) are duplicated for the with- and
2131 without-otag case.
2132
2133 Dealing with stack redzones, and the NIA cache
2134 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2135
2136 This is one of the few non-obvious parts of the implementation.
2137
2138 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2139 reserved area below the stack pointer, that can be used as scratch
2140 space by compiler generated code for functions. In the Memcheck
2141 sources this is referred to as the "stack redzone". The important
2142 thing here is that such redzones are considered volatile across
2143 function calls and returns. So Memcheck takes care to mark them as
2144 undefined for each call and return, on the afflicted platforms.
2145 Past experience shows this is essential in order to get reliable
2146 messages about uninitialised values that come from the stack.
2147
2148 So the question is, when we paint a redzone undefined, what origin
2149 tag should we use for it? Consider a function f() calling g(). If
2150 we paint the redzone using an otag derived from the ExeContext of
2151 the CALL/BL instruction in f, then any errors in g causing it to
2152 use uninitialised values that happen to lie in the redzone, will be
2153 reported as having their origin in f. Which is highly confusing.
2154
2155 The same applies for returns: if, on a return, we paint the redzone
2156    using an origin tag derived from the ExeContext of the RET/BLR
2157 instruction in g, then any later errors in f causing it to use
2158 uninitialised values in the redzone, will be reported as having
2159 their origin in g. Which is just as confusing.
2160
2161 To do it right, in both cases we need to use an origin tag which
2162 pertains to the instruction which dynamically follows the CALL/BL
2163 or RET/BLR. In short, one derived from the NIA - the "next
2164 instruction address".
2165
2166 To make this work, Memcheck's redzone-painting helper,
2167 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2168 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2169 ExeContext's ECU as the basis for the otag used to paint the
2170 redzone. The expensive part of this is converting an NIA into an
2171 ECU, since this happens once for every call and every return. So
2172 we use a simple 511-line, 2-way set associative cache
2173 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2174 the cost out.
2175
2176 Further background comments
2177 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2178
2179 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2180 > it really just the address of the relevant ExeContext?
2181
2182 Well, it's not the address, but a value which has a 1-1 mapping
2183 with ExeContexts, and is guaranteed not to be zero, since zero
2184 denotes (to memcheck) "unknown origin or defined value". So these
2185 UInts are just numbers starting at 4 and incrementing by 4; each
2186 ExeContext is given a number when it is created. (*** NOTE this
2187 confuses otags and ECUs; see comments above ***).
2188
2189 Making these otags 32-bit regardless of the machine's word size
2190 makes the 64-bit implementation easier (next para). And it doesn't
2191 really limit us in any way, since for the tags to overflow would
2192 require that the program somehow caused 2^30-1 different
2193 ExeContexts to be created, in which case it is probably in deep
2194 trouble. Not to mention V will have soaked up many tens of
2195 gigabytes of memory merely to store them all.
2196
2197 So having 64-bit origins doesn't really buy you anything, and has
2198 the following downsides:
2199
2200 Suppose that instead, an otag is a UWord. This would mean that, on
2201 a 64-bit target,
2202
2203 1. It becomes hard to shadow any element of guest state which is
2204 smaller than 8 bytes. To do so means you'd need to find some
2205 8-byte-sized hole in the guest state which you don't want to
2206 shadow, and use that instead to hold the otag. On ppc64, the
2207 condition code register(s) are split into 20 UChar sized pieces,
2208 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2209 and so that would entail finding 160 bytes somewhere else in the
2210 guest state.
2211
2212 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2213 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2214 same) and so I had to look for 4 untracked otag-sized areas in
2215 the guest state to make that possible.
2216
2217 The same problem exists of course when origin tags are only 32
2218 bits, but it's less extreme.
2219
2220 2. (More compelling) it doubles the size of the origin shadow
2221 memory. Given that the shadow memory is organised as a fixed
2222 size cache, and that accuracy of tracking is limited by origins
2223 falling out the cache due to space conflicts, this isn't good.
2224
2225 > Another question: is the origin tracking perfect, or are there
2226 > cases where it fails to determine an origin?
2227
2228    It is imperfect for at least the following reasons, and
2229 probably more:
2230
2231 * Insufficient capacity in the origin cache. When a line is
2232 evicted from the cache it is gone forever, and so subsequent
2233 queries for the line produce zero, indicating no origin
2234 information. Interestingly, a line containing all zeroes can be
2235 evicted "free" from the cache, since it contains no useful
2236 information, so there is scope perhaps for some cleverer cache
2237 management schemes. (*** NOTE, with the introduction of the
2238 second level origin tag cache, ocacheL2, this is no longer a
2239 problem. ***)
2240
2241 * The origin cache only stores one otag per 32-bits of address
2242 space, plus 4 bits indicating which of the 4 bytes has that tag
2243 and which are considered defined. The result is that if two
2244 undefined bytes in the same word are stored in memory, the first
2245 stored byte's origin will be lost and replaced by the origin for
2246 the second byte.
2247
2248 * Nonzero origin tags for defined values. Consider a binary
2249 operator application op(x,y). Suppose y is undefined (and so has
2250 a valid nonzero origin tag), and x is defined, but erroneously
2251 has a nonzero origin tag (defined values should have tag zero).
2252 If the erroneous tag has a numeric value greater than y's tag,
2253      then the rule for propagating origin tags through binary
2254 operations, which is simply to take the unsigned max of the two
2255 tags, will erroneously propagate x's tag rather than y's.
2256
2257 * Some obscure uses of x86/amd64 byte registers can cause lossage
2258 or confusion of origins. %AH .. %DH are treated as different
2259 from, and unrelated to, their parent registers, %EAX .. %EDX.
2260      So some weird sequences like
2261
2262 movb undefined-value, %AH
2263 movb defined-value, %AL
2264 .. use %AX or %EAX ..
2265
2266 will cause the origin attributed to %AH to be ignored, since %AL,
2267 %AX, %EAX are treated as the same register, and %AH as a
2268 completely separate one.
2269
2270 But having said all that, it actually seems to work fairly well in
2271 practice.
2272 */
2273
2274 static UWord stats_ocacheL1_find = 0;
2275 static UWord stats_ocacheL1_found_at_1 = 0;
2276 static UWord stats_ocacheL1_found_at_N = 0;
2277 static UWord stats_ocacheL1_misses = 0;
2278 static UWord stats_ocacheL1_lossage = 0;
2279 static UWord stats_ocacheL1_movefwds = 0;
2280
2281 static UWord stats__ocacheL2_refs = 0;
2282 static UWord stats__ocacheL2_misses = 0;
2283 static UWord stats__ocacheL2_n_nodes_max = 0;
2284
2285 /* Cache of 32-bit values, one every 32 bits of address space */
2286
2287 #define OC_BITS_PER_LINE 5
2288 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2289
2290 static INLINE UWord oc_line_offset ( Addr a ) {
2291 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2292 }
2293 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2294 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2295 }
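
/* Worked example (illustrative only): with OC_BITS_PER_LINE == 5 a
   line covers 32 bytes, i.e. OC_W32S_PER_LINE == 8 shadow words.  For
   a == 0x4003F7E4:

      oc_line_offset(a) = (a >> 2) & 7 = 1    (2nd 32-bit word in line)
      line tag          = a & ~31      = 0x4003F7E0  (is_valid_oc_tag)

   find_OCacheLine(), further down, derives the set number from the
   address bits immediately above the in-line offset. */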
2296
2297 #define OC_LINES_PER_SET 2
2298
2299 #define OC_N_SET_BITS 20
2300 #define OC_N_SETS (1 << OC_N_SET_BITS)
2301
2302 /* These settings give:
2303 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2304 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2305 */
2306
2307 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2308
2309
2310 typedef
2311 struct {
2312 Addr tag;
2313 UInt w32[OC_W32S_PER_LINE];
2314 UChar descr[OC_W32S_PER_LINE];
2315 }
2316 OCacheLine;
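
/* An illustrative snapshot (values made up) of one line entry,
   assuming bit k of descr[] corresponds to byte k of its aligned
   4-byte group: if w32[3] == 0x00001234 and descr[3] == 0x6 (binary
   0110), then bytes 1 and 2 of that group carry origin 0x00001234
   while bytes 0 and 3 have no stored origin; descr[3] == 0 would mean
   the whole group has no origin, whatever w32[3] holds. */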
2317
2318 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2319 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2320 and 'z' if all the represented tags are zero. */
2321 static UChar classify_OCacheLine ( OCacheLine* line )
2322 {
2323 UWord i;
2324 if (line->tag == 1/*invalid*/)
2325 return 'e'; /* EMPTY */
2326 tl_assert(is_valid_oc_tag(line->tag));
2327 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2328 tl_assert(0 == ((~0xF) & line->descr[i]));
2329 if (line->w32[i] > 0 && line->descr[i] > 0)
2330 return 'n'; /* NONZERO - contains useful info */
2331 }
2332 return 'z'; /* ZERO - no useful info */
2333 }
2334
2335 typedef
2336 struct {
2337 OCacheLine line[OC_LINES_PER_SET];
2338 }
2339 OCacheSet;
2340
2341 typedef
2342 struct {
2343 OCacheSet set[OC_N_SETS];
2344 }
2345 OCache;
2346
2347 static OCache* ocacheL1 = NULL;
2348 static UWord ocacheL1_event_ctr = 0;
2349
2350 static void init_ocacheL2 ( void ); /* fwds */
2351 static void init_OCache ( void )
2352 {
2353 UWord line, set;
2354 tl_assert(MC_(clo_mc_level) >= 3);
2355 tl_assert(ocacheL1 == NULL);
2356 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2357 if (ocacheL1 == NULL) {
2358 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2359 sizeof(OCache) );
2360 }
2361 tl_assert(ocacheL1 != NULL);
2362 for (set = 0; set < OC_N_SETS; set++) {
2363 for (line = 0; line < OC_LINES_PER_SET; line++) {
2364 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2365 }
2366 }
2367 init_ocacheL2();
2368 }
2369
2370 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2371 {
2372 OCacheLine tmp;
2373 stats_ocacheL1_movefwds++;
2374 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2375 tmp = set->line[lineno-1];
2376 set->line[lineno-1] = set->line[lineno];
2377 set->line[lineno] = tmp;
2378 }
2379
2380 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2381 UWord i;
2382 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2383 line->w32[i] = 0; /* NO ORIGIN */
2384 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2385 }
2386 line->tag = tag;
2387 }
2388
2389 //////////////////////////////////////////////////////////////
2390 //// OCache backing store
2391
2392 static OSet* ocacheL2 = NULL;
2393
2394 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2395 return VG_(malloc)(cc, szB);
2396 }
2397 static void ocacheL2_free ( void* v ) {
2398 VG_(free)( v );
2399 }
2400
2401 /* Stats: # nodes currently in tree */
2402 static UWord stats__ocacheL2_n_nodes = 0;
2403
2404 static void init_ocacheL2 ( void )
2405 {
2406 tl_assert(!ocacheL2);
2407 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2408 tl_assert(0 == offsetof(OCacheLine,tag));
2409 ocacheL2
2410 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2411 NULL, /* fast cmp */
2412 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2413 stats__ocacheL2_n_nodes = 0;
2414 }
2415
2416 /* Find line with the given tag in the tree, or NULL if not found. */
2417 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2418 {
2419 OCacheLine* line;
2420 tl_assert(is_valid_oc_tag(tag));
2421 stats__ocacheL2_refs++;
2422 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2423 return line;
2424 }
2425
2426 /* Delete the line with the given tag from the tree, if it is present, and
2427 free up the associated memory. */
2428 static void ocacheL2_del_tag ( Addr tag )
2429 {
2430 OCacheLine* line;
2431 tl_assert(is_valid_oc_tag(tag));
2432 stats__ocacheL2_refs++;
2433 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2434 if (line) {
2435 VG_(OSetGen_FreeNode)(ocacheL2, line);
2436 tl_assert(stats__ocacheL2_n_nodes > 0);
2437 stats__ocacheL2_n_nodes--;
2438 }
2439 }
2440
2441 /* Add a copy of the given line to the tree. It must not already be
2442 present. */
2443 static void ocacheL2_add_line ( OCacheLine* line )
2444 {
2445 OCacheLine* copy;
2446 tl_assert(is_valid_oc_tag(line->tag));
2447 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2448 *copy = *line;
2449 stats__ocacheL2_refs++;
2450 VG_(OSetGen_Insert)( ocacheL2, copy );
2451 stats__ocacheL2_n_nodes++;
2452 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2453 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2454 }
2455
2456 ////
2457 //////////////////////////////////////////////////////////////
2458
2459 __attribute__((noinline))
2460 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2461 {
2462 OCacheLine *victim, *inL2;
2463 UChar c;
2464 UWord line;
2465 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2466 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2467 UWord tag = a & tagmask;
2468 tl_assert(setno >= 0 && setno < OC_N_SETS);
2469
2470 /* we already tried line == 0; skip therefore. */
2471 for (line = 1; line < OC_LINES_PER_SET; line++) {
2472 if (ocacheL1->set[setno].line[line].tag == tag) {
2473 if (line == 1) {
2474 stats_ocacheL1_found_at_1++;
2475 } else {
2476 stats_ocacheL1_found_at_N++;
2477 }
2478 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2479 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2480 moveLineForwards( &ocacheL1->set[setno], line );
2481 line--;
2482 }
2483 return &ocacheL1->set[setno].line[line];
2484 }
2485 }
2486
2487 /* A miss. Use the last slot. Implicitly this means we're
2488 ejecting the line in the last slot. */
2489 stats_ocacheL1_misses++;
2490 tl_assert(line == OC_LINES_PER_SET);
2491 line--;
2492 tl_assert(line > 0);
2493
2494 /* First, move the to-be-ejected line to the L2 cache. */
2495 victim = &ocacheL1->set[setno].line[line];
2496 c = classify_OCacheLine(victim);
2497 switch (c) {
2498 case 'e':
2499 /* the line is empty (has invalid tag); ignore it. */
2500 break;
2501 case 'z':
2502 /* line contains zeroes. We must ensure the backing store is
2503 updated accordingly, either by copying the line there
2504 verbatim, or by ensuring it isn't present there. We
2505             choose the latter on the basis that it reduces the size of
2506 the backing store. */
2507 ocacheL2_del_tag( victim->tag );
2508 break;
2509 case 'n':
2510 /* line contains at least one real, useful origin. Copy it
2511 to the backing store. */
2512 stats_ocacheL1_lossage++;
2513 inL2 = ocacheL2_find_tag( victim->tag );
2514 if (inL2) {
2515 *inL2 = *victim;
2516 } else {
2517 ocacheL2_add_line( victim );
2518 }
2519 break;
2520 default:
2521 tl_assert(0);
2522 }
2523
2524 /* Now we must reload the L1 cache from the backing tree, if
2525 possible. */
2526 tl_assert(tag != victim->tag); /* stay sane */
2527 inL2 = ocacheL2_find_tag( tag );
2528 if (inL2) {
2529 /* We're in luck. It's in the L2. */
2530 ocacheL1->set[setno].line[line] = *inL2;
2531 } else {
2532 /* Missed at both levels of the cache hierarchy. We have to
2533 declare it as full of zeroes (unknown origins). */
2534 stats__ocacheL2_misses++;
2535 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2536 }
2537
2538 /* Move it one forwards */
2539 moveLineForwards( &ocacheL1->set[setno], line );
2540 line--;
2541
2542 return &ocacheL1->set[setno].line[line];
2543 }
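
/* An illustrative miss scenario (tags made up): suppose a set holds
   lines with tags T0 (slot 0) and T1 (slot 1), and a lookup for tag T2
   misses both.  The line in the last slot is classified: 'e' is simply
   dropped, 'z' has any stale copy removed from ocacheL2, 'n' is
   written back to ocacheL2.  Then T2's line is fetched from ocacheL2
   if present (else zeroised), installed in the last slot, and finally
   swapped one place forwards by moveLineForwards(). */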
2544
2545 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2546 {
2547 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2548 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2549 UWord tag = a & tagmask;
2550
2551 stats_ocacheL1_find++;
2552
2553 if (OC_ENABLE_ASSERTIONS) {
2554 tl_assert(setno >= 0 && setno < OC_N_SETS);
2555 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2556 }
2557
2558 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2559 return &ocacheL1->set[setno].line[0];
2560 }
2561
2562 return find_OCacheLine_SLOW( a );
2563 }
2564
2565 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2566 {
2567 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2568 //// Set the origins for a+0 .. a+7
2569 { OCacheLine* line;
2570 UWord lineoff = oc_line_offset(a);
2571 if (OC_ENABLE_ASSERTIONS) {
2572 tl_assert(lineoff >= 0
2573 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2574 }
2575 line = find_OCacheLine( a );
2576 line->descr[lineoff+0] = 0xF;
2577 line->descr[lineoff+1] = 0xF;
2578 line->w32[lineoff+0] = otag;
2579 line->w32[lineoff+1] = otag;
2580 }
2581 //// END inlined, specialised version of MC_(helperc_b_store8)
2582 }
2583
2584
2585 /*------------------------------------------------------------*/
2586 /*--- Aligned fast case permission setters, ---*/
2587 /*--- for dealing with stacks ---*/
2588 /*------------------------------------------------------------*/
2589
2590 /*--------------------- 32-bit ---------------------*/
2591
2592 /* Nb: by "aligned" here we mean 4-byte aligned */
2593
2594 static INLINE void make_aligned_word32_undefined ( Addr a )
2595 {
2596 PROF_EVENT(300, "make_aligned_word32_undefined");
2597
2598 #ifndef PERF_FAST_STACK2
2599 make_mem_undefined(a, 4);
2600 #else
2601 {
2602 UWord sm_off;
2603 SecMap* sm;
2604
2605 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2606 PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2607 make_mem_undefined(a, 4);
2608 return;
2609 }
2610
2611 sm = get_secmap_for_writing_low(a);
2612 sm_off = SM_OFF(a);
2613 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2614 }
2615 #endif
2616 }
2617
2618 static INLINE
2619 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2620 {
2621 make_aligned_word32_undefined(a);
2622 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2623 //// Set the origins for a+0 .. a+3
2624 { OCacheLine* line;
2625 UWord lineoff = oc_line_offset(a);
2626 if (OC_ENABLE_ASSERTIONS) {
2627 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2628 }
2629 line = find_OCacheLine( a );
2630 line->descr[lineoff] = 0xF;
2631 line->w32[lineoff] = otag;
2632 }
2633 //// END inlined, specialised version of MC_(helperc_b_store4)
2634 }
2635
2636 static INLINE
2637 void make_aligned_word32_noaccess ( Addr a )
2638 {
2639 PROF_EVENT(310, "make_aligned_word32_noaccess");
2640
2641 #ifndef PERF_FAST_STACK2
2642 MC_(make_mem_noaccess)(a, 4);
2643 #else
2644 {
2645 UWord sm_off;
2646 SecMap* sm;
2647
2648 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2649 PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2650 MC_(make_mem_noaccess)(a, 4);
2651 return;
2652 }
2653
2654 sm = get_secmap_for_writing_low(a);
2655 sm_off = SM_OFF(a);
2656 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2657
2658 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2659 //// Set the origins for a+0 .. a+3.
2660 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2661 OCacheLine* line;
2662 UWord lineoff = oc_line_offset(a);
2663 if (OC_ENABLE_ASSERTIONS) {
2664 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2665 }
2666 line = find_OCacheLine( a );
2667 line->descr[lineoff] = 0;
2668 }
2669 //// END inlined, specialised version of MC_(helperc_b_store4)
2670 }
2671 #endif
2672 }
2673
2674 /*--------------------- 64-bit ---------------------*/
2675
2676 /* Nb: by "aligned" here we mean 8-byte aligned */
2677
2678 static INLINE void make_aligned_word64_undefined ( Addr a )
2679 {
2680 PROF_EVENT(320, "make_aligned_word64_undefined");
2681
2682 #ifndef PERF_FAST_STACK2
2683 make_mem_undefined(a, 8);
2684 #else
2685 {
2686 UWord sm_off16;
2687 SecMap* sm;
2688
2689 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2690 PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2691 make_mem_undefined(a, 8);
2692 return;
2693 }
2694
2695 sm = get_secmap_for_writing_low(a);
2696 sm_off16 = SM_OFF_16(a);
2697 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2698 }
2699 #endif
2700 }
2701
2702 static INLINE
2703 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2704 {
2705 make_aligned_word64_undefined(a);
2706 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2707 //// Set the origins for a+0 .. a+7
2708 { OCacheLine* line;
2709 UWord lineoff = oc_line_offset(a);
2710 tl_assert(lineoff >= 0
2711 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2712 line = find_OCacheLine( a );
2713 line->descr[lineoff+0] = 0xF;
2714 line->descr[lineoff+1] = 0xF;
2715 line->w32[lineoff+0] = otag;
2716 line->w32[lineoff+1] = otag;
2717 }
2718 //// END inlined, specialised version of MC_(helperc_b_store8)
2719 }
2720
2721 static INLINE
2722 void make_aligned_word64_noaccess ( Addr a )
2723 {
2724 PROF_EVENT(330, "make_aligned_word64_noaccess");
2725
2726 #ifndef PERF_FAST_STACK2
2727 MC_(make_mem_noaccess)(a, 8);
2728 #else
2729 {
2730 UWord sm_off16;
2731 SecMap* sm;
2732
2733 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2734 PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2735 MC_(make_mem_noaccess)(a, 8);
2736 return;
2737 }
2738
2739 sm = get_secmap_for_writing_low(a);
2740 sm_off16 = SM_OFF_16(a);
2741 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2742
2743 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2744 //// Clear the origins for a+0 .. a+7.
2745 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2746 OCacheLine* line;
2747 UWord lineoff = oc_line_offset(a);
2748 tl_assert(lineoff >= 0
2749 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2750 line = find_OCacheLine( a );
2751 line->descr[lineoff+0] = 0;
2752 line->descr[lineoff+1] = 0;
2753 }
2754 //// END inlined, specialised version of MC_(helperc_b_store8)
2755 }
2756 #endif
2757 }
2758
2759
2760 /*------------------------------------------------------------*/
2761 /*--- Stack pointer adjustment ---*/
2762 /*------------------------------------------------------------*/
2763
2764 #ifdef PERF_FAST_STACK
2765 # define MAYBE_USED
2766 #else
2767 # define MAYBE_USED __attribute__((unused))
2768 #endif
2769
2770 /*--------------- adjustment by 4 bytes ---------------*/
2771
2772 MAYBE_USED
2773 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2774 {
2775 UInt otag = ecu | MC_OKIND_STACK;
2776 PROF_EVENT(110, "new_mem_stack_4");
2777 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2778 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2779 } else {
2780 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2781 }
2782 }
2783
2784 MAYBE_USED
2785 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2786 {
2787 PROF_EVENT(110, "new_mem_stack_4");
2788 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2789 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2790 } else {
2791 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2792 }
2793 }
2794
2795 MAYBE_USED
2796 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2797 {
2798 PROF_EVENT(120, "die_mem_stack_4");
2799 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2800 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2801 } else {
2802 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2803 }
2804 }
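
/* Illustrative note: the die_mem_stack_N handlers receive the
   post-adjustment (higher) SP, so they revoke the N bytes just below
   it -- hence the "new_SP-4" above -- with the whole window shifted
   down by VG_STACK_REDZONE_SZB, which is zero on targets without a
   stack redzone. */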
2805
2806 /*--------------- adjustment by 8 bytes ---------------*/
2807
2808 MAYBE_USED
2809 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2810 {
2811 UInt otag = ecu | MC_OKIND_STACK;
2812 PROF_EVENT(111, "new_mem_stack_8");
2813 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2814 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2815 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2816 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2817 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2818 } else {
2819 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2820 }
2821 }
2822
2823 MAYBE_USED
2824 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2825 {
2826 PROF_EVENT(111, "new_mem_stack_8");
2827 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2828 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2829 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2830 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2831 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2832 } else {
2833 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2834 }
2835 }
2836
2837 MAYBE_USED
2838 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2839 {
2840 PROF_EVENT(121, "die_mem_stack_8");
2841 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2842 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2843 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2844 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2845 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2846 } else {
2847 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2848 }
2849 }
2850
2851 /*--------------- adjustment by 12 bytes ---------------*/
2852
2853 MAYBE_USED
2854 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2855 {
2856 UInt otag = ecu | MC_OKIND_STACK;
2857 PROF_EVENT(112, "new_mem_stack_12");
2858 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2859 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2860 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2861 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2862 /* from previous test we don't have 8-alignment at offset +0,
2863 hence must have 8 alignment at offsets +4/-4. Hence safe to
2864          do 4 at +0 and then 8 at +4. */
2865 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2866 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2867 } else {
2868 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2869 }
2870 }
2871
2872 MAYBE_USED
2873 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2874 {
2875 PROF_EVENT(112, "new_mem_stack_12");
2876 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2877 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2878 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2879 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2880 /* from previous test we don't have 8-alignment at offset +0,
2881 hence must have 8 alignment at offsets +4/-4. Hence safe to
2882          do 4 at +0 and then 8 at +4. */
2883 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2884 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2885 } else {
2886 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2887 }
2888 }
2889
2890 MAYBE_USED
2891 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2892 {
2893 PROF_EVENT(122, "die_mem_stack_12");
2894 /* Note the -12 in the test */
2895 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2896 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2897 -4. */
2898 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2899 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2900 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2901 /* We have 4-alignment at +0, but we don't have 8-alignment at
2902 -12. So we must have 8-alignment at -8. Hence do 4 at -12
2903 and then 8 at -8. */
2904 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2905 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2906 } else {
2907 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2908 }
2909 }
2910
2911 /*--------------- adjustment by 16 bytes ---------------*/
2912
2913 MAYBE_USED
2914 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2915 {
2916 UInt otag = ecu | MC_OKIND_STACK;
2917 PROF_EVENT(113, "new_mem_stack_16");
2918 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2919 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2920 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2921 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2922 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2923 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2924 Hence do 4 at +0, 8 at +4, 4 at +12. */
2925 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2926 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2927 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2928 } else {
2929 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2930 }
2931 }
2932
2933 MAYBE_USED
2934 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2935 {
2936 PROF_EVENT(113, "new_mem_stack_16");
2937 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2938 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2939 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2940 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2941 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2942 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2943 Hence do 4 at +0, 8 at +4, 4 at +12. */
2944 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2945 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2946 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2947 } else {
2948 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2949 }
2950 }
2951
2952 MAYBE_USED
2953 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2954 {
2955 PROF_EVENT(123, "die_mem_stack_16");
2956 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2957 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2958 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2959 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2960 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2961 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
2962 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2963 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2964 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2965 } else {
2966 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2967 }
2968 }
2969
2970 /*--------------- adjustment by 32 bytes ---------------*/
2971
2972 MAYBE_USED
2973 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2974 {
2975 UInt otag = ecu | MC_OKIND_STACK;
2976 PROF_EVENT(114, "new_mem_stack_32");
2977 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2978 /* Straightforward */
2979 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2980 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2981 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2982 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2983 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2984 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2985 +0,+28. */
2986 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2987 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2988 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2989 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2990 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2991 } else {
2992 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2993 }
2994 }
2995
2996 MAYBE_USED
2997 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2998 {
2999 PROF_EVENT(114, "new_mem_stack_32");
3000 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3001 /* Straightforward */
3002 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3003 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3004 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3005 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3006 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3008 +0,+28. */
3009 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3010 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3011 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3012 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3013 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3014 } else {
3015 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3016 }
3017 }
3018
3019 MAYBE_USED
3020 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3021 {
3022 PROF_EVENT(124, "die_mem_stack_32");
3023 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3024 /* Straightforward */
3025 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3026 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3027 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3028 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3029 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3030 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3031 4 at -32,-4. */
3032 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3033 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3034 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3035 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3036 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3037 } else {
3038 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3039 }
3040 }
3041
3042 /*--------------- adjustment by 112 bytes ---------------*/
3043
3044 MAYBE_USED
3045 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3046 {
3047 UInt otag = ecu | MC_OKIND_STACK;
3048 PROF_EVENT(115, "new_mem_stack_112");
3049 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3050 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3051 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3052 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3053 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3054 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3055 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3056 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3057 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3058 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3059 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3060 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3061 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3062 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3063 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3064 } else {
3065 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3066 }
3067 }
3068
3069 MAYBE_USED
3070 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3071 {
3072 PROF_EVENT(115, "new_mem_stack_112");
3073 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3074 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3075 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3076 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3077 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3078 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3079 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3080 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3081 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3082 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3083 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3084 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3085 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3086 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3087 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3088 } else {
3089 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3090 }
3091 }
3092
3093 MAYBE_USED
3094 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3095 {
3096 PROF_EVENT(125, "die_mem_stack_112");
3097 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3098 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3099 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3100 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3101 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3102 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3103 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3104 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3105 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3106 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3107 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3108 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3109 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3110 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3111 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3112 } else {
3113 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3114 }
3115 }
3116
3117 /*--------------- adjustment by 128 bytes ---------------*/
3118
3119 MAYBE_USED
3120 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3121 {
3122 UInt otag = ecu | MC_OKIND_STACK;
3123 PROF_EVENT(116, "new_mem_stack_128");
3124 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3125 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3126 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3127 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3128 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3129 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3130 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3131 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3132 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3133 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3134 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3135 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3136 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3137 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3138 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3139 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3140 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3141 } else {
3142 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3143 }
3144 }
3145
3146 MAYBE_USED
3147 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3148 {
3149 PROF_EVENT(116, "new_mem_stack_128");
3150 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3151 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3152 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3153 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3154 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3155 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3156 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3157 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3158 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3159 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3160 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3161 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3162 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3163 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3164 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3165 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3166 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3167 } else {
3168 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3169 }
3170 }
3171
3172 MAYBE_USED
3173 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3174 {
3175 PROF_EVENT(126, "die_mem_stack_128");
3176 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3177 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3178 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3179 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3180 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3181 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3182 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3183 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3184 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3185 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3186 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3187 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3188 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3189 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3190 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3191 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3192 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3193 } else {
3194 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3195 }
3196 }
3197
3198 /*--------------- adjustment by 144 bytes ---------------*/
3199
3200 MAYBE_USED
3201 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3202 {
3203 UInt otag = ecu | MC_OKIND_STACK;
3204 PROF_EVENT(117, "new_mem_stack_144");
3205 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3206 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3207 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3208 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3209 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3210 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3211 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3212 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3213 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3214 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3215 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3216 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3217 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3218 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3219 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3220 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3221 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3222 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3223 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3224 } else {
3225 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3226 }
3227 }
3228
3229 MAYBE_USED
3230 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3231 {
3232 PROF_EVENT(117, "new_mem_stack_144");
3233 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3234 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3235 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3236 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3237 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3238 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3239 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3240 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3241 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3242 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3243 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3244 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3245 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3246 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3247 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3248 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3249 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3250 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3251 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3252 } else {
3253 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3254 }
3255 }
3256
3257 MAYBE_USED
3258 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3259 {
3260 PROF_EVENT(127, "die_mem_stack_144");
3261 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3262 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3263 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3264 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3265 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3266 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3267 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3268 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3269 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3270 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3271 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3272 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3273 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3274 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3275 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3276 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3277 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3278 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3279 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3280 } else {
3281 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3282 }
3283 }
3284
3285 /*--------------- adjustment by 160 bytes ---------------*/
3286
3287 MAYBE_USED
3288 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3289 {
3290 UInt otag = ecu | MC_OKIND_STACK;
3291 PROF_EVENT(118, "new_mem_stack_160");
3292 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3293 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3294 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3295 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3296 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3297 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3298 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3299 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3300 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3301 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3302 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3303 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3304 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3305 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3306 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3307 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3308 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3309 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3310 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3311 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3312 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3313 } else {
3314 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3315 }
3316 }
3317
3318 MAYBE_USED
3319 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3320 {
3321 PROF_EVENT(118, "new_mem_stack_160");
3322 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3323 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3324 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3325 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3326 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3327 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3328 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3329 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3330 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3331 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3332 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3333 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3334 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3335 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3336 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3337 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3338 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3339 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3340 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3341 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3342 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3343 } else {
3344 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3345 }
3346 }
3347
3348 MAYBE_USED
3349 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3350 {
3351 PROF_EVENT(128, "die_mem_stack_160");
3352 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3353 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3354 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3355 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3356 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3357 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3358 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3359 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3360 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3361 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3362 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3363 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3364 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3365 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3366 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3367 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3368 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3369 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3370 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3371 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3372 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3373 } else {
3374 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3375 }
3376 }
3377
3378 /*--------------- adjustment by N bytes ---------------*/
3379
3380 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3381 {
3382 UInt otag = ecu | MC_OKIND_STACK;
3383 PROF_EVENT(115, "new_mem_stack_w_otag");
3384 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3385 }
3386
3387 static void mc_new_mem_stack ( Addr a, SizeT len )
3388 {
3389 PROF_EVENT(115, "new_mem_stack");
3390 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3391 }
3392
3393 static void mc_die_mem_stack ( Addr a, SizeT len )
3394 {
3395 PROF_EVENT(125, "die_mem_stack");
3396 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3397 }
3398
3399
3400 /* The AMD64 ABI says:
3401
3402 "The 128-byte area beyond the location pointed to by %rsp is considered
3403 to be reserved and shall not be modified by signal or interrupt
3404 handlers. Therefore, functions may use this area for temporary data
3405 that is not needed across function calls. In particular, leaf functions
3406 may use this area for their entire stack frame, rather than adjusting
3407 the stack pointer in the prologue and epilogue. This area is known as
3408 red zone [sic]."
3409
3410 So after any call or return we need to mark this redzone as containing
3411 undefined values.
3412
3413 Consider this: we're in function f. f calls g. g moves rsp down
3414 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3415 defined. g returns. f is buggy and reads from parts of the red zone
3416 that it didn't write on. But because g filled that area in, f is going
3417 to be picking up defined V bits and so any errors from reading bits of
3418 the red zone it didn't write, will be missed. The only solution I could
3419 think of was to make the red zone undefined when g returns to f.
3420
3421 This is in accordance with the ABI, which makes it clear the redzone
3422 is volatile across function calls.
3423
3424 The problem occurs the other way round too: f could fill the RZ up
3425 with defined values and g could mistakenly read them. So the RZ
3426 also needs to be nuked on function calls.
3427 */
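/* Illustrative sketch (not part of Memcheck) of the f/g scenario
   described above.  'g' parks a temporary in its red zone, leaving
   defined V bits behind; a buggy 'f' then reads below its own stack
   pointer after the call.  Unless the red zone is re-marked
   undefined when g returns, those stale-but-defined bits would mask
   the error.  The function names and the 64-byte offset are invented
   for the example. */
#if 0
static long g ( void )
{
   volatile long tmp = 42;   /* may be spilled into g's red zone */
   return tmp;
}
static long f ( void )
{
   long anchor;
   (void) g();
   /* Buggy: reads stack memory below SP that f never wrote.  With
      the red zone re-poisoned on g's return, Memcheck reports this
      as a use of uninitialised data. */
   return *(volatile long*)((char*)&anchor - 64);
}
#endif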
3428
3429
3430 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3431 improved so as to have a lower miss rate. */
3432
3433 static UWord stats__nia_cache_queries = 0;
3434 static UWord stats__nia_cache_misses = 0;
3435
3436 typedef
3437 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3438 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3439 WCacheEnt;
3440
3441 #define N_NIA_TO_ECU_CACHE 511
3442
3443 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3444
3445 static void init_nia_to_ecu_cache ( void )
3446 {
3447 UWord i;
3448 Addr zero_addr = 0;
3449 ExeContext* zero_ec;
3450 UInt zero_ecu;
3451 /* Fill all the slots with an entry for address zero, and the
3452 relevant otags accordingly. Hence the cache is initially filled
3453 with valid data. */
3454 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3455 tl_assert(zero_ec);
3456 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3457 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3458 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3459 nia_to_ecu_cache[i].nia0 = zero_addr;
3460 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3461 nia_to_ecu_cache[i].nia1 = zero_addr;
3462 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3463 }
3464 }
3465
3466 static inline UInt convert_nia_to_ecu ( Addr nia )
3467 {
3468 UWord i;
3469 UInt ecu;
3470 ExeContext* ec;
3471
3472 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3473
3474 stats__nia_cache_queries++;
3475 i = nia % N_NIA_TO_ECU_CACHE;
3476 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3477
3478 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3479 return nia_to_ecu_cache[i].ecu0;
3480
3481 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3482 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3483 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3484 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3485 # undef SWAP
3486 return nia_to_ecu_cache[i].ecu0;
3487 }
3488
3489 stats__nia_cache_misses++;
3490 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3491 tl_assert(ec);
3492 ecu = VG_(get_ECU_from_ExeContext)(ec);
3493 tl_assert(VG_(is_plausible_ECU)(ecu));
3494
3495 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3496 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3497
3498 nia_to_ecu_cache[i].nia0 = nia;
3499 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3500 return ecu;
3501 }
3502
3503
3504 /* Note that this serves both the origin-tracking and
3505 no-origin-tracking modes. We assume that calls to it are
3506 sufficiently infrequent that it isn't worth specialising for the
3507 with/without origin-tracking cases. */
3508 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3509 {
3510 UInt otag;
3511 tl_assert(sizeof(UWord) == sizeof(SizeT));
3512 if (0)
3513 VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3514 base, len, nia );
3515
3516 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3517 UInt ecu = convert_nia_to_ecu ( nia );
3518 tl_assert(VG_(is_plausible_ECU)(ecu));
3519 otag = ecu | MC_OKIND_STACK;
3520 } else {
3521 tl_assert(nia == 0);
3522 otag = 0;
3523 }
3524
3525 # if 0
3526 /* Really slow version */
3527 MC_(make_mem_undefined)(base, len, otag);
3528 # endif
3529
3530 # if 0
3531 /* Slow(ish) version, which is fairly easily seen to be correct.
3532 */
3533 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3534 make_aligned_word64_undefined(base + 0, otag);
3535 make_aligned_word64_undefined(base + 8, otag);
3536 make_aligned_word64_undefined(base + 16, otag);
3537 make_aligned_word64_undefined(base + 24, otag);
3538
3539 make_aligned_word64_undefined(base + 32, otag);
3540 make_aligned_word64_undefined(base + 40, otag);
3541 make_aligned_word64_undefined(base + 48, otag);
3542 make_aligned_word64_undefined(base + 56, otag);
3543
3544 make_aligned_word64_undefined(base + 64, otag);
3545 make_aligned_word64_undefined(base + 72, otag);
3546 make_aligned_word64_undefined(base + 80, otag);
3547 make_aligned_word64_undefined(base + 88, otag);
3548
3549 make_aligned_word64_undefined(base + 96, otag);
3550 make_aligned_word64_undefined(base + 104, otag);
3551 make_aligned_word64_undefined(base + 112, otag);
3552 make_aligned_word64_undefined(base + 120, otag);
3553 } else {
3554 MC_(make_mem_undefined)(base, len, otag);
3555 }
3556 # endif
3557
3558 /* Idea is: go fast when
3559 * 8-aligned and length is 128
3560 * the sm is available in the main primary map
3561 * the address range falls entirely with a single secondary map
3562 If all those conditions hold, just update the V+A bits by writing
3563 directly into the vabits array. (If the sm was distinguished, this
3564 will make a copy and then write to it.)
3565 */
3566
3567 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3568 /* Now we know the address range is suitably sized and aligned. */
3569 UWord a_lo = (UWord)(base);
3570 UWord a_hi = (UWord)(base + 128 - 1);
3571 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3572 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3573 // Now we know the entire range is within the main primary map.
3574 SecMap* sm = get_secmap_for_writing_low(a_lo);
3575 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3576 /* Now we know that the entire address range falls within a
3577 single secondary map, and that that secondary 'lives' in
3578 the main primary map. */
3579 if (LIKELY(sm == sm_hi)) {
3580 // Finally, we know that the range is entirely within one secmap.
3581 UWord v_off = SM_OFF(a_lo);
3582 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3583 p[ 0] = VA_BITS16_UNDEFINED;
3584 p[ 1] = VA_BITS16_UNDEFINED;
3585 p[ 2] = VA_BITS16_UNDEFINED;
3586 p[ 3] = VA_BITS16_UNDEFINED;
3587 p[ 4] = VA_BITS16_UNDEFINED;
3588 p[ 5] = VA_BITS16_UNDEFINED;
3589 p[ 6] = VA_BITS16_UNDEFINED;
3590 p[ 7] = VA_BITS16_UNDEFINED;
3591 p[ 8] = VA_BITS16_UNDEFINED;
3592 p[ 9] = VA_BITS16_UNDEFINED;
3593 p[10] = VA_BITS16_UNDEFINED;
3594 p[11] = VA_BITS16_UNDEFINED;
3595 p[12] = VA_BITS16_UNDEFINED;
3596 p[13] = VA_BITS16_UNDEFINED;
3597 p[14] = VA_BITS16_UNDEFINED;
3598 p[15] = VA_BITS16_UNDEFINED;
3599 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3600 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3601 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3602 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3603 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3604 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3605 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3606 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3607 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3608 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3609 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3610 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3611 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3612 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3613 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3614 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3615 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3616 }
3617 return;
3618 }
3619 }
3620 }
3621
3622 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3623 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3624 /* Now we know the address range is suitably sized and aligned. */
3625 UWord a_lo = (UWord)(base);
3626 UWord a_hi = (UWord)(base + 288 - 1);
3627 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3628 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3629 // Now we know the entire range is within the main primary map.
3630 SecMap* sm = get_secmap_for_writing_low(a_lo);
3631 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3632 /* Now we know that the entire address range falls within a
3633 single secondary map, and that that secondary 'lives' in
3634 the main primary map. */
3635 if (LIKELY(sm == sm_hi)) {
3636 // Finally, we know that the range is entirely within one secmap.
3637 UWord v_off = SM_OFF(a_lo);
3638 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3639 p[ 0] = VA_BITS16_UNDEFINED;
3640 p[ 1] = VA_BITS16_UNDEFINED;
3641 p[ 2] = VA_BITS16_UNDEFINED;
3642 p[ 3] = VA_BITS16_UNDEFINED;
3643 p[ 4] = VA_BITS16_UNDEFINED;
3644 p[ 5] = VA_BITS16_UNDEFINED;
3645 p[ 6] = VA_BITS16_UNDEFINED;
3646 p[ 7] = VA_BITS16_UNDEFINED;
3647 p[ 8] = VA_BITS16_UNDEFINED;
3648 p[ 9] = VA_BITS16_UNDEFINED;
3649 p[10] = VA_BITS16_UNDEFINED;
3650 p[11] = VA_BITS16_UNDEFINED;
3651 p[12] = VA_BITS16_UNDEFINED;
3652 p[13] = VA_BITS16_UNDEFINED;
3653 p[14] = VA_BITS16_UNDEFINED;
3654 p[15] = VA_BITS16_UNDEFINED;
3655 p[16] = VA_BITS16_UNDEFINED;
3656 p[17] = VA_BITS16_UNDEFINED;
3657 p[18] = VA_BITS16_UNDEFINED;
3658 p[19] = VA_BITS16_UNDEFINED;
3659 p[20] = VA_BITS16_UNDEFINED;
3660 p[21] = VA_BITS16_UNDEFINED;
3661 p[22] = VA_BITS16_UNDEFINED;
3662 p[23] = VA_BITS16_UNDEFINED;
3663 p[24] = VA_BITS16_UNDEFINED;
3664 p[25] = VA_BITS16_UNDEFINED;
3665 p[26] = VA_BITS16_UNDEFINED;
3666 p[27] = VA_BITS16_UNDEFINED;
3667 p[28] = VA_BITS16_UNDEFINED;
3668 p[29] = VA_BITS16_UNDEFINED;
3669 p[30] = VA_BITS16_UNDEFINED;
3670 p[31] = VA_BITS16_UNDEFINED;
3671 p[32] = VA_BITS16_UNDEFINED;
3672 p[33] = VA_BITS16_UNDEFINED;
3673 p[34] = VA_BITS16_UNDEFINED;
3674 p[35] = VA_BITS16_UNDEFINED;
3675 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3676 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3677 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3678 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3679 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3680 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3681 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3682 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3683 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3684 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3685 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3686 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3687 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3688 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3689 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3690 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3691 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3692 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3693 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3694 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3695 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3696 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3697 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3698 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3699 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3700 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3701 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3702 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3703 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3704 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3705 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3706 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3707 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3708 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3709 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3710 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3711 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3712 }
3713 return;
3714 }
3715 }
3716 }
3717
3718 /* else fall into slow case */
3719 MC_(make_mem_undefined_w_otag)(base, len, otag);
3720 }
3721
3722
3723 /*------------------------------------------------------------*/
3724 /*--- Checking memory ---*/
3725 /*------------------------------------------------------------*/
3726
3727 typedef
3728 enum {
3729 MC_Ok = 5,
3730 MC_AddrErr = 6,
3731 MC_ValueErr = 7
3732 }
3733 MC_ReadResult;
3734
3735
3736 /* Check permissions for address range. If inadequate permissions
3737 exist, *bad_addr is set to the offending address, so the caller can
3738 know what it is. */
3739
3740 /* Returns True if [a .. a+len) is not addressable. Otherwise,
3741 returns False, and if bad_addr is non-NULL, sets *bad_addr to
3742 indicate the lowest failing address. Functions below are
3743 similar. */
3744 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3745 {
3746 SizeT i;
3747 UWord vabits2;
3748
3749 PROF_EVENT(60, "check_mem_is_noaccess");
3750 for (i = 0; i < len; i++) {
3751 PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3752 vabits2 = get_vabits2(a);
3753 if (VA_BITS2_NOACCESS != vabits2) {
3754 if (bad_addr != NULL) *bad_addr = a;
3755 return False;
3756 }
3757 a++;
3758 }
3759 return True;
3760 }
3761
3762 static Bool is_mem_addressable ( Addr a, SizeT len,
3763 /*OUT*/Addr* bad_addr )
3764 {
3765 SizeT i;
3766 UWord vabits2;
3767
3768 PROF_EVENT(62, "is_mem_addressable");
3769 for (i = 0; i < len; i++) {
3770 PROF_EVENT(63, "is_mem_addressable(loop)");
3771 vabits2 = get_vabits2(a);
3772 if (VA_BITS2_NOACCESS == vabits2) {
3773 if (bad_addr != NULL) *bad_addr = a;
3774 return False;
3775 }
3776 a++;
3777 }
3778 return True;
3779 }
3780
3781 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3782 /*OUT*/Addr* bad_addr,
3783 /*OUT*/UInt* otag )
3784 {
3785 SizeT i;
3786 UWord vabits2;
3787
3788 PROF_EVENT(64, "is_mem_defined");
3789 DEBUG("is_mem_defined\n");
3790
3791 if (otag) *otag = 0;
3792 if (bad_addr) *bad_addr = 0;
3793 for (i = 0; i < len; i++) {
3794 PROF_EVENT(65, "is_mem_defined(loop)");
3795 vabits2 = get_vabits2(a);
3796 if (VA_BITS2_DEFINED != vabits2) {
3797 // Error! Nb: Report addressability errors in preference to
3798 // definedness errors. And don't report definedness errors unless
3799 // --undef-value-errors=yes.
3800 if (bad_addr) {
3801 *bad_addr = a;
3802 }
3803 if (VA_BITS2_NOACCESS == vabits2) {
3804 return MC_AddrErr;
3805 }
3806 if (MC_(clo_mc_level) >= 2) {
3807 if (otag && MC_(clo_mc_level) == 3) {
3808 *otag = MC_(helperc_b_load1)( a );
3809 }
3810 return MC_ValueErr;
3811 }
3812 }
3813 a++;
3814 }
3815 return MC_Ok;
3816 }
3817
3818
3819 /* Like is_mem_defined but doesn't give up at the first uninitialised
3820 byte -- the entire range is always checked. This is important for
3821 detecting errors in the case where a checked range strays into
3822 invalid memory, but that fact is not detected by the ordinary
3823 is_mem_defined(), because of an undefined section that precedes the
3824 out of range section, possibly as a result of an alignment hole in
3825 the checked data. This version always checks the entire range and
3826 can report both a definedness and an accessibility error, if
3827 necessary. */
3828 static void is_mem_defined_comprehensive (
3829 Addr a, SizeT len,
3830 /*OUT*/Bool* errorV, /* is there a definedness err? */
3831 /*OUT*/Addr* bad_addrV, /* if so where? */
3832 /*OUT*/UInt* otagV, /* and what's its otag? */
3833 /*OUT*/Bool* errorA, /* is there an addressability err? */
3834 /*OUT*/Addr* bad_addrA /* if so where? */
3835 )
3836 {
3837 SizeT i;
3838 UWord vabits2;
3839 Bool already_saw_errV = False;
3840
3841 PROF_EVENT(64, "is_mem_defined"); // fixme
3842 DEBUG("is_mem_defined_comprehensive\n");
3843
3844 tl_assert(!(*errorV || *errorA));
3845
3846 for (i = 0; i < len; i++) {
3847 PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3848 vabits2 = get_vabits2(a);
3849 switch (vabits2) {
3850 case VA_BITS2_DEFINED:
3851 a++;
3852 break;
3853 case VA_BITS2_UNDEFINED:
3854 case VA_BITS2_PARTDEFINED:
3855 if (!already_saw_errV) {
3856 *errorV = True;
3857 *bad_addrV = a;
3858 if (MC_(clo_mc_level) == 3) {
3859 *otagV = MC_(helperc_b_load1)( a );
3860 } else {
3861 *otagV = 0;
3862 }
3863 already_saw_errV = True;
3864 }
3865 a++; /* keep going */
3866 break;
3867 case VA_BITS2_NOACCESS:
3868 *errorA = True;
3869 *bad_addrA = a;
3870 return; /* give up now. */
3871 default:
3872 tl_assert(0);
3873 }
3874 }
3875 }
3876
3877
3878 /* Check a zero-terminated ascii string. Tricky -- don't want to
3879 examine the actual bytes, to find the end, until we're sure it is
3880 safe to do so. */
3881
3882 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3883 {
3884 UWord vabits2;
3885
3886 PROF_EVENT(66, "mc_is_defined_asciiz");
3887 DEBUG("mc_is_defined_asciiz\n");
3888
3889 if (otag) *otag = 0;
3890 if (bad_addr) *bad_addr = 0;
3891 while (True) {
3892 PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3893 vabits2 = get_vabits2(a);
3894 if (VA_BITS2_DEFINED != vabits2) {
3895 // Error! Nb: Report addressability errors in preference to
3896 // definedness errors. And don't report definedness errors unless
3897 // --undef-value-errors=yes.
3898 if (bad_addr) {
3899 *bad_addr = a;
3900 }
3901 if (VA_BITS2_NOACCESS == vabits2) {
3902 return MC_AddrErr;
3903 }
3904 if (MC_(clo_mc_level) >= 2) {
3905 if (otag && MC_(clo_mc_level) == 3) {
3906 *otag = MC_(helperc_b_load1)( a );
3907 }
3908 return MC_ValueErr;
3909 }
3910 }
3911 /* Ok, a is safe to read. */
3912 if (* ((UChar*)a) == 0) {
3913 return MC_Ok;
3914 }
3915 a++;
3916 }
3917 }
3918
3919
3920 /*------------------------------------------------------------*/
3921 /*--- Memory event handlers ---*/
3922 /*------------------------------------------------------------*/
3923
3924 static
3925 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
3926 Addr base, SizeT size )
3927 {
3928 Addr bad_addr;
3929 Bool ok = is_mem_addressable ( base, size, &bad_addr );
3930
3931 if (!ok) {
3932 switch (part) {
3933 case Vg_CoreSysCall:
3934 MC_(record_memparam_error) ( tid, bad_addr,
3935 /*isAddrErr*/True, s, 0/*otag*/ );
3936 break;
3937
3938 case Vg_CoreSignal:
3939 MC_(record_core_mem_error)( tid, s );
3940 break;
3941
3942 default:
3943 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3944 }
3945 }
3946 }
3947
3948 static
3949 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
3950 Addr base, SizeT size )
3951 {
3952 UInt otag = 0;
3953 Addr bad_addr;
3954 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3955
3956 if (MC_Ok != res) {
3957 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3958
3959 switch (part) {
3960 case Vg_CoreSysCall:
3961 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3962 isAddrErr ? 0 : otag );
3963 break;
3964
3965 case Vg_CoreSysCallArgInMem:
3966 MC_(record_regparam_error) ( tid, s, otag );
3967 break;
3968
3969 /* If we're being asked to jump to a silly address, record an error
3970 message before potentially crashing the entire system. */
3971 case Vg_CoreTranslate:
3972 MC_(record_jump_error)( tid, bad_addr );
3973 break;
3974
3975 default:
3976 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3977 }
3978 }
3979 }
3980
3981 static
3982 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3983 const HChar* s, Addr str )
3984 {
3985 MC_ReadResult res;
3986 Addr bad_addr = 0; // shut GCC up
3987 UInt otag = 0;
3988
3989 tl_assert(part == Vg_CoreSysCall);
3990 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3991 if (MC_Ok != res) {
3992 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3993 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3994 isAddrErr ? 0 : otag );
3995 }
3996 }
3997
3998 /* Handling of mmap and mprotect is not as simple as it seems.
3999
4000 The underlying semantics are that memory obtained from mmap is
4001 always initialised, but may be inaccessible. And changes to the
4002 protection of memory do not change its contents and hence not its
4003 definedness state. Problem is we can't model
4004 inaccessible-but-with-some-definedness state; once we mark memory
4005 as inaccessible we lose all info about definedness, and so can't
4006 restore that if it is later made accessible again.
4007
4008 One obvious thing to do is this:
4009
4010 mmap/mprotect NONE -> noaccess
4011 mmap/mprotect other -> defined
4012
4013 The problem case here is: taking accessible memory, writing
4014 uninitialised data to it, mprotecting it NONE and later mprotecting
4015 it back to some accessible state causes the undefinedness to be
4016 lost.
4017
4018 A better proposal is:
4019
4020 (1) mmap NONE -> make noaccess
4021 (2) mmap other -> make defined
4022
4023 (3) mprotect NONE -> # no change
4024 (4) mprotect other -> change any "noaccess" to "defined"
4025
4026 (2) is OK because memory newly obtained from mmap really is defined
4027 (zeroed out by the kernel -- doing anything else would
4028 constitute a massive security hole.)
4029
4030 (1) is OK because the only way to make the memory usable is via
4031 (4), in which case we also wind up correctly marking it all as
4032 defined.
4033
4034 (3) is the weak case. We choose not to change memory state.
4035 (presumably the range is in some mixture of "defined" and
4036 "undefined", viz, accessible but with arbitrary V bits). Doing
4037 nothing means we retain the V bits, so that if the memory is
4038 later mprotected "other", the V bits remain unchanged, so there
4039 can be no false negatives. The bad effect is that if there's
4040 an access in the area, then MC cannot warn; but at least we'll
4041 get a SEGV to show, so it's better than nothing.
4042
4043 Consider the sequence (3) followed by (4). Any memory that was
4044 "defined" or "undefined" previously retains its state (as
4045 required). Any memory that was "noaccess" before can only have
4046 been made that way by (1), and so it's OK to change it to
4047 "defined".
4048
4049 See https://bugs.kde.org/show_bug.cgi?id=205541
4050 and https://bugs.kde.org/show_bug.cgi?id=210268
4051 */
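/* Client-side sketch (not part of Memcheck) of the problem case
   discussed above.  Partially-initialised data is copied into an
   mmap'd page, which is then mprotect'd to PROT_NONE and back.
   Under the naive scheme ("mprotect other -> defined") the final use
   would be wrongly accepted; under rules (3) and (4) above the
   undefined V bits survive the round trip and the error is still
   reported.  Names and sizes are invented; assumes POSIX
   mmap/mprotect. */
#if 0
#include <sys/mman.h>
#include <string.h>
static int example ( void )
{
   char  buf[64];                      /* deliberately uninitialised */
   char* p = mmap(0, 4096, PROT_READ|PROT_WRITE,
                  MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   if (p == MAP_FAILED) return -1;
   memcpy(p, buf, sizeof buf);         /* p[0..63] now undefined     */
   mprotect(p, 4096, PROT_NONE);       /* rule (3): V bits retained  */
   mprotect(p, 4096, PROT_READ);       /* rule (4): only noaccess
                                          bytes become defined       */
   return p[0] == 'x' ? 1 : 0;         /* still flagged: conditional
                                          depends on undefined data  */
}
#endif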
4052 static
4053 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4054 ULong di_handle )
4055 {
4056 if (rr || ww || xx) {
4057 /* (2) mmap/mprotect other -> defined */
4058 MC_(make_mem_defined)(a, len);
4059 } else {
4060 /* (1) mmap/mprotect NONE -> noaccess */
4061 MC_(make_mem_noaccess)(a, len);
4062 }
4063 }
4064
4065 static
4066 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4067 {
4068 if (rr || ww || xx) {
4069 /* (4) mprotect other -> change any "noaccess" to "defined" */
4070 make_mem_defined_if_noaccess(a, len);
4071 } else {
4072 /* (3) mprotect NONE -> # no change */
4073 /* do nothing */
4074 }
4075 }
4076
4077
4078 static
4079 void mc_new_mem_startup( Addr a, SizeT len,
4080 Bool rr, Bool ww, Bool xx, ULong di_handle )
4081 {
4082 // Because code is defined, initialised variables get put in the data
4083 // segment and are defined, and uninitialised variables get put in the
4084 // bss segment and are auto-zeroed (and so defined).
4085 //
4086 // It's possible that there will be padding between global variables.
4087 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4088 // a program uses it, Memcheck will not complain. This is arguably a
4089 // false negative, but it's a grey area -- the behaviour is defined (the
4090 // padding is zeroed) but it's probably not what the user intended. And
4091 // we can't avoid it.
4092 //
4093 // Note: we generally ignore RWX permissions, because we can't track them
4094 // without requiring more than one A bit which would slow things down a
4095 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4096 // So we mark any such pages as "unaddressable".
4097 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4098 a, (ULong)len, rr, ww, xx);
4099 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4100 }
4101
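/* Descriptive note (added): called after the core has written to a
   range of client memory (e.g. when a syscall returns data); the
   whole range is therefore marked as defined. */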
4102 static
4103 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4104 {
4105 MC_(make_mem_defined)(a, len);
4106 }
4107
4108
4109 /*------------------------------------------------------------*/
4110 /*--- Register event handlers ---*/
4111 /*------------------------------------------------------------*/
4112
4113 /* Try and get a nonzero origin for the guest state section of thread
4114 tid characterised by (offset,size). Return 0 if nothing to show
4115 for it. */
4116 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4117 Int offset, SizeT size )
4118 {
4119 Int sh2off;
4120 UInt area[3];
4121 UInt otag;
4122 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4123 if (sh2off == -1)
4124 return 0; /* This piece of guest state is not tracked */
4125 tl_assert(sh2off >= 0);
4126 tl_assert(0 == (sh2off % 4));
4127 area[0] = 0x31313131;
4128 area[2] = 0x27272727;
4129 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4130 tl_assert(area[0] == 0x31313131);
4131 tl_assert(area[2] == 0x27272727);
4132 otag = area[1];
4133 return otag;
4134 }
4135
4136
4137 /* When some chunk of guest state is written, mark the corresponding
4138 shadow area as valid. This is used to initialise arbitrarily large
4139 chunks of guest state, hence the _SIZE value, which has to be as
4140 big as the biggest guest state.
4141 */
4142 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4143 PtrdiffT offset, SizeT size)
4144 {
4145 # define MAX_REG_WRITE_SIZE 1712
4146 UChar area[MAX_REG_WRITE_SIZE];
4147 tl_assert(size <= MAX_REG_WRITE_SIZE);
4148 VG_(memset)(area, V_BITS8_DEFINED, size);
4149 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4150 # undef MAX_REG_WRITE_SIZE
4151 }
4152
4153 static
4154 void mc_post_reg_write_clientcall ( ThreadId tid,
4155 PtrdiffT offset, SizeT size, Addr f)
4156 {
4157 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4158 }
4159
4160 /* Look at the definedness of the guest's shadow state for
4161 [offset, offset+len). If any part of that is undefined, record
4162 a parameter error.
4163 */
4164 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4165 PtrdiffT offset, SizeT size)
4166 {
4167 Int i;
4168 Bool bad;
4169 UInt otag;
4170
4171 UChar area[16];
4172 tl_assert(size <= 16);
4173
4174 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4175
4176 bad = False;
4177 for (i = 0; i < size; i++) {
4178 if (area[i] != V_BITS8_DEFINED) {
4179 bad = True;
4180 break;
4181 }
4182 }
4183
4184 if (!bad)
4185 return;
4186
4187 /* We've found some undefinedness. See if we can also find an
4188 origin for it. */
4189 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4190 MC_(record_regparam_error) ( tid, s, otag );
4191 }
4192
4193
4194 /*------------------------------------------------------------*/
4195 /*--- Functions called directly from generated code: ---*/
4196 /*--- Load/store handlers. ---*/
4197 /*------------------------------------------------------------*/
4198
4199 /* Types: LOADV32, LOADV16, LOADV8 are:
4200 UWord fn ( Addr a )
4201 so they return 32-bits on 32-bit machines and 64-bits on
4202 64-bit machines. Addr has the same size as a host word.
4203
4204 LOADV64 is always ULong fn ( Addr a )
4205
4206 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4207 are a UWord, and for STOREV64 they are a ULong.
4208 */
4209
4210 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4211 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4212 primary map. This is all very tricky (and important!), so let's
4213 work through the maths by hand (below), *and* assert for these
4214 values at startup. */
4215 #define MASK(_szInBytes) \
4216 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4217
4218 /* MASK only exists so as to define this macro. */
4219 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4220 ((_a) & MASK((_szInBits>>3)))
4221
4222 /* On a 32-bit machine:
4223
4224 N_PRIMARY_BITS == 16, so
4225 N_PRIMARY_MAP == 0x10000, so
4226 N_PRIMARY_MAP-1 == 0xFFFF, so
4227 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4228
4229 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4230 = ~ ( 0xFFFF | 0xFFFF0000 )
4231 = ~ 0xFFFF'FFFF
4232 = 0
4233
4234 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4235 = ~ ( 0xFFFE | 0xFFFF0000 )
4236 = ~ 0xFFFF'FFFE
4237 = 1
4238
4239 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4240 = ~ ( 0xFFFC | 0xFFFF0000 )
4241 = ~ 0xFFFF'FFFC
4242 = 3
4243
4244 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4245 = ~ ( 0xFFF8 | 0xFFFF0000 )
4246 = ~ 0xFFFF'FFF8
4247 = 7
4248
4249 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4250 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4251 the 1-byte alignment case, it is always a zero value, since MASK(1)
4252 is zero. All as expected.
4253
4254 On a 64-bit machine, it's more complex, since we're testing
4255 simultaneously for misalignment and for the address being at or
4256 above 64G:
4257
4258 N_PRIMARY_BITS == 20, so
4259 N_PRIMARY_MAP == 0x100000, so
4260 N_PRIMARY_MAP-1 == 0xFFFFF, so
4261 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4262
4263 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4264 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4265 = ~ 0xF'FFFF'FFFF
4266 = 0xFFFF'FFF0'0000'0000
4267
4268 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4269 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4270 = ~ 0xF'FFFF'FFFE
4271 = 0xFFFF'FFF0'0000'0001
4272
4273 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4274 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4275 = ~ 0xF'FFFF'FFFC
4276 = 0xFFFF'FFF0'0000'0003
4277
4278 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4279 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4280 = ~ 0xF'FFFF'FFF8
4281 = 0xFFFF'FFF0'0000'0007
4282 */
4283
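/* A tiny worked-example sketch of the MASK/UNALIGNED_OR_HIGH arithmetic
   above.  It is never compiled (kept inside "#if 0"), and it assumes a
   64-bit host with N_PRIMARY_MAP == 0x100000 exactly as in the comment;
   the helper name is made up for illustration. */
#if 0
static void mask_worked_examples ( void )
{
   /* 4-byte access at a 2-mod-4 address: the low alignment bits survive
      the mask, so UNALIGNED_OR_HIGH is nonzero and the slow path runs. */
   tl_assert( (0x8049c7aUL & MASK(4)) != 0 );
   /* Aligned 8-byte access below 64G: the mask wipes every bit, so the
      fast path is taken. */
   tl_assert( (0x8049c78UL & MASK(8)) == 0 );
   /* Any access at or above 64G (bit 36 upwards) is also caught. */
   tl_assert( ((1UL << 36) & MASK(1)) != 0 );
}
#endif
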
4284
4285 /* ------------------------ Size = 16 ------------------------ */
4286
4287 static INLINE
4288 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4289 Addr a, SizeT nBits, Bool isBigEndian )
4290 {
4291 PROF_EVENT(200, "mc_LOADV_128_or_256");
4292
4293 #ifndef PERF_FAST_LOADV
4294 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4295 return;
4296 #else
4297 {
4298 UWord sm_off16, vabits16, j;
4299 UWord nBytes = nBits / 8;
4300 UWord nULongs = nBytes / 8;
4301 SecMap* sm;
4302
4303 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4304 PROF_EVENT(201, "mc_LOADV_128_or_256-slow1");
4305 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4306 return;
4307 }
4308
4309 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4310 suitably aligned, is mapped, and addressable. */
4311 for (j = 0; j < nULongs; j++) {
4312 sm = get_secmap_for_reading_low(a + 8*j);
4313 sm_off16 = SM_OFF_16(a + 8*j);
4314 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4315
4316 // Convert V bits from compact memory form to expanded
4317 // register form.
4318 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4319 res[j] = V_BITS64_DEFINED;
4320 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4321 res[j] = V_BITS64_UNDEFINED;
4322 } else {
4323 /* Slow case: some block of 8 bytes is not all-defined or
4324 all-undefined. */
4325 PROF_EVENT(202, "mc_LOADV_128_or_256-slow2");
4326 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4327 return;
4328 }
4329 }
4330 return;
4331 }
4332 #endif
4333 }
4334
4335 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4336 {
4337 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4338 }
4339 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4340 {
4341 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4342 }
4343
4344 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4345 {
4346 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4347 }
4348 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4349 {
4350 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4351 }
4352
4353 /* ------------------------ Size = 8 ------------------------ */
4354
4355 static INLINE
4356 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4357 {
4358 PROF_EVENT(200, "mc_LOADV64");
4359
4360 #ifndef PERF_FAST_LOADV
4361 return mc_LOADVn_slow( a, 64, isBigEndian );
4362 #else
4363 {
4364 UWord sm_off16, vabits16;
4365 SecMap* sm;
4366
4367 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4368 PROF_EVENT(201, "mc_LOADV64-slow1");
4369 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4370 }
4371
4372 sm = get_secmap_for_reading_low(a);
4373 sm_off16 = SM_OFF_16(a);
4374 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4375
4376 // Handle common case quickly: a is suitably aligned, is mapped, and
4377 // addressable.
4378 // Convert V bits from compact memory form to expanded register form.
4379 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4380 return V_BITS64_DEFINED;
4381 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4382 return V_BITS64_UNDEFINED;
4383 } else {
4384 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4385 PROF_EVENT(202, "mc_LOADV64-slow2");
4386 return mc_LOADVn_slow( a, 64, isBigEndian );
4387 }
4388 }
4389 #endif
4390 }
4391
4392 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4393 {
4394 return mc_LOADV64(a, True);
4395 }
4396 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4397 {
4398 return mc_LOADV64(a, False);
4399 }
4400
4401
4402 static INLINE
4403 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4404 {
4405 PROF_EVENT(210, "mc_STOREV64");
4406
4407 #ifndef PERF_FAST_STOREV
4408 // XXX: this slow case seems to be marginally faster than the fast case!
4409 // Investigate further.
4410 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4411 #else
4412 {
4413 UWord sm_off16, vabits16;
4414 SecMap* sm;
4415
4416 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4417 PROF_EVENT(211, "mc_STOREV64-slow1");
4418 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4419 return;
4420 }
4421
4422 sm = get_secmap_for_reading_low(a);
4423 sm_off16 = SM_OFF_16(a);
4424 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4425
4426 // To understand the below cleverness, see the extensive comments
4427 // in MC_(helperc_STOREV8).
4428 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4429 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4430 return;
4431 }
4432 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4433 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4434 return;
4435 }
4436 PROF_EVENT(232, "mc_STOREV64-slow2");
4437 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4438 return;
4439 }
4440 if (V_BITS64_UNDEFINED == vbits64) {
4441 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4442 return;
4443 }
4444 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4445 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4446 return;
4447 }
4448 PROF_EVENT(232, "mc_STOREV64-slow3");
4449 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4450 return;
4451 }
4452
4453 PROF_EVENT(212, "mc_STOREV64-slow4");
4454 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4455 }
4456 #endif
4457 }
4458
4459 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4460 {
4461 mc_STOREV64(a, vbits64, True);
4462 }
4463 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4464 {
4465 mc_STOREV64(a, vbits64, False);
4466 }
4467
4468
4469 /* ------------------------ Size = 4 ------------------------ */
4470
4471 static INLINE
4472 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4473 {
4474 PROF_EVENT(220, "mc_LOADV32");
4475
4476 #ifndef PERF_FAST_LOADV
4477 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4478 #else
4479 {
4480 UWord sm_off, vabits8;
4481 SecMap* sm;
4482
4483 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4484 PROF_EVENT(221, "mc_LOADV32-slow1");
4485 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4486 }
4487
4488 sm = get_secmap_for_reading_low(a);
4489 sm_off = SM_OFF(a);
4490 vabits8 = sm->vabits8[sm_off];
4491
4492 // Handle common case quickly: a is suitably aligned, is mapped, and the
4493 // entire word32 it lives in is addressable.
4494 // Convert V bits from compact memory form to expanded register form.
4495 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4496 // Almost certainly not necessary, but be paranoid.
4497 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4498 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4499 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4500 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4501 } else {
4502 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4503 PROF_EVENT(222, "mc_LOADV32-slow2");
4504 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4505 }
4506 }
4507 #endif
4508 }
4509
4510 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4511 {
4512 return mc_LOADV32(a, True);
4513 }
4514 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4515 {
4516 return mc_LOADV32(a, False);
4517 }
4518
4519
4520 static INLINE
4521 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4522 {
4523 PROF_EVENT(230, "mc_STOREV32");
4524
4525 #ifndef PERF_FAST_STOREV
4526 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4527 #else
4528 {
4529 UWord sm_off, vabits8;
4530 SecMap* sm;
4531
4532 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4533 PROF_EVENT(231, "mc_STOREV32-slow1");
4534 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4535 return;
4536 }
4537
4538 sm = get_secmap_for_reading_low(a);
4539 sm_off = SM_OFF(a);
4540 vabits8 = sm->vabits8[sm_off];
4541
4542 // To understand the below cleverness, see the extensive comments
4543 // in MC_(helperc_STOREV8).
4544 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
4545 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
4546 return;
4547 }
4548 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4549 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4550 return;
4551 }
4552 PROF_EVENT(232, "mc_STOREV32-slow2");
4553 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4554 return;
4555 }
4556 if (V_BITS32_UNDEFINED == vbits32) {
4557 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4558 return;
4559 }
4560 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4561 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4562 return;
4563 }
4564 PROF_EVENT(233, "mc_STOREV32-slow3");
4565 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4566 return;
4567 }
4568
4569 PROF_EVENT(234, "mc_STOREV32-slow4");
4570 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4571 }
4572 #endif
4573 }
4574
4575 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4576 {
4577 mc_STOREV32(a, vbits32, True);
4578 }
4579 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4580 {
4581 mc_STOREV32(a, vbits32, False);
4582 }
4583
4584
4585 /* ------------------------ Size = 2 ------------------------ */
4586
4587 static INLINE
4588 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4589 {
4590 PROF_EVENT(240, "mc_LOADV16");
4591
4592 #ifndef PERF_FAST_LOADV
4593 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4594 #else
4595 {
4596 UWord sm_off, vabits8;
4597 SecMap* sm;
4598
4599 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4600 PROF_EVENT(241, "mc_LOADV16-slow1");
4601 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4602 }
4603
4604 sm = get_secmap_for_reading_low(a);
4605 sm_off = SM_OFF(a);
4606 vabits8 = sm->vabits8[sm_off];
4607 // Handle common case quickly: a is suitably aligned, is mapped, and is
4608 // addressable.
4609 // Convert V bits from compact memory form to expanded register form
4610 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
4611 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4612 else {
4613 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4614 // the two sub-bytes.
4615 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4616 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
4617 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4618 else {
4619 /* Slow case: the two bytes are not all-defined or all-undefined. */
4620 PROF_EVENT(242, "mc_LOADV16-slow2");
4621 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4622 }
4623 }
4624 }
4625 #endif
4626 }
4627
4628 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4629 {
4630 return mc_LOADV16(a, True);
4631 }
4632 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4633 {
4634 return mc_LOADV16(a, False);
4635 }
4636
4637 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
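/* Worked example (illustrative; assumes the VA_BITS2_* encodings defined
   earlier in this file, i.e. 00b NOACCESS, 01b UNDEFINED, 10b DEFINED):
   with (a & 2) == 0 the low nibble of vabits8 is inspected, so
   vabits8 == 0x06 (01'10b there) means byte a is defined and byte a+1
   undefined -- both accessible, hence True -- whereas vabits8 == 0x02
   (00'10b) means byte a+1 is NOACCESS, hence False. */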
4638 static INLINE
4639 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
4640 {
4641 UInt shift;
4642 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
4643 shift = (a & 2) << 1; // shift by 0 or 4
4644 vabits8 >>= shift; // shift the four bits to the bottom
4645 // check 2 x vabits2 != VA_BITS2_NOACCESS
4646 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
4647 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
4648 }
4649
4650 static INLINE
4651 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4652 {
4653 PROF_EVENT(250, "mc_STOREV16");
4654
4655 #ifndef PERF_FAST_STOREV
4656 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4657 #else
4658 {
4659 UWord sm_off, vabits8;
4660 SecMap* sm;
4661
4662 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4663 PROF_EVENT(251, "mc_STOREV16-slow1");
4664 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4665 return;
4666 }
4667
4668 sm = get_secmap_for_reading_low(a);
4669 sm_off = SM_OFF(a);
4670 vabits8 = sm->vabits8[sm_off];
4671
4672 // To understand the below cleverness, see the extensive comments
4673 // in MC_(helperc_STOREV8).
4674 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
4675 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4676 return;
4677 }
4678 if (!is_distinguished_sm(sm)
4679 && accessible_vabits4_in_vabits8(a, vabits8)) {
4680 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
4681 &(sm->vabits8[sm_off]) );
4682 return;
4683 }
4684 PROF_EVENT(232, "mc_STOREV16-slow2");
4685 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
return;
4686 }
4687 if (V_BITS16_UNDEFINED == vbits16) {
4688 if (vabits8 == VA_BITS8_UNDEFINED) {
4689 return;
4690 }
4691 if (!is_distinguished_sm(sm)
4692 && accessible_vabits4_in_vabits8(a, vabits8)) {
4693 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4694 &(sm->vabits8[sm_off]) );
4695 return;
4696 }
4697 PROF_EVENT(233, "mc_STOREV16-slow3");
4698 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4699 return;
4700 }
4701
4702 PROF_EVENT(234, "mc_STOREV16-slow4");
4703 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4704 }
4705 #endif
4706 }
4707
4708 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4709 {
4710 mc_STOREV16(a, vbits16, True);
4711 }
4712 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4713 {
4714 mc_STOREV16(a, vbits16, False);
4715 }
4716
4717
4718 /* ------------------------ Size = 1 ------------------------ */
4719 /* Note: endianness is irrelevant for size == 1 */
4720
4721 VG_REGPARM(1)
4722 UWord MC_(helperc_LOADV8) ( Addr a )
4723 {
4724 PROF_EVENT(260, "mc_LOADV8");
4725
4726 #ifndef PERF_FAST_LOADV
4727 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4728 #else
4729 {
4730 UWord sm_off, vabits8;
4731 SecMap* sm;
4732
4733 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4734 PROF_EVENT(261, "mc_LOADV8-slow1");
4735 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4736 }
4737
4738 sm = get_secmap_for_reading_low(a);
4739 sm_off = SM_OFF(a);
4740 vabits8 = sm->vabits8[sm_off];
4741 // Convert V bits from compact memory form to expanded register form
4742 // Handle common case quickly: a is mapped, and the entire
4743 // word32 it lives in is addressable.
4744 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
4745 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
4746 else {
4747 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4748 // the single byte.
4749 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4750 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
4751 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4752 else {
4753 /* Slow case: the byte is not all-defined or all-undefined. */
4754 PROF_EVENT(262, "mc_LOADV8-slow2");
4755 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4756 }
4757 }
4758 }
4759 #endif
4760 }
4761
4762
4763 VG_REGPARM(2)
4764 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4765 {
4766 PROF_EVENT(270, "mc_STOREV8");
4767
4768 #ifndef PERF_FAST_STOREV
4769 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4770 #else
4771 {
4772 UWord sm_off, vabits8;
4773 SecMap* sm;
4774
4775 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4776 PROF_EVENT(271, "mc_STOREV8-slow1");
4777 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4778 return;
4779 }
4780
4781 sm = get_secmap_for_reading_low(a);
4782 sm_off = SM_OFF(a);
4783 vabits8 = sm->vabits8[sm_off];
4784
4785 // Clevernesses to speed up storing V bits.
4786 // The 64/32/16 bit cases also have similar clevernesses, but it
4787 // works a little differently to the code below.
4788 //
4789 // Cleverness 1: sometimes we don't have to write the shadow memory at
4790 // all, if we can tell that what we want to write is the same as what is
4791 // already there. These cases are marked below as "defined on defined" and
4792 // "undefined on undefined".
4793 //
4794 // Cleverness 2:
4795 // We also avoid calling mc_STOREVn_slow if the V bits can be
4796 // written directly into the secondary map. V bits can be directly written
4797 // if 4 conditions are respected:
4798 // * The address for which V bits are written is naturally aligned
4799 // on 1 byte for STOREV8 (this is always true)
4800 // on 2 bytes for STOREV16
4801 // on 4 bytes for STOREV32
4802 // on 8 bytes for STOREV64.
4803 // * V bits being written are either fully defined or fully undefined.
4804 // (for partially defined V bits, V bits cannot be directly written,
4805 // as the secondary vbits table must be maintained).
4806 // * the secmap is not distinguished (distinguished maps cannot be
4807 // modified).
4808 // * the memory corresponding to the V bits being written is
4809 // accessible (if one or more bytes are not accessible,
4810 // we must call mc_STOREVn_slow in order to report accessibility
4811 // errors).
4812 // Note that for STOREV32 and STOREV64, it is too expensive
4813 // to verify the accessibility of each byte for the benefit it
4814 // brings. Instead, a quicker check is done by comparing to
4815 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
4816 // but misses some opportunities for direct modification.
4817 // Checking each byte's accessibility was measured with the
4818 // STOREV32 perf tests and slowed down all of them.
4819 // The cases corresponding to cleverness 2 are marked below as
4820 // "direct mod".
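// Worked example (illustrative; assumes the VA_BITS2_*/VA_BITS8_*
// encodings defined earlier in this file): storing a fully defined byte
// when vabits8 == VA_BITS8_UNDEFINED, in a non-distinguished secondary
// map, misses the "defined on defined" test below, but the byte's own
// 2-bit field is not NOACCESS, so insert_vabits2_into_vabits8 rewrites
// just that field to VA_BITS2_DEFINED ("direct mod") and the slow path
// is skipped.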
4821 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
4822 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4823 return; // defined on defined
4824 }
4825 if (!is_distinguished_sm(sm)
4826 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
4827 // direct mod
4828 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4829 &(sm->vabits8[sm_off]) );
4830 return;
4831 }
4832 PROF_EVENT(232, "mc_STOREV8-slow2");
4833 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4834 return;
4835 }
4836 if (V_BITS8_UNDEFINED == vbits8) {
4837 if (vabits8 == VA_BITS8_UNDEFINED) {
4838 return; // undefined on undefined
4839 }
4840 if (!is_distinguished_sm(sm)
4841 && (VA_BITS2_NOACCESS
4842 != extract_vabits2_from_vabits8(a, vabits8))) {
4843 // direct mod
4844 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4845 &(sm->vabits8[sm_off]) );
4846 return;
4847 }
4848 PROF_EVENT(233, "mc_STOREV8-slow3");
4849 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4850 return;
4851 }
4852
4853 // Partially defined word
4854 PROF_EVENT(234, "mc_STOREV8-slow4");
4855 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4856 }
4857 #endif
4858 }
4859
4860
4861 /*------------------------------------------------------------*/
4862 /*--- Functions called directly from generated code: ---*/
4863 /*--- Value-check failure handlers. ---*/
4864 /*------------------------------------------------------------*/
4865
4866 /* Call these ones when an origin is available ... */
4867 VG_REGPARM(1)
4868 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4869 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4870 }
4871
4872 VG_REGPARM(1)
4873 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4874 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4875 }
4876
4877 VG_REGPARM(1)
4878 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4879 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4880 }
4881
4882 VG_REGPARM(1)
4883 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4884 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4885 }
4886
4887 VG_REGPARM(2)
4888 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4889 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4890 }
4891
4892 /* ... and these when an origin isn't available. */
4893
4894 VG_REGPARM(0)
4895 void MC_(helperc_value_check0_fail_no_o) ( void ) {
4896 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4897 }
4898
4899 VG_REGPARM(0)
4900 void MC_(helperc_value_check1_fail_no_o) ( void ) {
4901 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4902 }
4903
4904 VG_REGPARM(0)
4905 void MC_(helperc_value_check4_fail_no_o) ( void ) {
4906 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4907 }
4908
4909 VG_REGPARM(0)
4910 void MC_(helperc_value_check8_fail_no_o) ( void ) {
4911 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4912 }
4913
4914 VG_REGPARM(1)
4915 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4916 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4917 }
4918
4919
4920 /*------------------------------------------------------------*/
4921 /*--- Metadata get/set functions, for client requests. ---*/
4922 /*------------------------------------------------------------*/
4923
4924 // Nb: this expands the V+A bits out into register-form V bits, even though
4925 // they're in memory. This is for backward compatibility, and because it's
4926 // probably what the user wants.
4927
4928 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4929 error [no longer used], 3 == addressing error. */
4930 /* Nb: We used to issue various definedness/addressability errors from here,
4931 but we took them out because they ranged from not-very-helpful to
4932 downright annoying, and they complicated the error data structures. */
4933 static Int mc_get_or_set_vbits_for_client (
4934 Addr a,
4935 Addr vbits,
4936 SizeT szB,
4937 Bool setting, /* True <=> set vbits, False <=> get vbits */
4938 Bool is_client_request /* True <=> real user request
4939 False <=> internal call from gdbserver */
4940 )
4941 {
4942 SizeT i;
4943 Bool ok;
4944 UChar vbits8;
4945
4946 /* Check that arrays are addressable before doing any getting/setting.
4947    vbits is only checked for a real user request. */
4948 for (i = 0; i < szB; i++) {
4949 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4950 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
4951 return 3;
4952 }
4953 }
4954
4955 /* Do the copy */
4956 if (setting) {
4957 /* setting */
4958 for (i = 0; i < szB; i++) {
4959 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4960 tl_assert(ok);
4961 }
4962 } else {
4963 /* getting */
4964 for (i = 0; i < szB; i++) {
4965 ok = get_vbits8(a + i, &vbits8);
4966 tl_assert(ok);
4967 ((UChar*)vbits)[i] = vbits8;
4968 }
4969 if (is_client_request)
4970 // The bytes in vbits[] have now been set, so mark them as such.
4971 MC_(make_mem_defined)(vbits, szB);
4972 }
4973
4974 return 1;
4975 }
4976
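/* A minimal client-side sketch (not part of this file; the helper name is
   made up) showing how the VALGRIND_GET_VBITS request from memcheck.h
   reaches the function above.  The return convention matches the comment
   above it: 1 == OK, 3 == some byte in either array was unaddressable,
   and 0 when not running under Memcheck. */
#if 0
#include <stdio.h>
#include "memcheck.h"

static void get_vbits_example ( void )
{
   int x;                              /* deliberately uninitialised */
   unsigned char vbits[sizeof x];
   int rc = VALGRIND_GET_VBITS(&x, vbits, sizeof x);
   if (rc == 1)
      printf("V bits of x: %02x %02x %02x %02x\n",
             vbits[0], vbits[1], vbits[2], vbits[3]);
}
#endif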
4977
4978 /*------------------------------------------------------------*/
4979 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
4980 /*------------------------------------------------------------*/
4981
4982 /* For the memory leak detector, say whether an entire 64k chunk of
4983 address space is possibly in use, or not. If in doubt return
4984 True.
4985 */
4986 Bool MC_(is_within_valid_secondary) ( Addr a )
4987 {
4988 SecMap* sm = maybe_get_secmap_for ( a );
4989 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
4990 /* Definitely not in use. */
4991 return False;
4992 } else {
4993 return True;
4994 }
4995 }
4996
4997
4998 /* For the memory leak detector, say whether or not a given word
4999 address is to be regarded as valid. */
5000 Bool MC_(is_valid_aligned_word) ( Addr a )
5001 {
5002 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5003 tl_assert(VG_IS_WORD_ALIGNED(a));
5004 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5005 return False;
5006 if (sizeof(UWord) == 8) {
5007 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5008 return False;
5009 }
5010 if (UNLIKELY(MC_(in_ignored_range)(a)))
5011 return False;
5012 else
5013 return True;
5014 }
5015
5016
5017 /*------------------------------------------------------------*/
5018 /*--- Initialisation ---*/
5019 /*------------------------------------------------------------*/
5020
5021 static void init_shadow_memory ( void )
5022 {
5023 Int i;
5024 SecMap* sm;
5025
5026 tl_assert(V_BIT_UNDEFINED == 1);
5027 tl_assert(V_BIT_DEFINED == 0);
5028 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5029 tl_assert(V_BITS8_DEFINED == 0);
5030
5031 /* Build the 3 distinguished secondaries */
5032 sm = &sm_distinguished[SM_DIST_NOACCESS];
5033 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5034
5035 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5036 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5037
5038 sm = &sm_distinguished[SM_DIST_DEFINED];
5039 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5040
5041 /* Set up the primary map. */
5042 /* These entries gradually get overwritten as the used address
5043 space expands. */
5044 for (i = 0; i < N_PRIMARY_MAP; i++)
5045 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5046
5047 /* Auxiliary primary maps */
5048 init_auxmap_L1_L2();
5049
5050 /* auxmap_size = auxmap_used = 0;
5051 no ... these are statically initialised */
5052
5053 /* Secondary V bit table */
5054 secVBitTable = createSecVBitTable();
5055 }
5056
5057
5058 /*------------------------------------------------------------*/
5059 /*--- Sanity check machinery (permanently engaged) ---*/
5060 /*------------------------------------------------------------*/
5061
5062 static Bool mc_cheap_sanity_check ( void )
5063 {
5064 n_sanity_cheap++;
5065 PROF_EVENT(490, "cheap_sanity_check");
5066 /* Check for sane operating level */
5067 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5068 return False;
5069 /* nothing else useful we can rapidly check */
5070 return True;
5071 }
5072
5073 static Bool mc_expensive_sanity_check ( void )
5074 {
5075 Int i;
5076 Word n_secmaps_found;
5077 SecMap* sm;
5078 const HChar* errmsg;
5079 Bool bad = False;
5080
5081 if (0) VG_(printf)("expensive sanity check\n");
5082 if (0) return True;
5083
5084 n_sanity_expensive++;
5085 PROF_EVENT(491, "expensive_sanity_check");
5086
5087 /* Check for sane operating level */
5088 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5089 return False;
5090
5091 /* Check that the 3 distinguished SMs are still as they should be. */
5092
5093 /* Check noaccess DSM. */
5094 sm = &sm_distinguished[SM_DIST_NOACCESS];
5095 for (i = 0; i < SM_CHUNKS; i++)
5096 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5097 bad = True;
5098
5099 /* Check undefined DSM. */
5100 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5101 for (i = 0; i < SM_CHUNKS; i++)
5102 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5103 bad = True;
5104
5105 /* Check defined DSM. */
5106 sm = &sm_distinguished[SM_DIST_DEFINED];
5107 for (i = 0; i < SM_CHUNKS; i++)
5108 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5109 bad = True;
5110
5111 if (bad) {
5112 VG_(printf)("memcheck expensive sanity: "
5113 "distinguished_secondaries have changed\n");
5114 return False;
5115 }
5116
5117 /* If we're not checking for undefined value errors, the secondary V bit
5118 * table should be empty. */
5119 if (MC_(clo_mc_level) == 1) {
5120 if (0 != VG_(OSetGen_Size)(secVBitTable))
5121 return False;
5122 }
5123
5124 /* check the auxiliary maps, very thoroughly */
5125 n_secmaps_found = 0;
5126 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5127 if (errmsg) {
5128 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5129 return False;
5130 }
5131
5132 /* n_secmaps_found is now the number referred to by the auxiliary
5133 primary map. Now add on the ones referred to by the main
5134 primary map. */
5135 for (i = 0; i < N_PRIMARY_MAP; i++) {
5136 if (primary_map[i] == NULL) {
5137 bad = True;
5138 } else {
5139 if (!is_distinguished_sm(primary_map[i]))
5140 n_secmaps_found++;
5141 }
5142 }
5143
5144 /* check that the number of secmaps issued matches the number that
5145 are reachable (iow, no secmap leaks) */
5146 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5147 bad = True;
5148
5149 if (bad) {
5150 VG_(printf)("memcheck expensive sanity: "
5151 "apparent secmap leakage\n");
5152 return False;
5153 }
5154
5155 if (bad) {
5156 VG_(printf)("memcheck expensive sanity: "
5157 "auxmap covers wrong address space\n");
5158 return False;
5159 }
5160
5161 /* there is only one pointer to each secmap (expensive) */
5162
5163 return True;
5164 }
5165
5166 /*------------------------------------------------------------*/
5167 /*--- Command line args ---*/
5168 /*------------------------------------------------------------*/
5169
5170 /* --partial-loads-ok: enabled by default on MacOS.  The MacOS system
5171 graphics libraries are heavily vectorised, and not enabling this by
5172 default causes lots of false errors. */
5173 #if defined(VGO_darwin)
5174 Bool MC_(clo_partial_loads_ok) = True;
5175 #else
5176 Bool MC_(clo_partial_loads_ok) = False;
5177 #endif
5178
5179 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
5180 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
5181 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
5182 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
5183 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
5184 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
5185 UInt MC_(clo_leak_check_heuristics) = 0;
5186 Bool MC_(clo_workaround_gcc296_bugs) = False;
5187 Int MC_(clo_malloc_fill) = -1;
5188 Int MC_(clo_free_fill) = -1;
5189 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_then_free;
5190 Int MC_(clo_mc_level) = 2;
5191 Bool MC_(clo_show_mismatched_frees) = True;
5192
5193 static const HChar * MC_(parse_leak_heuristics_tokens) =
5194 "-,stdstring,length64,newarray,multipleinheritance";
5195 /* The first heuristic value (LchNone) has no keyword, as this is
5196 a fake heuristic used to collect the blocks found without any
5197 heuristic. */
5198
5199 static Bool mc_process_cmd_line_options(const HChar* arg)
5200 {
5201 const HChar* tmp_str;
5202 Int tmp_show;
5203
5204 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5205
5206 /* Set MC_(clo_mc_level):
5207 1 = A bit tracking only
5208 2 = A and V bit tracking, but no V bit origins
5209 3 = A and V bit tracking, and V bit origins
5210
5211 Do this by inspecting --undef-value-errors= and
5212 --track-origins=. Reject the case --undef-value-errors=no
5213 --track-origins=yes as meaningless.
5214 */
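/* Examples (illustrative): with neither option the level stays at its
   default of 2; "--undef-value-errors=no" drops it to 1;
   "--track-origins=yes" raises it to 3; the combination
   "--undef-value-errors=no --track-origins=yes" is rejected via
   bad_level below. */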
5215 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
5216 if (MC_(clo_mc_level) == 3) {
5217 goto bad_level;
5218 } else {
5219 MC_(clo_mc_level) = 1;
5220 return True;
5221 }
5222 }
5223 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
5224 if (MC_(clo_mc_level) == 1)
5225 MC_(clo_mc_level) = 2;
5226 return True;
5227 }
5228 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
5229 if (MC_(clo_mc_level) == 3)
5230 MC_(clo_mc_level) = 2;
5231 return True;
5232 }
5233 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
5234 if (MC_(clo_mc_level) == 1) {
5235 goto bad_level;
5236 } else {
5237 MC_(clo_mc_level) = 3;
5238 return True;
5239 }
5240 }
5241
5242 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
5243 else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
5244 MC_(parse_leak_kinds_tokens),
5245 MC_(clo_error_for_leak_kinds)) {}
5246 else if VG_USET_CLO(arg, "--show-leak-kinds",
5247 MC_(parse_leak_kinds_tokens),
5248 MC_(clo_show_leak_kinds)) {}
5249 else if VG_USET_CLO(arg, "--leak-check-heuristics",
5250 MC_(parse_leak_heuristics_tokens),
5251 MC_(clo_leak_check_heuristics)) {}
5252 else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
5253 if (tmp_show) {
5254 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
5255 } else {
5256 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
5257 }
5258 }
5259 else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
5260 if (tmp_show) {
5261 MC_(clo_show_leak_kinds) |= R2S(Possible);
5262 } else {
5263 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
5264 }
5265 }
5266 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
5267 MC_(clo_workaround_gcc296_bugs)) {}
5268
5269 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
5270 0, 10*1000*1000*1000LL) {}
5271
5272 else if VG_BINT_CLO(arg, "--freelist-big-blocks",
5273 MC_(clo_freelist_big_blocks),
5274 0, 10*1000*1000*1000LL) {}
5275
5276 else if VG_XACT_CLO(arg, "--leak-check=no",
5277 MC_(clo_leak_check), LC_Off) {}
5278 else if VG_XACT_CLO(arg, "--leak-check=summary",
5279 MC_(clo_leak_check), LC_Summary) {}
5280 else if VG_XACT_CLO(arg, "--leak-check=yes",
5281 MC_(clo_leak_check), LC_Full) {}
5282 else if VG_XACT_CLO(arg, "--leak-check=full",
5283 MC_(clo_leak_check), LC_Full) {}
5284
5285 else if VG_XACT_CLO(arg, "--leak-resolution=low",
5286 MC_(clo_leak_resolution), Vg_LowRes) {}
5287 else if VG_XACT_CLO(arg, "--leak-resolution=med",
5288 MC_(clo_leak_resolution), Vg_MedRes) {}
5289 else if VG_XACT_CLO(arg, "--leak-resolution=high",
5290 MC_(clo_leak_resolution), Vg_HighRes) {}
5291
5292 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
5293 Bool ok = parse_ignore_ranges(tmp_str);
5294 if (!ok) {
5295 VG_(message)(Vg_DebugMsg,
5296 "ERROR: --ignore-ranges: "
5297 "invalid syntax, or end <= start in range\n");
5298 return False;
5299 }
5300 if (gIgnoredAddressRanges) {
5301 Word i;
5302 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
5303 UWord val = IAR_INVALID;
5304 UWord key_min = ~(UWord)0;
5305 UWord key_max = (UWord)0;
5306 VG_(indexRangeMap)( &key_min, &key_max, &val,
5307 gIgnoredAddressRanges, i );
5308 tl_assert(key_min <= key_max);
5309 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
5310 if (key_max - key_min > limit) {
5311 VG_(message)(Vg_DebugMsg,
5312 "ERROR: --ignore-ranges: suspiciously large range:\n");
5313 VG_(message)(Vg_DebugMsg,
5314 " 0x%lx-0x%lx (size %ld)\n", key_min, key_max,
5315 key_max - key_min + 1);
5316 return False;
5317 }
5318 }
5319 }
5320 }
5321
5322 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
5323 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
5324
5325 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
5326 MC_(clo_keep_stacktraces), KS_alloc) {}
5327 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
5328 MC_(clo_keep_stacktraces), KS_free) {}
5329 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
5330 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
5331 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
5332 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
5333 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
5334 MC_(clo_keep_stacktraces), KS_none) {}
5335
5336 else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
5337 MC_(clo_show_mismatched_frees)) {}
5338
5339 else
5340 return VG_(replacement_malloc_process_cmd_line_option)(arg);
5341
5342 return True;
5343
5344
5345 bad_level:
5346 VG_(fmsg_bad_option)(arg,
5347 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
5348 }
5349
5350 static void mc_print_usage(void)
5351 {
5352 const HChar* plo_default = "no";
5353 # if defined(VGO_darwin)
5354 plo_default = "yes";
5355 # endif
5356
5357 VG_(printf)(
5358 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
5359 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
5360 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
5361 " [definite,possible]\n"
5362 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
5363 " [definite,possible]\n"
5364 " where kind is one of:\n"
5365 " definite indirect possible reachable all none\n"
5366 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
5367 " reducing leak search false positives [none]\n"
5368 " where heur is one of:\n"
5369 " stdstring length64 newarray multipleinheritance all none\n"
5370 " --show-reachable=yes same as --show-leak-kinds=all\n"
5371 " --show-reachable=no --show-possibly-lost=yes\n"
5372 " same as --show-leak-kinds=definite,possible\n"
5373 " --show-reachable=no --show-possibly-lost=no\n"
5374 " same as --show-leak-kinds=definite\n"
5375 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
5376 " --track-origins=no|yes show origins of undefined values? [no]\n"
5377 " --partial-loads-ok=no|yes too hard to explain here; see manual [%s]\n"
5378 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
5379 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
5380 " --workaround-gcc296-bugs=no|yes self explanatory [no]\n"
5381 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
5382 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
5383 " --free-fill=<hexnumber> fill free'd areas with given value\n"
5384 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
5385 " stack trace(s) to keep for malloc'd/free'd areas [alloc-then-free]\n"
5386 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
5387 , plo_default
5388 );
5389 }
5390
5391 static void mc_print_debug_usage(void)
5392 {
5393 VG_(printf)(
5394 " (none)\n"
5395 );
5396 }
5397
5398
5399 /*------------------------------------------------------------*/
5400 /*--- Client blocks ---*/
5401 /*------------------------------------------------------------*/
5402
5403 /* Client block management:
5404
5405 This is managed as an expanding array of client block descriptors.
5406 Indices of live descriptors are issued to the client, so it can ask
5407 to free them later. Therefore we cannot slide live entries down
5408 over dead ones. Instead we must use free/inuse flags and scan for
5409 an empty slot at allocation time. This in turn means allocation is
5410 relatively expensive, so we hope this does not happen too often.
5411
5412 An unused block has start == size == 0
5413 */
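
/* A minimal client-side sketch (not part of this file; the helper name is
   made up) of how these descriptors get used, assuming the
   VALGRIND_CREATE_BLOCK and VALGRIND_DISCARD requests from memcheck.h:
   the handle returned to the client is simply an index into the cgbs[]
   array below. */
#if 0
#include "memcheck.h"

static void describe_buffer_example ( void )
{
   char buf[64];
   int  h = VALGRIND_CREATE_BLOCK(buf, sizeof buf, "my scratch buffer");
   /* ... any error report involving buf now mentions "my scratch buffer" ... */
   VALGRIND_DISCARD(h);
}
#endif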
5414
5415 /* type CGenBlock is defined in mc_include.h */
5416
5417 /* This subsystem is self-initialising. */
5418 static UWord cgb_size = 0;
5419 static UWord cgb_used = 0;
5420 static CGenBlock* cgbs = NULL;
5421
5422 /* Stats for this subsystem. */
5423 static ULong cgb_used_MAX = 0; /* Max in use. */
5424 static ULong cgb_allocs = 0; /* Number of allocs. */
5425 static ULong cgb_discards = 0; /* Number of discards. */
5426 static ULong cgb_search = 0; /* Number of searches. */
5427
5428
5429 /* Get access to the client block array. */
5430 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5431 /*OUT*/UWord* nBlocks )
5432 {
5433 *blocks = cgbs;
5434 *nBlocks = cgb_used;
5435 }
5436
5437
5438 static
5439 Int alloc_client_block ( void )
5440 {
5441 UWord i, sz_new;
5442 CGenBlock* cgbs_new;
5443
5444 cgb_allocs++;
5445
5446 for (i = 0; i < cgb_used; i++) {
5447 cgb_search++;
5448 if (cgbs[i].start == 0 && cgbs[i].size == 0)
5449 return i;
5450 }
5451
5452 /* Not found. Try to allocate one at the end. */
5453 if (cgb_used < cgb_size) {
5454 cgb_used++;
5455 return cgb_used-1;
5456 }
5457
5458 /* Ok, we have to allocate a new one. */
5459 tl_assert(cgb_used == cgb_size);
5460 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5461
5462 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5463 for (i = 0; i < cgb_used; i++)
5464 cgbs_new[i] = cgbs[i];
5465
5466 if (cgbs != NULL)
5467 VG_(free)( cgbs );
5468 cgbs = cgbs_new;
5469
5470 cgb_size = sz_new;
5471 cgb_used++;
5472 if (cgb_used > cgb_used_MAX)
5473 cgb_used_MAX = cgb_used;
5474 return cgb_used-1;
5475 }
5476
5477
5478 static void show_client_block_stats ( void )
5479 {
5480 VG_(message)(Vg_DebugMsg,
5481 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5482 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5483 );
5484 }
5485 static void print_monitor_help ( void )
5486 {
5487 VG_(gdb_printf)
5488 (
5489 "\n"
5490 "memcheck monitor commands:\n"
5491 " get_vbits <addr> [<len>]\n"
5492 " returns validity bits for <len> (or 1) bytes at <addr>\n"
5493 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5494 " Example: get_vbits 0x8049c78 10\n"
5495 " make_memory [noaccess|undefined\n"
5496 " |defined|Definedifaddressable] <addr> [<len>]\n"
5497 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5498 " check_memory [addressable|defined] <addr> [<len>]\n"
5499 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5500 " and outputs a description of <addr>\n"
5501 " leak_check [full*|summary]\n"
5502 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
5503 " [heuristics heur1,heur2,...]\n"
5504 " [increased*|changed|any]\n"
5505 " [unlimited*|limited <max_loss_records_output>]\n"
5506 " * = defaults\n"
5507 " where kind is one of:\n"
5508 " definite indirect possible reachable all none\n"
5509 " where heur is one of:\n"
5510 " stdstring length64 newarray multipleinheritance all none*\n"
5511 " Examples: leak_check\n"
5512 " leak_check summary any\n"
5513 " leak_check full kinds indirect,possible\n"
5514 " leak_check full reachable any limited 100\n"
5515 " block_list <loss_record_nr>\n"
5516 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
5517 " who_points_at <addr> [<len>]\n"
5518 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
5519 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
5520 " with len > 1, will also show \"interior pointers\")\n"
5521 "\n");
5522 }
5523
5524 /* return True if request recognised, False otherwise */
5525 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
5526 {
5527 HChar* wcmd;
5528 HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
5529 HChar *ssaveptr;
5530
5531 VG_(strcpy) (s, req);
5532
5533 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5534 /* NB: if possible, avoid introducing a new command below which
5535 starts with the same first letter(s) as an already existing
5536 command. This ensures a shorter abbreviation for the user. */
5537 switch (VG_(keyword_id)
5538 ("help get_vbits leak_check make_memory check_memory "
5539 "block_list who_points_at",
5540 wcmd, kwd_report_duplicated_matches)) {
5541 case -2: /* multiple matches */
5542 return True;
5543 case -1: /* not found */
5544 return False;
5545 case 0: /* help */
5546 print_monitor_help();
5547 return True;
5548 case 1: { /* get_vbits */
5549 Addr address;
5550 SizeT szB = 1;
5551 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
5552 UChar vbits;
5553 Int i;
5554 Int unaddressable = 0;
5555 for (i = 0; i < szB; i++) {
5556 Int res = mc_get_or_set_vbits_for_client
5557 (address+i, (Addr) &vbits, 1,
5558 False, /* get them */
5559 False /* is client request */ );
5560 /* we are before the first character on next line, print a \n. */
5561 if ((i % 32) == 0 && i != 0)
5562 VG_(printf) ("\n");
5563 /* we are before the next block of 4 starts, print a space. */
5564 else if ((i % 4) == 0 && i != 0)
5565 VG_(printf) (" ");
5566 if (res == 1) {
5567 VG_(printf) ("%02x", vbits);
5568 } else {
5569 tl_assert(3 == res);
5570 unaddressable++;
5571 VG_(printf) ("__");
5572 }
5573 }
5574 VG_(printf) ("\n");
5575 if (unaddressable) {
5576 VG_(printf)
5577 ("Address %p len %ld has %d bytes unaddressable\n",
5578 (void *)address, szB, unaddressable);
5579 }
5580 }
5581 return True;
5582 }
5583 case 2: { /* leak_check */
5584 Int err = 0;
5585 LeakCheckParams lcp;
5586 HChar* kw;
5587
5588 lcp.mode = LC_Full;
5589 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
5590 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
5591 lcp.heuristics = 0;
5592 lcp.deltamode = LCD_Increased;
5593 lcp.max_loss_records_output = 999999999;
5594 lcp.requested_by_monitor_command = True;
5595
5596 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
5597 kw != NULL;
5598 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
5599 switch (VG_(keyword_id)
5600 ("full summary "
5601 "kinds reachable possibleleak definiteleak "
5602 "heuristics "
5603 "increased changed any "
5604 "unlimited limited ",
5605 kw, kwd_report_all)) {
5606 case -2: err++; break;
5607 case -1: err++; break;
5608 case 0: /* full */
5609 lcp.mode = LC_Full; break;
5610 case 1: /* summary */
5611 lcp.mode = LC_Summary; break;
5612 case 2: { /* kinds */
5613 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5614 if (wcmd == NULL
5615 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
5616 True/*allow_all*/,
5617 wcmd,
5618 &lcp.show_leak_kinds)) {
5619 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
5620 err++;
5621 }
5622 break;
5623 }
5624 case 3: /* reachable */
5625 lcp.show_leak_kinds = MC_(all_Reachedness)();
5626 break;
5627 case 4: /* possibleleak */
5628 lcp.show_leak_kinds
5629 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
5630 break;
5631 case 5: /* definiteleak */
5632 lcp.show_leak_kinds = R2S(Unreached);
5633 break;
5634 case 6: { /* heuristics */
5635 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5636 if (wcmd == NULL
5637 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
5638 True,/*allow_all*/
5639 wcmd,
5640 &lcp.heuristics)) {
5641 VG_(gdb_printf) ("missing or malformed heuristics set\n");
5642 err++;
5643 }
5644 break;
5645 }
5646 case 7: /* increased */
5647 lcp.deltamode = LCD_Increased; break;
5648 case 8: /* changed */
5649 lcp.deltamode = LCD_Changed; break;
5650 case 9: /* any */
5651 lcp.deltamode = LCD_Any; break;
5652 case 10: /* unlimited */
5653 lcp.max_loss_records_output = 999999999; break;
5654 case 11: { /* limited */
5655 Int int_value;
5656 const HChar* endptr;
5657
5658 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5659 if (wcmd == NULL) {
5660 int_value = 0;
5661 endptr = "empty"; /* to report an error below */
5662 } else {
5663 HChar *the_end;
5664 int_value = VG_(strtoll10) (wcmd, &the_end);
5665 endptr = the_end;
5666 }
5667 if (*endptr != '\0')
5668 VG_(gdb_printf) ("missing or malformed integer value\n");
5669 else if (int_value > 0)
5670 lcp.max_loss_records_output = (UInt) int_value;
5671 else
5672 VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
5673 int_value);
5674 break;
5675 }
5676 default:
5677 tl_assert (0);
5678 }
5679 }
5680 if (!err)
5681 MC_(detect_memory_leaks)(tid, &lcp);
5682 return True;
5683 }
5684
5685 case 3: { /* make_memory */
5686 Addr address;
5687 SizeT szB = 1;
5688 Int kwdid = VG_(keyword_id)
5689 ("noaccess undefined defined Definedifaddressable",
5690 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5691 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5692 return True;
5693 switch (kwdid) {
5694 case -2: break;
5695 case -1: break;
5696 case 0: MC_(make_mem_noaccess) (address, szB); break;
5697 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
5698 MC_OKIND_USER ); break;
5699 case 2: MC_(make_mem_defined) ( address, szB ); break;
5700 case 3: make_mem_defined_if_addressable ( address, szB ); break;
5701 default: tl_assert(0);
5702 }
5703 return True;
5704 }
5705
5706 case 4: { /* check_memory */
5707 Addr address;
5708 SizeT szB = 1;
5709 Addr bad_addr;
5710 UInt okind;
5711 const HChar* src;
5712 UInt otag;
5713 UInt ecu;
5714 ExeContext* origin_ec;
5715 MC_ReadResult res;
5716
5717 Int kwdid = VG_(keyword_id)
5718 ("addressable defined",
5719 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5720 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5721 return True;
5722 switch (kwdid) {
5723 case -2: break;
5724 case -1: break;
5725 case 0: /* addressable */
5726 if (is_mem_addressable ( address, szB, &bad_addr ))
5727 VG_(printf) ("Address %p len %ld addressable\n",
5728 (void *)address, szB);
5729 else
5730 VG_(printf)
5731 ("Address %p len %ld not addressable:\nbad address %p\n",
5732 (void *)address, szB, (void *) bad_addr);
5733 MC_(pp_describe_addr) (address);
5734 break;
5735 case 1: /* defined */
5736 res = is_mem_defined ( address, szB, &bad_addr, &otag );
5737 if (MC_AddrErr == res)
5738 VG_(printf)
5739 ("Address %p len %ld not addressable:\nbad address %p\n",
5740 (void *)address, szB, (void *) bad_addr);
5741 else if (MC_ValueErr == res) {
5742 okind = otag & 3;
5743 switch (okind) {
5744 case MC_OKIND_STACK:
5745 src = " was created by a stack allocation"; break;
5746 case MC_OKIND_HEAP:
5747 src = " was created by a heap allocation"; break;
5748 case MC_OKIND_USER:
5749 src = " was created by a client request"; break;
5750 case MC_OKIND_UNKNOWN:
5751 src = ""; break;
5752 default: tl_assert(0);
5753 }
5754 VG_(printf)
5755 ("Address %p len %ld not defined:\n"
5756 "Uninitialised value at %p%s\n",
5757 (void *)address, szB, (void *) bad_addr, src);
5758 ecu = otag & ~3;
5759 if (VG_(is_plausible_ECU)(ecu)) {
5760 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5761 VG_(pp_ExeContext)( origin_ec );
5762 }
5763 }
5764 else
5765 VG_(printf) ("Address %p len %ld defined\n",
5766 (void *)address, szB);
5767 MC_(pp_describe_addr) (address);
5768 break;
5769 default: tl_assert(0);
5770 }
5771 return True;
5772 }
5773
5774 case 5: { /* block_list */
5775 HChar* wl;
5776 HChar *endptr;
5777 UInt lr_nr = 0;
5778 wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
5779 if (wl != NULL)
5780 lr_nr = VG_(strtoull10) (wl, &endptr);
5781 if (wl == NULL || *endptr != '\0') {
5782 VG_(gdb_printf) ("malformed or missing integer\n");
5783 } else {
5784 // lr_nr-1 as what is shown to the user is 1 more than the index in lr_array.
5785 if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
5786 VG_(gdb_printf) ("invalid loss record nr\n");
5787 }
5788 return True;
5789 }
5790
5791 case 6: { /* who_points_at */
5792 Addr address;
5793 SizeT szB = 1;
5794
5795 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5796 return True;
5797 if (address == (Addr) 0) {
5798 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
5799 return True;
5800 }
5801 MC_(who_points_at) (address, szB);
5802 return True;
5803 }
5804
5805 default:
5806 tl_assert(0);
5807 return False;
5808 }
5809 }
5810
5811 /*------------------------------------------------------------*/
5812 /*--- Client requests ---*/
5813 /*------------------------------------------------------------*/
5814
5815 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5816 {
5817 Int i;
5818 Addr bad_addr;
5819
5820 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5821 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5822 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5823 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
5824 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
5825 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
5826 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
5827 && VG_USERREQ__MEMPOOL_FREE != arg[0]
5828 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
5829 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
5830 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
5831 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
5832 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
5833 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
5834 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
5835 return False;
5836
5837 switch (arg[0]) {
5838 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
5839 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5840 if (!ok)
5841 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5842 *ret = ok ? (UWord)NULL : bad_addr;
5843 break;
5844 }
5845
5846 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5847 Bool errorV = False;
5848 Addr bad_addrV = 0;
5849 UInt otagV = 0;
5850 Bool errorA = False;
5851 Addr bad_addrA = 0;
5852 is_mem_defined_comprehensive(
5853 arg[1], arg[2],
5854 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5855 );
5856 if (errorV) {
5857 MC_(record_user_error) ( tid, bad_addrV,
5858 /*isAddrErr*/False, otagV );
5859 }
5860 if (errorA) {
5861 MC_(record_user_error) ( tid, bad_addrA,
5862 /*isAddrErr*/True, 0 );
5863 }
5864 /* Return the lower of the two erring addresses, if any. */
5865 *ret = 0;
5866 if (errorV && !errorA) {
5867 *ret = bad_addrV;
5868 }
5869 if (!errorV && errorA) {
5870 *ret = bad_addrA;
5871 }
5872 if (errorV && errorA) {
5873 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5874 }
5875 break;
5876 }
5877
5878 case VG_USERREQ__DO_LEAK_CHECK: {
5879 LeakCheckParams lcp;
5880
5881 if (arg[1] == 0)
5882 lcp.mode = LC_Full;
5883 else if (arg[1] == 1)
5884 lcp.mode = LC_Summary;
5885 else {
5886 VG_(message)(Vg_UserMsg,
5887 "Warning: unknown memcheck leak search mode\n");
5888 lcp.mode = LC_Full;
5889 }
5890
5891 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
5892 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
5893 lcp.heuristics = MC_(clo_leak_check_heuristics);
5894
5895 if (arg[2] == 0)
5896 lcp.deltamode = LCD_Any;
5897 else if (arg[2] == 1)
5898 lcp.deltamode = LCD_Increased;
5899 else if (arg[2] == 2)
5900 lcp.deltamode = LCD_Changed;
5901 else {
5902 VG_(message)
5903 (Vg_UserMsg,
5904 "Warning: unknown memcheck leak search deltamode\n");
5905 lcp.deltamode = LCD_Any;
5906 }
5907 lcp.max_loss_records_output = 999999999;
5908 lcp.requested_by_monitor_command = False;
5909
5910 MC_(detect_memory_leaks)(tid, &lcp);
5911 *ret = 0; /* return value is meaningless */
5912 break;
5913 }
5914
5915 case VG_USERREQ__MAKE_MEM_NOACCESS:
5916 MC_(make_mem_noaccess) ( arg[1], arg[2] );
5917 *ret = -1;
5918 break;
5919
5920 case VG_USERREQ__MAKE_MEM_UNDEFINED:
5921 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5922 MC_OKIND_USER );
5923 *ret = -1;
5924 break;
5925
5926 case VG_USERREQ__MAKE_MEM_DEFINED:
5927 MC_(make_mem_defined) ( arg[1], arg[2] );
5928 *ret = -1;
5929 break;
5930
5931 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5932 make_mem_defined_if_addressable ( arg[1], arg[2] );
5933 *ret = -1;
5934 break;
5935
5936 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5937 if (arg[1] != 0 && arg[2] != 0) {
5938 i = alloc_client_block();
5939 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5940 cgbs[i].start = arg[1];
5941 cgbs[i].size = arg[2];
5942 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
5943 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5944 *ret = i;
5945 } else
5946 *ret = -1;
5947 break;
5948
5949 case VG_USERREQ__DISCARD: /* discard */
5950 if (cgbs == NULL
5951 || arg[2] >= cgb_used ||
5952 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5953 *ret = 1;
5954 } else {
5955 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5956 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5957 VG_(free)(cgbs[arg[2]].desc);
5958 cgb_discards++;
5959 *ret = 0;
5960 }
5961 break;
5962
5963 case VG_USERREQ__GET_VBITS:
5964 *ret = mc_get_or_set_vbits_for_client
5965 ( arg[1], arg[2], arg[3],
5966 False /* get them */,
5967 True /* is client request */ );
5968 break;
5969
5970 case VG_USERREQ__SET_VBITS:
5971 *ret = mc_get_or_set_vbits_for_client
5972 ( arg[1], arg[2], arg[3],
5973 True /* set them */,
5974 True /* is client request */ );
5975 break;
5976
5977 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5978 UWord** argp = (UWord**)arg;
5979 // MC_(bytes_leaked) et al were set by the last leak check (or zero
5980 // if no prior leak checks performed).
5981 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5982 *argp[2] = MC_(bytes_dubious);
5983 *argp[3] = MC_(bytes_reachable);
5984 *argp[4] = MC_(bytes_suppressed);
5985 // there is no argp[5]
5986 //*argp[5] = MC_(bytes_indirect);
5987 // XXX need to make *argp[1-4] defined; currently done in the
5988 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5989 *ret = 0;
5990 return True;
5991 }
5992 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5993 UWord** argp = (UWord**)arg;
5994 // MC_(blocks_leaked) et al were set by the last leak check (or zero
5995 // if no prior leak checks performed).
5996 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5997 *argp[2] = MC_(blocks_dubious);
5998 *argp[3] = MC_(blocks_reachable);
5999 *argp[4] = MC_(blocks_suppressed);
6000 // there is no argp[5]
6001 //*argp[5] = MC_(blocks_indirect);
6002 // XXX need to make *argp[1-4] defined; currently done in the
6003 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
6004 *ret = 0;
6005 return True;
6006 }
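      /* Client-side sketch for the two counting requests above (illustrative
         only, not compiled here).  As the XXX notes say, the wrapper macros
         zero-initialise the outputs, so they are defined even when the
         program is not running under Memcheck:

            #include "memcheck.h"

            unsigned long leaked, dubious, reachable, suppressed;
            VALGRIND_DO_LEAK_CHECK;
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
            // 'leaked' includes indirectly-lost bytes, per the code above
      */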
6007 case VG_USERREQ__MALLOCLIKE_BLOCK: {
6008 Addr p = (Addr)arg[1];
6009 SizeT sizeB = arg[2];
6010 UInt rzB = arg[3];
6011 Bool is_zeroed = (Bool)arg[4];
6012
6013 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
6014 MC_AllocCustom, MC_(malloc_list) );
6015 if (rzB > 0) {
6016 MC_(make_mem_noaccess) ( p - rzB, rzB);
6017 MC_(make_mem_noaccess) ( p + sizeB, rzB);
6018 }
6019 return True;
6020 }
6021 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
6022 Addr p = (Addr)arg[1];
6023 SizeT oldSizeB = arg[2];
6024 SizeT newSizeB = arg[3];
6025 UInt rzB = arg[4];
6026
6027 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
6028 return True;
6029 }
6030 case VG_USERREQ__FREELIKE_BLOCK: {
6031 Addr p = (Addr)arg[1];
6032 UInt rzB = arg[2];
6033
6034 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
6035 return True;
6036 }
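      /* Client-side sketch of the custom-allocator requests above
         (illustrative only, not compiled here; 'my_pool_alloc' and 'RZ' are
         made-up names).  A pool allocator carving chunks out of one big
         mapping would typically do:

            #include "valgrind.h"

            void* p = my_pool_alloc(n);
            VALGRIND_MALLOCLIKE_BLOCK(p, n, RZ, 0);  // 0 == not zeroed
            // ... use p ...
            VALGRIND_FREELIKE_BLOCK(p, RZ);

         As the handler above shows, RZ bytes on either side of the chunk
         are marked noaccess at allocation time.
      */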
6037
6038 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
6039 HChar* s = (HChar*)arg[1];
6040 Addr dst = (Addr) arg[2];
6041 Addr src = (Addr) arg[3];
6042 SizeT len = (SizeT)arg[4];
6043 MC_(record_overlap_error)(tid, s, src, dst, len);
6044 return True;
6045 }
6046
6047 case VG_USERREQ__CREATE_MEMPOOL: {
6048 Addr pool = (Addr)arg[1];
6049 UInt rzB = arg[2];
6050 Bool is_zeroed = (Bool)arg[3];
6051
6052 MC_(create_mempool) ( pool, rzB, is_zeroed );
6053 return True;
6054 }
6055
6056 case VG_USERREQ__DESTROY_MEMPOOL: {
6057 Addr pool = (Addr)arg[1];
6058
6059 MC_(destroy_mempool) ( pool );
6060 return True;
6061 }
6062
6063 case VG_USERREQ__MEMPOOL_ALLOC: {
6064 Addr pool = (Addr)arg[1];
6065 Addr addr = (Addr)arg[2];
6066 UInt size = arg[3];
6067
6068 MC_(mempool_alloc) ( tid, pool, addr, size );
6069 return True;
6070 }
6071
6072 case VG_USERREQ__MEMPOOL_FREE: {
6073 Addr pool = (Addr)arg[1];
6074 Addr addr = (Addr)arg[2];
6075
6076 MC_(mempool_free) ( pool, addr );
6077 return True;
6078 }
6079
6080 case VG_USERREQ__MEMPOOL_TRIM: {
6081 Addr pool = (Addr)arg[1];
6082 Addr addr = (Addr)arg[2];
6083 UInt size = arg[3];
6084
6085 MC_(mempool_trim) ( pool, addr, size );
6086 return True;
6087 }
6088
6089 case VG_USERREQ__MOVE_MEMPOOL: {
6090 Addr poolA = (Addr)arg[1];
6091 Addr poolB = (Addr)arg[2];
6092
6093 MC_(move_mempool) ( poolA, poolB );
6094 return True;
6095 }
6096
6097 case VG_USERREQ__MEMPOOL_CHANGE: {
6098 Addr pool = (Addr)arg[1];
6099 Addr addrA = (Addr)arg[2];
6100 Addr addrB = (Addr)arg[3];
6101 UInt size = arg[4];
6102
6103 MC_(mempool_change) ( pool, addrA, addrB, size );
6104 return True;
6105 }
6106
6107 case VG_USERREQ__MEMPOOL_EXISTS: {
6108 Addr pool = (Addr)arg[1];
6109
6110 *ret = (UWord) MC_(mempool_exists) ( pool );
6111 return True;
6112 }
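      /* Client-side sketch of the mempool protocol handled above
         (illustrative only, not compiled here; 'pool', 'obj' and 'RZ' are
         made-up names).  'pool' is just an address used as a handle,
         typically the pool's anchor structure:

            #include "valgrind.h"

            VALGRIND_CREATE_MEMPOOL(pool, RZ, 0);          // 0 == not zeroed
            VALGRIND_MEMPOOL_ALLOC (pool, obj, objsize);   // carve out an object
            VALGRIND_MEMPOOL_FREE  (pool, obj);            // give it back
            VALGRIND_DESTROY_MEMPOOL(pool);
      */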
6113
6114 case VG_USERREQ__GDB_MONITOR_COMMAND: {
6115 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
6116 if (handled)
6117 *ret = 1;
6118 else
6119 *ret = 0;
6120 return handled;
6121 }
6122
6123 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
6124 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
6125 Bool addRange
6126 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
6127 Bool ok
6128 = modify_ignore_ranges(addRange, arg[1], arg[2]);
6129 *ret = ok ? 1 : 0;
6130 return True;
6131 }
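      /* Client-side sketch for the range-based toggles above (illustrative
         only, not compiled here; 'mmio_base'/'mmio_len' are made-up names),
         e.g. around deliberate pokes at device memory:

            #include "memcheck.h"

            VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(mmio_base, mmio_len);
            // ... accesses in [mmio_base, mmio_base+mmio_len) draw no
            //     addressability complaints ...
            VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(mmio_base, mmio_len);
      */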
6132
6133 default:
6134 VG_(message)(
6135 Vg_UserMsg,
6136 "Warning: unknown memcheck client request code %llx\n",
6137 (ULong)arg[0]
6138 );
6139 return False;
6140 }
6141 return True;
6142 }
6143
6144
6145 /*------------------------------------------------------------*/
6146 /*--- Crude profiling machinery. ---*/
6147 /*------------------------------------------------------------*/
6148
6149 // We track a number of interesting events (using PROF_EVENT)
6150 // if MC_PROFILE_MEMORY is defined.
6151
6152 #ifdef MC_PROFILE_MEMORY
6153
6154 UInt MC_(event_ctr)[N_PROF_EVENTS];
6155 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
6156
6157 static void init_prof_mem ( void )
6158 {
6159 Int i;
6160 for (i = 0; i < N_PROF_EVENTS; i++) {
6161 MC_(event_ctr)[i] = 0;
6162 MC_(event_ctr_name)[i] = NULL;
6163 }
6164 }
6165
6166 static void done_prof_mem ( void )
6167 {
6168 Int i;
6169 Bool spaced = False;
6170 for (i = 0; i < N_PROF_EVENTS; i++) {
6171 if (!spaced && (i % 10) == 0) {
6172 VG_(printf)("\n");
6173 spaced = True;
6174 }
6175 if (MC_(event_ctr)[i] > 0) {
6176 spaced = False;
6177 VG_(printf)( "prof mem event %3d: %9d %s\n",
6178 i, MC_(event_ctr)[i],
6179 MC_(event_ctr_name)[i]
6180 ? MC_(event_ctr_name)[i] : "unnamed");
6181 }
6182 }
6183 }
6184
6185 #else
6186
6187 static void init_prof_mem ( void ) { }
6188 static void done_prof_mem ( void ) { }
6189
6190 #endif
6191
6192
6193 /*------------------------------------------------------------*/
6194 /*--- Origin tracking stuff ---*/
6195 /*------------------------------------------------------------*/
6196
6197 /*--------------------------------------------*/
6198 /*--- Origin tracking: load handlers ---*/
6199 /*--------------------------------------------*/
6200
6201 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
6202 return or1 > or2 ? or1 : or2;
6203 }
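/* In the load handlers below, line->descr[lineoff] is a 4-bit mask saying
   which of the four bytes of line->w32[lineoff] currently carry a (nonzero)
   origin tag; see the store handlers further down, which maintain it.  A
   load that touches more than one tagged word is resolved by merge_origins,
   i.e. by keeping the numerically larger of the two tags. */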
6204
6205 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
6206 OCacheLine* line;
6207 UChar descr;
6208 UWord lineoff = oc_line_offset(a);
6209 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6210
6211 if (OC_ENABLE_ASSERTIONS) {
6212 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6213 }
6214
6215 line = find_OCacheLine( a );
6216
6217 descr = line->descr[lineoff];
6218 if (OC_ENABLE_ASSERTIONS) {
6219 tl_assert(descr < 0x10);
6220 }
6221
6222 if (LIKELY(0 == (descr & (1 << byteoff)))) {
6223 return 0;
6224 } else {
6225 return line->w32[lineoff];
6226 }
6227 }
6228
6229 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
6230 OCacheLine* line;
6231 UChar descr;
6232 UWord lineoff, byteoff;
6233
6234 if (UNLIKELY(a & 1)) {
6235 /* Handle misaligned case, slowly. */
6236 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
6237 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
6238 return merge_origins(oLo, oHi);
6239 }
6240
6241 lineoff = oc_line_offset(a);
6242 byteoff = a & 3; /* 0 or 2 */
6243
6244 if (OC_ENABLE_ASSERTIONS) {
6245 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6246 }
6247 line = find_OCacheLine( a );
6248
6249 descr = line->descr[lineoff];
6250 if (OC_ENABLE_ASSERTIONS) {
6251 tl_assert(descr < 0x10);
6252 }
6253
6254 if (LIKELY(0 == (descr & (3 << byteoff)))) {
6255 return 0;
6256 } else {
6257 return line->w32[lineoff];
6258 }
6259 }
6260
6261 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
6262 OCacheLine* line;
6263 UChar descr;
6264 UWord lineoff;
6265
6266 if (UNLIKELY(a & 3)) {
6267 /* Handle misaligned case, slowly. */
6268 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
6269 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
6270 return merge_origins(oLo, oHi);
6271 }
6272
6273 lineoff = oc_line_offset(a);
6274 if (OC_ENABLE_ASSERTIONS) {
6275 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6276 }
6277
6278 line = find_OCacheLine( a );
6279
6280 descr = line->descr[lineoff];
6281 if (OC_ENABLE_ASSERTIONS) {
6282 tl_assert(descr < 0x10);
6283 }
6284
6285 if (LIKELY(0 == descr)) {
6286 return 0;
6287 } else {
6288 return line->w32[lineoff];
6289 }
6290 }
6291
6292 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
6293 OCacheLine* line;
6294 UChar descrLo, descrHi, descr;
6295 UWord lineoff;
6296
6297 if (UNLIKELY(a & 7)) {
6298 /* Handle misaligned case, slowly. */
6299 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
6300 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
6301 return merge_origins(oLo, oHi);
6302 }
6303
6304 lineoff = oc_line_offset(a);
6305 if (OC_ENABLE_ASSERTIONS) {
6306 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
6307 }
6308
6309 line = find_OCacheLine( a );
6310
6311 descrLo = line->descr[lineoff + 0];
6312 descrHi = line->descr[lineoff + 1];
6313 descr = descrLo | descrHi;
6314 if (OC_ENABLE_ASSERTIONS) {
6315 tl_assert(descr < 0x10);
6316 }
6317
6318 if (LIKELY(0 == descr)) {
6319 return 0; /* both 32-bit chunks are defined */
6320 } else {
6321 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
6322 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
6323 return merge_origins(oLo, oHi);
6324 }
6325 }
6326
6327 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
6328 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
6329 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
6330 UInt oBoth = merge_origins(oLo, oHi);
6331 return (UWord)oBoth;
6332 }
6333
6334 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
6335 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
6336 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
6337 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
6338 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
6339 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
6340 merge_origins(oQ2, oQ3));
6341 return (UWord)oAll;
6342 }
6343
6344
6345 /*--------------------------------------------*/
6346 /*--- Origin tracking: store handlers ---*/
6347 /*--------------------------------------------*/
6348
6349 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
6350 OCacheLine* line;
6351 UWord lineoff = oc_line_offset(a);
6352 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6353
6354 if (OC_ENABLE_ASSERTIONS) {
6355 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6356 }
6357
6358 line = find_OCacheLine( a );
6359
6360 if (d32 == 0) {
6361 line->descr[lineoff] &= ~(1 << byteoff);
6362 } else {
6363 line->descr[lineoff] |= (1 << byteoff);
6364 line->w32[lineoff] = d32;
6365 }
6366 }
6367
6368 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
6369 OCacheLine* line;
6370 UWord lineoff, byteoff;
6371
6372 if (UNLIKELY(a & 1)) {
6373 /* Handle misaligned case, slowly. */
6374 MC_(helperc_b_store1)( a + 0, d32 );
6375 MC_(helperc_b_store1)( a + 1, d32 );
6376 return;
6377 }
6378
6379 lineoff = oc_line_offset(a);
6380 byteoff = a & 3; /* 0 or 2 */
6381
6382 if (OC_ENABLE_ASSERTIONS) {
6383 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6384 }
6385
6386 line = find_OCacheLine( a );
6387
6388 if (d32 == 0) {
6389 line->descr[lineoff] &= ~(3 << byteoff);
6390 } else {
6391 line->descr[lineoff] |= (3 << byteoff);
6392 line->w32[lineoff] = d32;
6393 }
6394 }
6395
6396 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
6397 OCacheLine* line;
6398 UWord lineoff;
6399
6400 if (UNLIKELY(a & 3)) {
6401 /* Handle misaligned case, slowly. */
6402 MC_(helperc_b_store2)( a + 0, d32 );
6403 MC_(helperc_b_store2)( a + 2, d32 );
6404 return;
6405 }
6406
6407 lineoff = oc_line_offset(a);
6408 if (OC_ENABLE_ASSERTIONS) {
6409 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6410 }
6411
6412 line = find_OCacheLine( a );
6413
6414 if (d32 == 0) {
6415 line->descr[lineoff] = 0;
6416 } else {
6417 line->descr[lineoff] = 0xF;
6418 line->w32[lineoff] = d32;
6419 }
6420 }
6421
6422 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
6423 OCacheLine* line;
6424 UWord lineoff;
6425
6426 if (UNLIKELY(a & 7)) {
6427 /* Handle misaligned case, slowly. */
6428 MC_(helperc_b_store4)( a + 0, d32 );
6429 MC_(helperc_b_store4)( a + 4, d32 );
6430 return;
6431 }
6432
6433 lineoff = oc_line_offset(a);
6434 if (OC_ENABLE_ASSERTIONS) {
6435 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
6436 }
6437
6438 line = find_OCacheLine( a );
6439
6440 if (d32 == 0) {
6441 line->descr[lineoff + 0] = 0;
6442 line->descr[lineoff + 1] = 0;
6443 } else {
6444 line->descr[lineoff + 0] = 0xF;
6445 line->descr[lineoff + 1] = 0xF;
6446 line->w32[lineoff + 0] = d32;
6447 line->w32[lineoff + 1] = d32;
6448 }
6449 }
6450
6451 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
6452 MC_(helperc_b_store8)( a + 0, d32 );
6453 MC_(helperc_b_store8)( a + 8, d32 );
6454 }
6455
6456 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
6457 MC_(helperc_b_store8)( a + 0, d32 );
6458 MC_(helperc_b_store8)( a + 8, d32 );
6459 MC_(helperc_b_store8)( a + 16, d32 );
6460 MC_(helperc_b_store8)( a + 24, d32 );
6461 }
6462
6463
6464 /*--------------------------------------------*/
6465 /*--- Origin tracking: sarp handlers ---*/
6466 /*--------------------------------------------*/
6467
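/* Worked example for the two helpers below: for a = 0x1003 and len = 9, the
   call sequence is store1(0x1003), then store4(0x1004) and store4(0x1008),
   leaving len == 0.  Misaligned head and tail bytes are peeled off with 1-
   and 2-byte stores, and the aligned middle goes through the 4-byte store. */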
6468 __attribute__((noinline))
6469 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
6470 if ((a & 1) && len >= 1) {
6471 MC_(helperc_b_store1)( a, otag );
6472 a++;
6473 len--;
6474 }
6475 if ((a & 2) && len >= 2) {
6476 MC_(helperc_b_store2)( a, otag );
6477 a += 2;
6478 len -= 2;
6479 }
6480 if (len >= 4)
6481 tl_assert(0 == (a & 3));
6482 while (len >= 4) {
6483 MC_(helperc_b_store4)( a, otag );
6484 a += 4;
6485 len -= 4;
6486 }
6487 if (len >= 2) {
6488 MC_(helperc_b_store2)( a, otag );
6489 a += 2;
6490 len -= 2;
6491 }
6492 if (len >= 1) {
6493 MC_(helperc_b_store1)( a, otag );
6494 //a++;
6495 len--;
6496 }
6497 tl_assert(len == 0);
6498 }
6499
6500 __attribute__((noinline))
6501 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
6502 if ((a & 1) && len >= 1) {
6503 MC_(helperc_b_store1)( a, 0 );
6504 a++;
6505 len--;
6506 }
6507 if ((a & 2) && len >= 2) {
6508 MC_(helperc_b_store2)( a, 0 );
6509 a += 2;
6510 len -= 2;
6511 }
6512 if (len >= 4)
6513 tl_assert(0 == (a & 3));
6514 while (len >= 4) {
6515 MC_(helperc_b_store4)( a, 0 );
6516 a += 4;
6517 len -= 4;
6518 }
6519 if (len >= 2) {
6520 MC_(helperc_b_store2)( a, 0 );
6521 a += 2;
6522 len -= 2;
6523 }
6524 if (len >= 1) {
6525 MC_(helperc_b_store1)( a, 0 );
6526 //a++;
6527 len--;
6528 }
6529 tl_assert(len == 0);
6530 }
6531
6532
6533 /*------------------------------------------------------------*/
6534 /*--- Setup and finalisation ---*/
6535 /*------------------------------------------------------------*/
6536
6537 static void mc_post_clo_init ( void )
6538 {
6539 /* If we've been asked to emit XML, mash around various other
6540 options so as to constrain the output somewhat. */
6541 if (VG_(clo_xml)) {
6542 /* Extract as much info as possible from the leak checker. */
6543 MC_(clo_leak_check) = LC_Full;
6544 }
6545
6546 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
6547 VG_(message)(Vg_UserMsg,
6548 "Warning: --freelist-big-blocks value %lld has no effect\n"
6549                   "as it is >= the --freelist-vol value %lld\n",
6550 MC_(clo_freelist_big_blocks),
6551 MC_(clo_freelist_vol));
6552
6553 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6554
6555 if (MC_(clo_mc_level) == 3) {
6556 /* We're doing origin tracking. */
6557 # ifdef PERF_FAST_STACK
6558 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
6559 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
6560 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
6561 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
6562 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
6563 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
6564 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
6565 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
6566 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
6567 # endif
6568 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
6569 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
6570 } else {
6571 /* Not doing origin tracking */
6572 # ifdef PERF_FAST_STACK
6573 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
6574 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
6575 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
6576 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
6577 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
6578 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
6579 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
6580 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
6581 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
6582 # endif
6583 VG_(track_new_mem_stack) ( mc_new_mem_stack );
6584 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
6585 }
6586
6587 // We assume that brk()/sbrk() does not initialise new memory. Is this
6588 // accurate? John Reiser says:
6589 //
6590 // 0) sbrk() can *decrease* process address space. No zero fill is done
6591 // for a decrease, not even the fragment on the high end of the last page
6592 // that is beyond the new highest address. For maximum safety and
6593 // portability, then the bytes in the last page that reside above [the
6594 // new] sbrk(0) should be considered to be uninitialized, but in practice
6595 // it is exceedingly likely that they will retain their previous
6596 // contents.
6597 //
6598 // 1) If an increase is large enough to require new whole pages, then
6599 // those new whole pages (like all new pages) are zero-filled by the
6600 // operating system. So if sbrk(0) already is page aligned, then
6601 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
6602 //
6603 // 2) Any increase that lies within an existing allocated page is not
6604 // changed. So if (x = sbrk(0)) is not page aligned, then
6605 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
6606 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
6607 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
6608 // of them come along for the ride because the operating system deals
6609 // only in whole pages. Again, for maximum safety and portability, then
6610 // anything that lives above [the new] sbrk(0) should be considered
6611 // uninitialized, but in practice will retain previous contents [zero in
6612 // this case.]"
6613 //
6614 // In short:
6615 //
6616 // A key property of sbrk/brk is that new whole pages that are supplied
6617 // by the operating system *do* get initialized to zero.
6618 //
6619 // As for the portability of all this:
6620 //
6621 // sbrk and brk are not POSIX. However, any system that is a derivative
6622   //   of *nix has sbrk and brk because too many programs (such as
6623   //   the Bourne shell) rely on the traditional memory map (.text,
6624 // .data+.bss, stack) and the existence of sbrk/brk.
6625 //
6626 // So we should arguably observe all this. However:
6627 // - The current inaccuracy has caused maybe one complaint in seven years(?)
6628 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
6629 // doubt most programmers know the above information.
6630 // So I'm not terribly unhappy with marking it as undefined. --njn.
6631 //
6632 // [More: I think most of what John said only applies to sbrk(). It seems
6633 // that brk() always deals in whole pages. And since this event deals
6634 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
6635 // just mark all memory it allocates as defined.]
6636 //
6637 if (MC_(clo_mc_level) == 3)
6638 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
6639 else
6640 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
6641
6642 /* This origin tracking cache is huge (~100M), so only initialise
6643 if we need it. */
6644 if (MC_(clo_mc_level) >= 3) {
6645 init_OCache();
6646 tl_assert(ocacheL1 != NULL);
6647 tl_assert(ocacheL2 != NULL);
6648 } else {
6649 tl_assert(ocacheL1 == NULL);
6650 tl_assert(ocacheL2 == NULL);
6651 }
6652
6653 MC_(chunk_poolalloc) = VG_(newPA)
6654 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
6655 1000,
6656 VG_(malloc),
6657 "mc.cMC.1 (MC_Chunk pools)",
6658 VG_(free));
6659
6660 /* Do not check definedness of guest state if --undef-value-errors=no */
6661 if (MC_(clo_mc_level) >= 2)
6662 VG_(track_pre_reg_read) ( mc_pre_reg_read );
6663 }
6664
6665 static void print_SM_info(const HChar* type, Int n_SMs)
6666 {
6667 VG_(message)(Vg_DebugMsg,
6668 " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
6669 type,
6670 n_SMs,
6671 n_SMs * sizeof(SecMap) / 1024UL,
6672 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
6673 }
6674
6675 static void mc_print_stats (void)
6676 {
6677 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
6678
6679 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
6680 VG_(free_queue_volume), VG_(free_queue_length));
6681 VG_(message)(Vg_DebugMsg,
6682 " memcheck: sanity checks: %d cheap, %d expensive\n",
6683 n_sanity_cheap, n_sanity_expensive );
6684 VG_(message)(Vg_DebugMsg,
6685 " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
6686 n_auxmap_L2_nodes,
6687 n_auxmap_L2_nodes * 64,
6688 n_auxmap_L2_nodes / 16 );
6689 VG_(message)(Vg_DebugMsg,
6690 " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
6691 n_auxmap_L1_searches, n_auxmap_L1_cmps,
6692 (10ULL * n_auxmap_L1_cmps)
6693 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
6694 );
6695 VG_(message)(Vg_DebugMsg,
6696 " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
6697 n_auxmap_L2_searches, n_auxmap_L2_nodes
6698 );
6699
6700 print_SM_info("n_issued ", n_issued_SMs);
6701 print_SM_info("n_deissued ", n_deissued_SMs);
6702 print_SM_info("max_noaccess ", max_noaccess_SMs);
6703 print_SM_info("max_undefined", max_undefined_SMs);
6704 print_SM_info("max_defined ", max_defined_SMs);
6705 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
6706
6707 // Three DSMs, plus the non-DSM ones
6708 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
6709 // The 3*sizeof(Word) bytes is the AVL node metadata size.
6710 // The VG_ROUNDUP is because the OSet pool allocator will/must align
6711 // the elements on pointer size.
6712 // Note that the pool allocator has some additional small overhead
6713 // which is not counted in the below.
6714 // Hardwiring this logic sucks, but I don't see how else to do it.
6715 max_secVBit_szB = max_secVBit_nodes *
6716 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
6717 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
6718
6719 VG_(message)(Vg_DebugMsg,
6720 " memcheck: max sec V bit nodes: %d (%ldk, %ldM)\n",
6721 max_secVBit_nodes, max_secVBit_szB / 1024,
6722 max_secVBit_szB / (1024 * 1024));
6723 VG_(message)(Vg_DebugMsg,
6724 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
6725 sec_vbits_new_nodes + sec_vbits_updates,
6726 sec_vbits_new_nodes, sec_vbits_updates );
6727 VG_(message)(Vg_DebugMsg,
6728 " memcheck: max shadow mem size: %ldk, %ldM\n",
6729 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
6730
6731 if (MC_(clo_mc_level) >= 3) {
6732 VG_(message)(Vg_DebugMsg,
6733 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
6734 stats_ocacheL1_find,
6735 stats_ocacheL1_misses,
6736 stats_ocacheL1_lossage );
6737 VG_(message)(Vg_DebugMsg,
6738 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
6739 stats_ocacheL1_find - stats_ocacheL1_misses
6740 - stats_ocacheL1_found_at_1
6741 - stats_ocacheL1_found_at_N,
6742 stats_ocacheL1_found_at_1 );
6743 VG_(message)(Vg_DebugMsg,
6744 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
6745 stats_ocacheL1_found_at_N,
6746 stats_ocacheL1_movefwds );
6747 VG_(message)(Vg_DebugMsg,
6748 " ocacheL1: %'12lu sizeB %'12u useful\n",
6749 (UWord)sizeof(OCache),
6750 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
6751 VG_(message)(Vg_DebugMsg,
6752 " ocacheL2: %'12lu refs %'12lu misses\n",
6753 stats__ocacheL2_refs,
6754 stats__ocacheL2_misses );
6755 VG_(message)(Vg_DebugMsg,
6756 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
6757 stats__ocacheL2_n_nodes_max,
6758 stats__ocacheL2_n_nodes );
6759 VG_(message)(Vg_DebugMsg,
6760 " niacache: %'12lu refs %'12lu misses\n",
6761 stats__nia_cache_queries, stats__nia_cache_misses);
6762 } else {
6763 tl_assert(ocacheL1 == NULL);
6764 tl_assert(ocacheL2 == NULL);
6765 }
6766 }
6767
6768
6769 static void mc_fini ( Int exitcode )
6770 {
6771 MC_(print_malloc_stats)();
6772
6773 if (MC_(clo_leak_check) != LC_Off) {
6774 LeakCheckParams lcp;
6775 lcp.mode = MC_(clo_leak_check);
6776 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6777 lcp.heuristics = MC_(clo_leak_check_heuristics);
6778 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6779 lcp.deltamode = LCD_Any;
6780 lcp.max_loss_records_output = 999999999;
6781 lcp.requested_by_monitor_command = False;
6782 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
6783 } else {
6784 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6785 VG_(umsg)(
6786 "For a detailed leak analysis, rerun with: --leak-check=full\n"
6787 "\n"
6788 );
6789 }
6790 }
6791
6792 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6793 VG_(message)(Vg_UserMsg,
6794 "For counts of detected and suppressed errors, rerun with: -v\n");
6795 }
6796
6797 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
6798 && MC_(clo_mc_level) == 2) {
6799 VG_(message)(Vg_UserMsg,
6800 "Use --track-origins=yes to see where "
6801 "uninitialised values come from\n");
6802 }
6803
6804 /* Print a warning if any client-request generated ignore-ranges
6805 still exist. It would be reasonable to expect that a properly
6806 written program would remove any such ranges before exiting, and
6807      since they are a bit on the dangerous side, a warning seems in order. By
6808 contrast ranges which are specified on the command line normally
6809 pertain to hardware mapped into the address space, and so we
6810 can't expect the client to have got rid of them. */
6811 if (gIgnoredAddressRanges) {
6812 Word i, nBad = 0;
6813 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6814 UWord val = IAR_INVALID;
6815 UWord key_min = ~(UWord)0;
6816 UWord key_max = (UWord)0;
6817 VG_(indexRangeMap)( &key_min, &key_max, &val,
6818 gIgnoredAddressRanges, i );
6819 if (val != IAR_ClientReq)
6820 continue;
6821 /* Print the offending range. Also, if it is the first,
6822 print a banner before it. */
6823 nBad++;
6824 if (nBad == 1) {
6825 VG_(umsg)(
6826 "WARNING: exiting program has the following client-requested\n"
6827 "WARNING: address error disablement range(s) still in force,\n"
6828 "WARNING: "
6829 "possibly as a result of some mistake in the use of the\n"
6830 "WARNING: "
6831 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
6832 );
6833 }
6834 VG_(umsg)(" [%ld] 0x%016llx-0x%016llx %s\n",
6835 i, (ULong)key_min, (ULong)key_max, showIARKind(val));
6836 }
6837 }
6838
6839 done_prof_mem();
6840
6841 if (VG_(clo_stats))
6842 mc_print_stats();
6843
6844 if (0) {
6845 VG_(message)(Vg_DebugMsg,
6846 "------ Valgrind's client block stats follow ---------------\n" );
6847 show_client_block_stats();
6848 }
6849 }
6850
6851 /* Mark the given addr/len unaddressable for the watchpoint implementation.
6852    The PointKind will be handled at access time. */
6853 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
6854 Addr addr, SizeT len)
6855 {
6856    /* GDBTD this is somewhat fishy. Ideally we would save the previous
6857       accessibility and definedness in gdbserver so that they could be
6858       restored properly. Currently, we assume that the user only watches
6859       things which are properly addressable and defined. */
6860 if (insert)
6861 MC_(make_mem_noaccess) (addr, len);
6862 else
6863 MC_(make_mem_defined) (addr, len);
6864 return True;
6865 }
6866
6867 static void mc_pre_clo_init(void)
6868 {
6869 VG_(details_name) ("Memcheck");
6870 VG_(details_version) (NULL);
6871 VG_(details_description) ("a memory error detector");
6872 VG_(details_copyright_author)(
6873 "Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.");
6874 VG_(details_bug_reports_to) (VG_BUGS_TO);
6875 VG_(details_avg_translation_sizeB) ( 640 );
6876
6877 VG_(basic_tool_funcs) (mc_post_clo_init,
6878 MC_(instrument),
6879 mc_fini);
6880
6881 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
6882
6883
6884 VG_(needs_core_errors) ();
6885 VG_(needs_tool_errors) (MC_(eq_Error),
6886 MC_(before_pp_Error),
6887 MC_(pp_Error),
6888 True,/*show TIDs for errors*/
6889 MC_(update_Error_extra),
6890 MC_(is_recognised_suppression),
6891 MC_(read_extra_suppression_info),
6892 MC_(error_matches_suppression),
6893 MC_(get_error_name),
6894 MC_(get_extra_suppression_info),
6895 MC_(print_extra_suppression_use),
6896 MC_(update_extra_suppression_use));
6897 VG_(needs_libc_freeres) ();
6898 VG_(needs_command_line_options)(mc_process_cmd_line_options,
6899 mc_print_usage,
6900 mc_print_debug_usage);
6901 VG_(needs_client_requests) (mc_handle_client_request);
6902 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
6903 mc_expensive_sanity_check);
6904 VG_(needs_print_stats) (mc_print_stats);
6905 VG_(needs_info_location) (MC_(pp_describe_addr));
6906 VG_(needs_malloc_replacement) (MC_(malloc),
6907 MC_(__builtin_new),
6908 MC_(__builtin_vec_new),
6909 MC_(memalign),
6910 MC_(calloc),
6911 MC_(free),
6912 MC_(__builtin_delete),
6913 MC_(__builtin_vec_delete),
6914 MC_(realloc),
6915 MC_(malloc_usable_size),
6916 MC_MALLOC_DEFAULT_REDZONE_SZB );
6917 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
6918
6919 VG_(needs_xml_output) ();
6920
6921 VG_(track_new_mem_startup) ( mc_new_mem_startup );
6922
6923 // Handling of mmap and mprotect isn't simple (well, it is simple,
6924 // but the justification isn't.) See comments above, just prior to
6925 // mc_new_mem_mmap.
6926 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
6927 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
6928
6929 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
6930
6931 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
6932 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
6933 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
6934
6935 /* Defer the specification of the new_mem_stack functions to the
6936 post_clo_init function, since we need to first parse the command
6937 line before deciding which set to use. */
6938
6939 # ifdef PERF_FAST_STACK
6940 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
6941 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
6942 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
6943 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
6944 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
6945 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
6946 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
6947 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
6948 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
6949 # endif
6950 VG_(track_die_mem_stack) ( mc_die_mem_stack );
6951
6952 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
6953
6954 VG_(track_pre_mem_read) ( check_mem_is_defined );
6955 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
6956 VG_(track_pre_mem_write) ( check_mem_is_addressable );
6957 VG_(track_post_mem_write) ( mc_post_mem_write );
6958
6959 VG_(track_post_reg_write) ( mc_post_reg_write );
6960 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
6961
6962 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
6963
6964 init_shadow_memory();
6965 // MC_(chunk_poolalloc) must be allocated in post_clo_init
6966 tl_assert(MC_(chunk_poolalloc) == NULL);
6967 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
6968 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
6969 init_prof_mem();
6970
6971 tl_assert( mc_expensive_sanity_check() );
6972
6973 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
6974 tl_assert(sizeof(UWord) == sizeof(Addr));
6975 // Call me paranoid. I don't care.
6976 tl_assert(sizeof(void*) == sizeof(Addr));
6977
6978 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
6979 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
6980
6981 /* This is small. Always initialise it. */
6982 init_nia_to_ecu_cache();
6983
6984 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
6985 if we need to, since the command line args haven't been
6986 processed yet. Hence defer it to mc_post_clo_init. */
6987 tl_assert(ocacheL1 == NULL);
6988 tl_assert(ocacheL2 == NULL);
6989
6990 /* Check some important stuff. See extensive comments above
6991 re UNALIGNED_OR_HIGH for background. */
6992 # if VG_WORDSIZE == 4
6993 tl_assert(sizeof(void*) == 4);
6994 tl_assert(sizeof(Addr) == 4);
6995 tl_assert(sizeof(UWord) == 4);
6996 tl_assert(sizeof(Word) == 4);
6997 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
6998 tl_assert(MASK(1) == 0UL);
6999 tl_assert(MASK(2) == 1UL);
7000 tl_assert(MASK(4) == 3UL);
7001 tl_assert(MASK(8) == 7UL);
7002 # else
7003 tl_assert(VG_WORDSIZE == 8);
7004 tl_assert(sizeof(void*) == 8);
7005 tl_assert(sizeof(Addr) == 8);
7006 tl_assert(sizeof(UWord) == 8);
7007 tl_assert(sizeof(Word) == 8);
7008 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
7009 tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
7010 tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
7011 tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
7012 tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
7013 # endif
7014 }
7015
7016 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
7017
7018 /*--------------------------------------------------------------------*/
7019 /*--- end mc_main.c ---*/
7020 /*--------------------------------------------------------------------*/
7021