1 /* -*- mode: C; c-basic-offset: 3; -*- */
2
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
5 /*--- accessibility (A) and validity (V) status of each byte. ---*/
6 /*--- mc_main.c ---*/
7 /*--------------------------------------------------------------------*/
8
9 /*
10 This file is part of MemCheck, a heavyweight Valgrind tool for
11 detecting memory errors.
12
13 Copyright (C) 2000-2015 Julian Seward
14 jseward@acm.org
15
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
20
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
25
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307, USA.
30
31 The GNU General Public License is contained in the file COPYING.
32 */
33
34 #include "pub_tool_basics.h"
35 #include "pub_tool_aspacemgr.h"
36 #include "pub_tool_gdbserver.h"
37 #include "pub_tool_poolalloc.h"
38 #include "pub_tool_hashtable.h" // For mc_include.h
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_machine.h"
43 #include "pub_tool_mallocfree.h"
44 #include "pub_tool_options.h"
45 #include "pub_tool_oset.h"
46 #include "pub_tool_rangemap.h"
47 #include "pub_tool_replacemalloc.h"
48 #include "pub_tool_tooliface.h"
49 #include "pub_tool_threadstate.h"
50
51 #include "mc_include.h"
52 #include "memcheck.h" /* for client requests */
53
54
55 /* Set to 1 to enable handwritten assembly helpers on targets for
56 which it is supported. */
57 #define ENABLE_ASSEMBLY_HELPERS 1
58
59 /* Set to 1 to do a little more sanity checking */
60 #define VG_DEBUG_MEMORY 0
61
62 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
63
64 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
65 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
66
67
68 /*------------------------------------------------------------*/
69 /*--- Fast-case knobs ---*/
70 /*------------------------------------------------------------*/
71
72 // Comment these out to disable the fast cases (don't just set them to zero).
73
74 #define PERF_FAST_LOADV 1
75 #define PERF_FAST_STOREV 1
76
77 #define PERF_FAST_SARP 1
78
79 #define PERF_FAST_STACK 1
80 #define PERF_FAST_STACK2 1
81
82 /* Change this to 1 to enable assertions on origin tracking cache fast
83 paths */
84 #define OC_ENABLE_ASSERTIONS 0
85
86
87 /*------------------------------------------------------------*/
88 /*--- Comments on the origin tracking implementation ---*/
89 /*------------------------------------------------------------*/
90
91 /* See detailed comment entitled
92 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
93 which is contained further on in this file. */
94
95
96 /*------------------------------------------------------------*/
97 /*--- V bits and A bits ---*/
98 /*------------------------------------------------------------*/
99
100 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
101 thinks the corresponding value bit is defined. And every memory byte
102 has an A bit, which tracks whether Memcheck thinks the program can access
103 it safely (ie. it's mapped, and has at least one of the RWX permission bits
104 set). So every N-bit register is shadowed with N V bits, and every memory
105 byte is shadowed with 8 V bits and one A bit.
106
107 In the implementation, we use two forms of compression (compressed V bits
108 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
109 for memory.
110
111 Memcheck also tracks extra information about each heap block that is
112 allocated, for detecting memory leaks and other purposes.
113 */
114
115 /*------------------------------------------------------------*/
116 /*--- Basic A/V bitmap representation. ---*/
117 /*------------------------------------------------------------*/
118
119 /* All reads and writes are checked against a memory map (a.k.a. shadow
120 memory), which records the state of all memory in the process.
121
122 On 32-bit machines the memory map is organised as follows.
123 The top 16 bits of an address are used to index into a top-level
124 map table, containing 65536 entries. Each entry is a pointer to a
125 second-level map, which records the accesibililty and validity
126 permissions for the 65536 bytes indexed by the lower 16 bits of the
127 address. Each byte is represented by two bits (details are below). So
128 each second-level map contains 16384 bytes. This two-level arrangement
129 conveniently divides the 4G address space into 64k lumps, each size 64k
130 bytes.
131
132 All entries in the primary (top-level) map must point to a valid
133 secondary (second-level) map. Since many of the 64kB chunks will
134 have the same status for every bit -- ie. noaccess (for unused
135 address space) or entirely addressable and defined (for code segments) --
136 there are three distinguished secondary maps, which indicate 'noaccess',
137 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
138 map entry points to the relevant distinguished map. In practice,
139 typically more than half of the addressable memory is represented with
140 the 'undefined' or 'defined' distinguished secondary map, so it gives a
141 good saving. It also lets us set the V+A bits of large address regions
142 quickly in set_address_range_perms().
143
144 On 64-bit machines it's more complicated. If we followed the same basic
145 scheme we'd have a four-level table which would require too many memory
146 accesses. So instead the top-level map table has 2^20 entries (indexed
147 using bits 16..35 of the address); this covers the bottom 64GB. Any
148 accesses above 64GB are handled with a slow, sparse auxiliary table.
149 Valgrind's address space manager tries very hard to keep things below
150 this 64GB barrier so that performance doesn't suffer too much.
151
152 Note that this file has a lot of different functions for reading and
153 writing shadow memory. Only a couple are strictly necessary (eg.
154 get_vabits2 and set_vabits2), most are just specialised for specific
155 common cases to improve performance.
156
157 Aside: the V+A bits are less precise than they could be -- we have no way
158 of marking memory as read-only. It would be great if we could add an
159 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
160 which requires 2.3 bits to hold, and there's no way to do that elegantly
161 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
162 seem worth it.
163 */
164
165 /* --------------- Basic configuration --------------- */
166
167 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
168
169 #if VG_WORDSIZE == 4
170
171 /* cover the entire address space */
172 # define N_PRIMARY_BITS 16
173
174 #else
175
176 /* Just handle the first 64G fast and the rest via auxiliary
177 primaries. If you change this, Memcheck will assert at startup.
178 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
179 # define N_PRIMARY_BITS 20
180
181 #endif
182
183
184 /* Do not change this. */
185 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
186
187 /* Do not change this. */
188 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
189
190
191 /* --------------- Secondary maps --------------- */
192
193 // Each byte of memory conceptually has an A bit, which indicates its
194 // addressability, and 8 V bits, which indicates its definedness.
195 //
196 // But because very few bytes are partially defined, we can use a nice
197 // compression scheme to reduce the size of shadow memory. Each byte of
198 // memory has 2 bits which indicates its state (ie. V+A bits):
199 //
200 // 00: noaccess (unaddressable but treated as fully defined)
201 // 01: undefined (addressable and fully undefined)
202 // 10: defined (addressable and fully defined)
203 // 11: partdefined (addressable and partially defined)
204 //
205 // In the "partdefined" case, we use a secondary table to store the V bits.
206 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
207 // bits.
208 //
209 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
210 // four bytes (32 bits) of memory are in each chunk. Hence the name
211 // "vabits8". This lets us get the V+A bits for four bytes at a time
212 // easily (without having to do any shifting and/or masking), and that is a
213 // very common operation. (Note that although each vabits8 chunk
214 // is 8 bits in size, it represents 32 bits of memory.)
215 //
216 // The representation is "inverse" little-endian... each 4 bytes of
217 // memory is represented by a 1 byte value, where:
218 //
219 // - the status of byte (a+0) is held in bits [1..0]
220 // - the status of byte (a+1) is held in bits [3..2]
221 // - the status of byte (a+2) is held in bits [5..4]
222 // - the status of byte (a+3) is held in bits [7..6]
223 //
224 // It's "inverse" because endianness normally describes a mapping from
225 // value bits to memory addresses; in this case the mapping is inverted.
226 // Ie. instead of particular value bits being held in certain addresses, in
227 // this case certain addresses are represented by particular value bits.
228 // See insert_vabits2_into_vabits8() for an example.
229 //
230 // But note that we don't compress the V bits stored in registers; they
231 // need to be explicit to made the shadow operations possible. Therefore
232 // when moving values between registers and memory we need to convert
233 // between the expanded in-register format and the compressed in-memory
234 // format. This isn't so difficult, it just requires careful attention in a
235 // few places.
236
237 // These represent eight bits of memory.
238 #define VA_BITS2_NOACCESS 0x0 // 00b
239 #define VA_BITS2_UNDEFINED 0x1 // 01b
240 #define VA_BITS2_DEFINED 0x2 // 10b
241 #define VA_BITS2_PARTDEFINED 0x3 // 11b
242
243 // These represent 16 bits of memory.
244 #define VA_BITS4_NOACCESS 0x0 // 00_00b
245 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
246 #define VA_BITS4_DEFINED 0xa // 10_10b
247
248 // These represent 32 bits of memory.
249 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
250 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
251 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
252
253 // These represent 64 bits of memory.
254 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
255 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
256 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
257
258
259 #define SM_CHUNKS 16384 // Each SM covers 64k of memory.
260 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
261 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
262
263 // Paranoia: it's critical for performance that the requested inlining
264 // occurs. So try extra hard.
265 #define INLINE inline __attribute__((always_inline))
266
start_of_this_sm(Addr a)267 static INLINE Addr start_of_this_sm ( Addr a ) {
268 return (a & (~SM_MASK));
269 }
is_start_of_sm(Addr a)270 static INLINE Bool is_start_of_sm ( Addr a ) {
271 return (start_of_this_sm(a) == a);
272 }
273
274 typedef
275 struct {
276 UChar vabits8[SM_CHUNKS];
277 }
278 SecMap;
279
280 // 3 distinguished secondary maps, one for no-access, one for
281 // accessible but undefined, and one for accessible and defined.
282 // Distinguished secondaries may never be modified.
283 #define SM_DIST_NOACCESS 0
284 #define SM_DIST_UNDEFINED 1
285 #define SM_DIST_DEFINED 2
286
287 static SecMap sm_distinguished[3];
288
is_distinguished_sm(SecMap * sm)289 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
290 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
291 }
292
293 // Forward declaration
294 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
295
296 /* dist_sm points to one of our three distinguished secondaries. Make
297 a copy of it so that we can write to it.
298 */
copy_for_writing(SecMap * dist_sm)299 static SecMap* copy_for_writing ( SecMap* dist_sm )
300 {
301 SecMap* new_sm;
302 tl_assert(dist_sm == &sm_distinguished[0]
303 || dist_sm == &sm_distinguished[1]
304 || dist_sm == &sm_distinguished[2]);
305
306 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
307 if (new_sm == NULL)
308 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
309 sizeof(SecMap) );
310 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
311 update_SM_counts(dist_sm, new_sm);
312 return new_sm;
313 }
314
315 /* --------------- Stats --------------- */
316
317 static Int n_issued_SMs = 0;
318 static Int n_deissued_SMs = 0;
319 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
320 static Int n_undefined_SMs = 0;
321 static Int n_defined_SMs = 0;
322 static Int n_non_DSM_SMs = 0;
323 static Int max_noaccess_SMs = 0;
324 static Int max_undefined_SMs = 0;
325 static Int max_defined_SMs = 0;
326 static Int max_non_DSM_SMs = 0;
327
328 /* # searches initiated in auxmap_L1, and # base cmps required */
329 static ULong n_auxmap_L1_searches = 0;
330 static ULong n_auxmap_L1_cmps = 0;
331 /* # of searches that missed in auxmap_L1 and therefore had to
332 be handed to auxmap_L2. And the number of nodes inserted. */
333 static ULong n_auxmap_L2_searches = 0;
334 static ULong n_auxmap_L2_nodes = 0;
335
336 static Int n_sanity_cheap = 0;
337 static Int n_sanity_expensive = 0;
338
339 static Int n_secVBit_nodes = 0;
340 static Int max_secVBit_nodes = 0;
341
update_SM_counts(SecMap * oldSM,SecMap * newSM)342 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
343 {
344 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
345 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
346 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
347 else { n_non_DSM_SMs --;
348 n_deissued_SMs ++; }
349
350 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
351 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
352 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
353 else { n_non_DSM_SMs ++;
354 n_issued_SMs ++; }
355
356 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
357 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
358 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
359 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
360 }
361
362 /* --------------- Primary maps --------------- */
363
364 /* The main primary map. This covers some initial part of the address
365 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
366 handled using the auxiliary primary map.
367 */
368 static SecMap* primary_map[N_PRIMARY_MAP];
369
370
371 /* An entry in the auxiliary primary map. base must be a 64k-aligned
372 value, and sm points at the relevant secondary map. As with the
373 main primary map, the secondary may be either a real secondary, or
374 one of the three distinguished secondaries. DO NOT CHANGE THIS
375 LAYOUT: the first word has to be the key for OSet fast lookups.
376 */
377 typedef
378 struct {
379 Addr base;
380 SecMap* sm;
381 }
382 AuxMapEnt;
383
384 /* Tunable parameter: How big is the L1 queue? */
385 #define N_AUXMAP_L1 24
386
387 /* Tunable parameter: How far along the L1 queue to insert
388 entries resulting from L2 lookups? */
389 #define AUXMAP_L1_INSERT_IX 12
390
391 static struct {
392 Addr base;
393 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
394 }
395 auxmap_L1[N_AUXMAP_L1];
396
397 static OSet* auxmap_L2 = NULL;
398
init_auxmap_L1_L2(void)399 static void init_auxmap_L1_L2 ( void )
400 {
401 Int i;
402 for (i = 0; i < N_AUXMAP_L1; i++) {
403 auxmap_L1[i].base = 0;
404 auxmap_L1[i].ent = NULL;
405 }
406
407 tl_assert(0 == offsetof(AuxMapEnt,base));
408 tl_assert(sizeof(Addr) == sizeof(void*));
409 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
410 /*fastCmp*/ NULL,
411 VG_(malloc), "mc.iaLL.1", VG_(free) );
412 }
413
414 /* Check representation invariants; if OK return NULL; else a
415 descriptive bit of text. Also return the number of
416 non-distinguished secondary maps referred to from the auxiliary
417 primary maps. */
418
check_auxmap_L1_L2_sanity(Word * n_secmaps_found)419 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
420 {
421 Word i, j;
422 /* On a 32-bit platform, the L2 and L1 tables should
423 both remain empty forever.
424
425 On a 64-bit platform:
426 In the L2 table:
427 all .base & 0xFFFF == 0
428 all .base > MAX_PRIMARY_ADDRESS
429 In the L1 table:
430 all .base & 0xFFFF == 0
431 all (.base > MAX_PRIMARY_ADDRESS
432 .base & 0xFFFF == 0
433 and .ent points to an AuxMapEnt with the same .base)
434 or
435 (.base == 0 and .ent == NULL)
436 */
437 *n_secmaps_found = 0;
438 if (sizeof(void*) == 4) {
439 /* 32-bit platform */
440 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
441 return "32-bit: auxmap_L2 is non-empty";
442 for (i = 0; i < N_AUXMAP_L1; i++)
443 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
444 return "32-bit: auxmap_L1 is non-empty";
445 } else {
446 /* 64-bit platform */
447 UWord elems_seen = 0;
448 AuxMapEnt *elem, *res;
449 AuxMapEnt key;
450 /* L2 table */
451 VG_(OSetGen_ResetIter)(auxmap_L2);
452 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
453 elems_seen++;
454 if (0 != (elem->base & (Addr)0xFFFF))
455 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
456 if (elem->base <= MAX_PRIMARY_ADDRESS)
457 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
458 if (elem->sm == NULL)
459 return "64-bit: .sm in _L2 is NULL";
460 if (!is_distinguished_sm(elem->sm))
461 (*n_secmaps_found)++;
462 }
463 if (elems_seen != n_auxmap_L2_nodes)
464 return "64-bit: disagreement on number of elems in _L2";
465 /* Check L1-L2 correspondence */
466 for (i = 0; i < N_AUXMAP_L1; i++) {
467 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
468 continue;
469 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
470 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
471 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
472 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
473 if (auxmap_L1[i].ent == NULL)
474 return "64-bit: .ent is NULL in auxmap_L1";
475 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
476 return "64-bit: _L1 and _L2 bases are inconsistent";
477 /* Look it up in auxmap_L2. */
478 key.base = auxmap_L1[i].base;
479 key.sm = 0;
480 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
481 if (res == NULL)
482 return "64-bit: _L1 .base not found in _L2";
483 if (res != auxmap_L1[i].ent)
484 return "64-bit: _L1 .ent disagrees with _L2 entry";
485 }
486 /* Check L1 contains no duplicates */
487 for (i = 0; i < N_AUXMAP_L1; i++) {
488 if (auxmap_L1[i].base == 0)
489 continue;
490 for (j = i+1; j < N_AUXMAP_L1; j++) {
491 if (auxmap_L1[j].base == 0)
492 continue;
493 if (auxmap_L1[j].base == auxmap_L1[i].base)
494 return "64-bit: duplicate _L1 .base entries";
495 }
496 }
497 }
498 return NULL; /* ok */
499 }
500
insert_into_auxmap_L1_at(Word rank,AuxMapEnt * ent)501 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
502 {
503 Word i;
504 tl_assert(ent);
505 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
506 for (i = N_AUXMAP_L1-1; i > rank; i--)
507 auxmap_L1[i] = auxmap_L1[i-1];
508 auxmap_L1[rank].base = ent->base;
509 auxmap_L1[rank].ent = ent;
510 }
511
maybe_find_in_auxmap(Addr a)512 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
513 {
514 AuxMapEnt key;
515 AuxMapEnt* res;
516 Word i;
517
518 tl_assert(a > MAX_PRIMARY_ADDRESS);
519 a &= ~(Addr)0xFFFF;
520
521 /* First search the front-cache, which is a self-organising
522 list containing the most popular entries. */
523
524 if (LIKELY(auxmap_L1[0].base == a))
525 return auxmap_L1[0].ent;
526 if (LIKELY(auxmap_L1[1].base == a)) {
527 Addr t_base = auxmap_L1[0].base;
528 AuxMapEnt* t_ent = auxmap_L1[0].ent;
529 auxmap_L1[0].base = auxmap_L1[1].base;
530 auxmap_L1[0].ent = auxmap_L1[1].ent;
531 auxmap_L1[1].base = t_base;
532 auxmap_L1[1].ent = t_ent;
533 return auxmap_L1[0].ent;
534 }
535
536 n_auxmap_L1_searches++;
537
538 for (i = 0; i < N_AUXMAP_L1; i++) {
539 if (auxmap_L1[i].base == a) {
540 break;
541 }
542 }
543 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
544
545 n_auxmap_L1_cmps += (ULong)(i+1);
546
547 if (i < N_AUXMAP_L1) {
548 if (i > 0) {
549 Addr t_base = auxmap_L1[i-1].base;
550 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
551 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
552 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
553 auxmap_L1[i-0].base = t_base;
554 auxmap_L1[i-0].ent = t_ent;
555 i--;
556 }
557 return auxmap_L1[i].ent;
558 }
559
560 n_auxmap_L2_searches++;
561
562 /* First see if we already have it. */
563 key.base = a;
564 key.sm = 0;
565
566 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
567 if (res)
568 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
569 return res;
570 }
571
find_or_alloc_in_auxmap(Addr a)572 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
573 {
574 AuxMapEnt *nyu, *res;
575
576 /* First see if we already have it. */
577 res = maybe_find_in_auxmap( a );
578 if (LIKELY(res))
579 return res;
580
581 /* Ok, there's no entry in the secondary map, so we'll have
582 to allocate one. */
583 a &= ~(Addr)0xFFFF;
584
585 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
586 nyu->base = a;
587 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
588 VG_(OSetGen_Insert)( auxmap_L2, nyu );
589 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
590 n_auxmap_L2_nodes++;
591 return nyu;
592 }
593
594 /* --------------- SecMap fundamentals --------------- */
595
596 // In all these, 'low' means it's definitely in the main primary map,
597 // 'high' means it's definitely in the auxiliary table.
598
get_secmap_low_ptr(Addr a)599 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
600 {
601 UWord pm_off = a >> 16;
602 # if VG_DEBUG_MEMORY >= 1
603 tl_assert(pm_off < N_PRIMARY_MAP);
604 # endif
605 return &primary_map[ pm_off ];
606 }
607
get_secmap_high_ptr(Addr a)608 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
609 {
610 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
611 return &am->sm;
612 }
613
get_secmap_ptr(Addr a)614 static INLINE SecMap** get_secmap_ptr ( Addr a )
615 {
616 return ( a <= MAX_PRIMARY_ADDRESS
617 ? get_secmap_low_ptr(a)
618 : get_secmap_high_ptr(a));
619 }
620
get_secmap_for_reading_low(Addr a)621 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
622 {
623 return *get_secmap_low_ptr(a);
624 }
625
get_secmap_for_reading_high(Addr a)626 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
627 {
628 return *get_secmap_high_ptr(a);
629 }
630
get_secmap_for_writing_low(Addr a)631 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
632 {
633 SecMap** p = get_secmap_low_ptr(a);
634 if (UNLIKELY(is_distinguished_sm(*p)))
635 *p = copy_for_writing(*p);
636 return *p;
637 }
638
get_secmap_for_writing_high(Addr a)639 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
640 {
641 SecMap** p = get_secmap_high_ptr(a);
642 if (UNLIKELY(is_distinguished_sm(*p)))
643 *p = copy_for_writing(*p);
644 return *p;
645 }
646
647 /* Produce the secmap for 'a', either from the primary map or by
648 ensuring there is an entry for it in the aux primary map. The
649 secmap may be a distinguished one as the caller will only want to
650 be able to read it.
651 */
get_secmap_for_reading(Addr a)652 static INLINE SecMap* get_secmap_for_reading ( Addr a )
653 {
654 return ( a <= MAX_PRIMARY_ADDRESS
655 ? get_secmap_for_reading_low (a)
656 : get_secmap_for_reading_high(a) );
657 }
658
659 /* Produce the secmap for 'a', either from the primary map or by
660 ensuring there is an entry for it in the aux primary map. The
661 secmap may not be a distinguished one, since the caller will want
662 to be able to write it. If it is a distinguished secondary, make a
663 writable copy of it, install it, and return the copy instead. (COW
664 semantics).
665 */
get_secmap_for_writing(Addr a)666 static INLINE SecMap* get_secmap_for_writing ( Addr a )
667 {
668 return ( a <= MAX_PRIMARY_ADDRESS
669 ? get_secmap_for_writing_low (a)
670 : get_secmap_for_writing_high(a) );
671 }
672
673 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
674 allocate one if one doesn't already exist. This is used by the
675 leak checker.
676 */
maybe_get_secmap_for(Addr a)677 static SecMap* maybe_get_secmap_for ( Addr a )
678 {
679 if (a <= MAX_PRIMARY_ADDRESS) {
680 return get_secmap_for_reading_low(a);
681 } else {
682 AuxMapEnt* am = maybe_find_in_auxmap(a);
683 return am ? am->sm : NULL;
684 }
685 }
686
687 /* --------------- Fundamental functions --------------- */
688
689 static INLINE
insert_vabits2_into_vabits8(Addr a,UChar vabits2,UChar * vabits8)690 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
691 {
692 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
693 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
694 *vabits8 |= (vabits2 << shift); // mask in the two new bits
695 }
696
697 static INLINE
insert_vabits4_into_vabits8(Addr a,UChar vabits4,UChar * vabits8)698 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
699 {
700 UInt shift;
701 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
702 shift = (a & 2) << 1; // shift by 0 or 4
703 *vabits8 &= ~(0xf << shift); // mask out the four old bits
704 *vabits8 |= (vabits4 << shift); // mask in the four new bits
705 }
706
707 static INLINE
extract_vabits2_from_vabits8(Addr a,UChar vabits8)708 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
709 {
710 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
711 vabits8 >>= shift; // shift the two bits to the bottom
712 return 0x3 & vabits8; // mask out the rest
713 }
714
715 static INLINE
extract_vabits4_from_vabits8(Addr a,UChar vabits8)716 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
717 {
718 UInt shift;
719 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
720 shift = (a & 2) << 1; // shift by 0 or 4
721 vabits8 >>= shift; // shift the four bits to the bottom
722 return 0xf & vabits8; // mask out the rest
723 }
724
725 // Note that these four are only used in slow cases. The fast cases do
726 // clever things like combine the auxmap check (in
727 // get_secmap_{read,writ}able) with alignment checks.
728
729 // *** WARNING! ***
730 // Any time this function is called, if it is possible that vabits2
731 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
732 // sec-V-bits table must also be set!
733 static INLINE
set_vabits2(Addr a,UChar vabits2)734 void set_vabits2 ( Addr a, UChar vabits2 )
735 {
736 SecMap* sm = get_secmap_for_writing(a);
737 UWord sm_off = SM_OFF(a);
738 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
739 }
740
741 static INLINE
get_vabits2(Addr a)742 UChar get_vabits2 ( Addr a )
743 {
744 SecMap* sm = get_secmap_for_reading(a);
745 UWord sm_off = SM_OFF(a);
746 UChar vabits8 = sm->vabits8[sm_off];
747 return extract_vabits2_from_vabits8(a, vabits8);
748 }
749
750 // *** WARNING! ***
751 // Any time this function is called, if it is possible that any of the
752 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
753 // corresponding entry(s) in the sec-V-bits table must also be set!
754 static INLINE
get_vabits8_for_aligned_word32(Addr a)755 UChar get_vabits8_for_aligned_word32 ( Addr a )
756 {
757 SecMap* sm = get_secmap_for_reading(a);
758 UWord sm_off = SM_OFF(a);
759 UChar vabits8 = sm->vabits8[sm_off];
760 return vabits8;
761 }
762
763 static INLINE
set_vabits8_for_aligned_word32(Addr a,UChar vabits8)764 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
765 {
766 SecMap* sm = get_secmap_for_writing(a);
767 UWord sm_off = SM_OFF(a);
768 sm->vabits8[sm_off] = vabits8;
769 }
770
771
772 // Forward declarations
773 static UWord get_sec_vbits8(Addr a);
774 static void set_sec_vbits8(Addr a, UWord vbits8);
775
776 // Returns False if there was an addressability error.
777 static INLINE
set_vbits8(Addr a,UChar vbits8)778 Bool set_vbits8 ( Addr a, UChar vbits8 )
779 {
780 Bool ok = True;
781 UChar vabits2 = get_vabits2(a);
782 if ( VA_BITS2_NOACCESS != vabits2 ) {
783 // Addressable. Convert in-register format to in-memory format.
784 // Also remove any existing sec V bit entry for the byte if no
785 // longer necessary.
786 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
787 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
788 else { vabits2 = VA_BITS2_PARTDEFINED;
789 set_sec_vbits8(a, vbits8); }
790 set_vabits2(a, vabits2);
791
792 } else {
793 // Unaddressable! Do nothing -- when writing to unaddressable
794 // memory it acts as a black hole, and the V bits can never be seen
795 // again. So we don't have to write them at all.
796 ok = False;
797 }
798 return ok;
799 }
800
801 // Returns False if there was an addressability error. In that case, we put
802 // all defined bits into vbits8.
803 static INLINE
get_vbits8(Addr a,UChar * vbits8)804 Bool get_vbits8 ( Addr a, UChar* vbits8 )
805 {
806 Bool ok = True;
807 UChar vabits2 = get_vabits2(a);
808
809 // Convert the in-memory format to in-register format.
810 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
811 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
812 else if ( VA_BITS2_NOACCESS == vabits2 ) {
813 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
814 ok = False;
815 } else {
816 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
817 *vbits8 = get_sec_vbits8(a);
818 }
819 return ok;
820 }
821
822
823 /* --------------- Secondary V bit table ------------ */
824
825 // This table holds the full V bit pattern for partially-defined bytes
826 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
827 // memory.
828 //
829 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
830 // then overwrite the same address with a fully defined byte, the sec-V-bit
831 // node will not necessarily be removed. This is because checking for
832 // whether removal is necessary would slow down the fast paths.
833 //
834 // To avoid the stale nodes building up too much, we periodically (once the
835 // table reaches a certain size) garbage collect (GC) the table by
836 // traversing it and evicting any nodes not having PDB.
837 // If more than a certain proportion of nodes survived, we increase the
838 // table size so that GCs occur less often.
839 //
840 // This policy is designed to avoid bad table bloat in the worst case where
841 // a program creates huge numbers of stale PDBs -- we would get this bloat
842 // if we had no GC -- while handling well the case where a node becomes
843 // stale but shortly afterwards is rewritten with a PDB and so becomes
844 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
845 // remove all stale nodes as soon as possible, we just end up re-adding a
846 // lot of them in later again. The "sufficiently stale" approach avoids
847 // this. (If a program has many live PDBs, performance will just suck,
848 // there's no way around that.)
849 //
850 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
851 // holding on to stale entries for 2 GCs before discarding them can lead
852 // to massive space leaks. So we're changing to an arrangement where
853 // lines are evicted as soon as they are observed to be stale during a
854 // GC. This also has a side benefit of allowing the sufficiently_stale
855 // field to be removed from the SecVBitNode struct, reducing its size by
856 // 8 bytes, which is a substantial space saving considering that the
857 // struct was previously 32 or so bytes, on a 64 bit target.
858 //
859 // In order to try and mitigate the problem that the "sufficiently stale"
860 // heuristic was designed to avoid, the table size is allowed to drift
861 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
862 // means that nodes will exist in the table longer on average, and hopefully
863 // will be deleted and re-added less frequently.
864 //
865 // The previous scaling up mechanism (now called STEPUP) is retained:
866 // if residency exceeds 50%, the table is scaled up, although by a
867 // factor sqrt(2) rather than 2 as before. This effectively doubles the
868 // frequency of GCs when there are many PDBs at reduces the tendency of
869 // stale PDBs to reside for long periods in the table.
870
871 static OSet* secVBitTable;
872
873 // Stats
874 static ULong sec_vbits_new_nodes = 0;
875 static ULong sec_vbits_updates = 0;
876
877 // This must be a power of two; this is checked in mc_pre_clo_init().
878 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
879 // a larger address range) they take more space but we can get multiple
880 // partially-defined bytes in one if they are close to each other, reducing
881 // the number of total nodes. In practice sometimes they are clustered (eg.
882 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
883 // row), but often not. So we choose something intermediate.
884 #define BYTES_PER_SEC_VBIT_NODE 16
885
886 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
887 // more than this many nodes survive a GC.
888 #define STEPUP_SURVIVOR_PROPORTION 0.5
889 #define STEPUP_GROWTH_FACTOR 1.414213562
890
891 // If the above heuristic doesn't apply, then we may make the table
892 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
893 // this many nodes survive a GC, _and_ the total table size does
894 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
895 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
896 // effectively although gradually reduces residency and increases time
897 // between GCs for programs with small numbers of PDBs. The 80000 limit
898 // effectively limits the table size to around 2MB for programs with
899 // small numbers of PDBs, whilst giving a reasonably long lifetime to
900 // entries, to try and reduce the costs resulting from deleting and
901 // re-adding of entries.
902 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
903 #define DRIFTUP_GROWTH_FACTOR 1.015
904 #define DRIFTUP_MAX_SIZE 80000
905
906 // We GC the table when it gets this many nodes in it, ie. it's effectively
907 // the table size. It can change.
908 static Int secVBitLimit = 1000;
909
910 // The number of GCs done, used to age sec-V-bit nodes for eviction.
911 // Because it's unsigned, wrapping doesn't matter -- the right answer will
912 // come out anyway.
913 static UInt GCs_done = 0;
914
915 typedef
916 struct {
917 Addr a;
918 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
919 }
920 SecVBitNode;
921
createSecVBitTable(void)922 static OSet* createSecVBitTable(void)
923 {
924 OSet* newSecVBitTable;
925 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
926 ( offsetof(SecVBitNode, a),
927 NULL, // use fast comparisons
928 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
929 VG_(free),
930 1000,
931 sizeof(SecVBitNode));
932 return newSecVBitTable;
933 }
934
gcSecVBitTable(void)935 static void gcSecVBitTable(void)
936 {
937 OSet* secVBitTable2;
938 SecVBitNode* n;
939 Int i, n_nodes = 0, n_survivors = 0;
940
941 GCs_done++;
942
943 // Create the new table.
944 secVBitTable2 = createSecVBitTable();
945
946 // Traverse the table, moving fresh nodes into the new table.
947 VG_(OSetGen_ResetIter)(secVBitTable);
948 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
949 // Keep node if any of its bytes are non-stale. Using
950 // get_vabits2() for the lookup is not very efficient, but I don't
951 // think it matters.
952 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
953 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
954 // Found a non-stale byte, so keep =>
955 // Insert a copy of the node into the new table.
956 SecVBitNode* n2 =
957 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
958 *n2 = *n;
959 VG_(OSetGen_Insert)(secVBitTable2, n2);
960 break;
961 }
962 }
963 }
964
965 // Get the before and after sizes.
966 n_nodes = VG_(OSetGen_Size)(secVBitTable);
967 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
968
969 // Destroy the old table, and put the new one in its place.
970 VG_(OSetGen_Destroy)(secVBitTable);
971 secVBitTable = secVBitTable2;
972
973 if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
974 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
975 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
976 }
977
978 // Increase table size if necessary.
979 if ((Double)n_survivors
980 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
981 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
982 if (VG_(clo_verbosity) > 1)
983 VG_(message)(Vg_DebugMsg,
984 "memcheck GC: %d new table size (stepup)\n",
985 secVBitLimit);
986 }
987 else
988 if (secVBitLimit < DRIFTUP_MAX_SIZE
989 && (Double)n_survivors
990 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
991 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
992 if (VG_(clo_verbosity) > 1)
993 VG_(message)(Vg_DebugMsg,
994 "memcheck GC: %d new table size (driftup)\n",
995 secVBitLimit);
996 }
997 }
998
get_sec_vbits8(Addr a)999 static UWord get_sec_vbits8(Addr a)
1000 {
1001 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1002 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
1003 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1004 UChar vbits8;
1005 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1006 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1007 // make it to the secondary V bits table.
1008 vbits8 = n->vbits8[amod];
1009 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1010 return vbits8;
1011 }
1012
set_sec_vbits8(Addr a,UWord vbits8)1013 static void set_sec_vbits8(Addr a, UWord vbits8)
1014 {
1015 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1016 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1017 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1018 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1019 // make it to the secondary V bits table.
1020 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1021 if (n) {
1022 n->vbits8[amod] = vbits8; // update
1023 sec_vbits_updates++;
1024 } else {
1025 // Do a table GC if necessary. Nb: do this before creating and
1026 // inserting the new node, to avoid erroneously GC'ing the new node.
1027 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1028 gcSecVBitTable();
1029 }
1030
1031 // New node: assign the specific byte, make the rest invalid (they
1032 // should never be read as-is, but be cautious).
1033 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1034 n->a = aAligned;
1035 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1036 n->vbits8[i] = V_BITS8_UNDEFINED;
1037 }
1038 n->vbits8[amod] = vbits8;
1039
1040 // Insert the new node.
1041 VG_(OSetGen_Insert)(secVBitTable, n);
1042 sec_vbits_new_nodes++;
1043
1044 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1045 if (n_secVBit_nodes > max_secVBit_nodes)
1046 max_secVBit_nodes = n_secVBit_nodes;
1047 }
1048 }
1049
1050 /* --------------- Endianness helpers --------------- */
1051
1052 /* Returns the offset in memory of the byteno-th most significant byte
1053 in a wordszB-sized word, given the specified endianness. */
byte_offset_w(UWord wordszB,Bool bigendian,UWord byteno)1054 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1055 UWord byteno ) {
1056 return bigendian ? (wordszB-1-byteno) : byteno;
1057 }
1058
1059
1060 /* --------------- Ignored address ranges --------------- */
1061
1062 /* Denotes the address-error-reportability status for address ranges:
1063 IAR_NotIgnored: the usual case -- report errors in this range
1064 IAR_CommandLine: don't report errors -- from command line setting
1065 IAR_ClientReq: don't report errors -- from client request
1066 */
1067 typedef
1068 enum { IAR_INVALID=99,
1069 IAR_NotIgnored,
1070 IAR_CommandLine,
1071 IAR_ClientReq }
1072 IARKind;
1073
showIARKind(IARKind iark)1074 static const HChar* showIARKind ( IARKind iark )
1075 {
1076 switch (iark) {
1077 case IAR_INVALID: return "INVALID";
1078 case IAR_NotIgnored: return "NotIgnored";
1079 case IAR_CommandLine: return "CommandLine";
1080 case IAR_ClientReq: return "ClientReq";
1081 default: return "???";
1082 }
1083 }
1084
1085 // RangeMap<IARKind>
1086 static RangeMap* gIgnoredAddressRanges = NULL;
1087
init_gIgnoredAddressRanges(void)1088 static void init_gIgnoredAddressRanges ( void )
1089 {
1090 if (LIKELY(gIgnoredAddressRanges != NULL))
1091 return;
1092 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1093 VG_(free), IAR_NotIgnored );
1094 }
1095
MC_(in_ignored_range)1096 Bool MC_(in_ignored_range) ( Addr a )
1097 {
1098 if (LIKELY(gIgnoredAddressRanges == NULL))
1099 return False;
1100 UWord how = IAR_INVALID;
1101 UWord key_min = ~(UWord)0;
1102 UWord key_max = (UWord)0;
1103 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1104 tl_assert(key_min <= a && a <= key_max);
1105 switch (how) {
1106 case IAR_NotIgnored: return False;
1107 case IAR_CommandLine: return True;
1108 case IAR_ClientReq: return True;
1109 default: break; /* invalid */
1110 }
1111 VG_(tool_panic)("MC_(in_ignore_range)");
1112 /*NOTREACHED*/
1113 }
1114
1115 /* Parse two Addr separated by a dash, or fail. */
1116
parse_range(const HChar ** ppc,Addr * result1,Addr * result2)1117 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
1118 {
1119 Bool ok = VG_(parse_Addr) (ppc, result1);
1120 if (!ok)
1121 return False;
1122 if (**ppc != '-')
1123 return False;
1124 (*ppc)++;
1125 ok = VG_(parse_Addr) (ppc, result2);
1126 if (!ok)
1127 return False;
1128 return True;
1129 }
1130
1131 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1132 fail. If they are valid, add them to the global set of ignored
1133 ranges. */
parse_ignore_ranges(const HChar * str0)1134 static Bool parse_ignore_ranges ( const HChar* str0 )
1135 {
1136 init_gIgnoredAddressRanges();
1137 const HChar* str = str0;
1138 const HChar** ppc = &str;
1139 while (1) {
1140 Addr start = ~(Addr)0;
1141 Addr end = (Addr)0;
1142 Bool ok = parse_range(ppc, &start, &end);
1143 if (!ok)
1144 return False;
1145 if (start > end)
1146 return False;
1147 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1148 if (**ppc == 0)
1149 return True;
1150 if (**ppc != ',')
1151 return False;
1152 (*ppc)++;
1153 }
1154 /*NOTREACHED*/
1155 return False;
1156 }
1157
1158 /* Add or remove [start, +len) from the set of ignored ranges. */
modify_ignore_ranges(Bool addRange,Addr start,Addr len)1159 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1160 {
1161 init_gIgnoredAddressRanges();
1162 const Bool verbose = (VG_(clo_verbosity) > 1);
1163 if (len == 0) {
1164 return False;
1165 }
1166 if (addRange) {
1167 VG_(bindRangeMap)(gIgnoredAddressRanges,
1168 start, start+len-1, IAR_ClientReq);
1169 if (verbose)
1170 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1171 (void*)start, (void*)(start+len-1));
1172 } else {
1173 VG_(bindRangeMap)(gIgnoredAddressRanges,
1174 start, start+len-1, IAR_NotIgnored);
1175 if (verbose)
1176 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1177 (void*)start, (void*)(start+len-1));
1178 }
1179 if (verbose) {
1180 VG_(dmsg)("memcheck: now have %u ranges:\n",
1181 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1182 UInt i;
1183 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1184 UWord val = IAR_INVALID;
1185 UWord key_min = ~(UWord)0;
1186 UWord key_max = (UWord)0;
1187 VG_(indexRangeMap)( &key_min, &key_max, &val,
1188 gIgnoredAddressRanges, i );
1189 VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n",
1190 i, key_min, key_max, showIARKind(val));
1191 }
1192 }
1193 return True;
1194 }
1195
1196
1197 /* --------------- Load/store slow cases. --------------- */
1198
1199 static
1200 __attribute__((noinline))
mc_LOADV_128_or_256_slow(ULong * res,Addr a,SizeT nBits,Bool bigendian)1201 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1202 Addr a, SizeT nBits, Bool bigendian )
1203 {
1204 ULong pessim[4]; /* only used when p-l-ok=yes */
1205 SSizeT szB = nBits / 8;
1206 SSizeT szL = szB / 8; /* Size in Longs (64-bit units) */
1207 SSizeT i, j; /* Must be signed. */
1208 SizeT n_addrs_bad = 0;
1209 Addr ai;
1210 UChar vbits8;
1211 Bool ok;
1212
1213 /* Code below assumes load size is a power of two and at least 64
1214 bits. */
1215 tl_assert((szB & (szB-1)) == 0 && szL > 0);
1216
1217 /* If this triggers, you probably just need to increase the size of
1218 the pessim array. */
1219 tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1220
1221 for (j = 0; j < szL; j++) {
1222 pessim[j] = V_BITS64_DEFINED;
1223 res[j] = V_BITS64_UNDEFINED;
1224 }
1225
1226 /* Make up a result V word, which contains the loaded data for
1227 valid addresses and Defined for invalid addresses. Iterate over
1228 the bytes in the word, from the most significant down to the
1229 least. The vbits to return are calculated into vbits128. Also
1230 compute the pessimising value to be used when
1231 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1232 info can be gleaned from the pessim array) but is used as a
1233 cross-check. */
1234 for (j = szL-1; j >= 0; j--) {
1235 ULong vbits64 = V_BITS64_UNDEFINED;
1236 ULong pessim64 = V_BITS64_DEFINED;
1237 UWord long_index = byte_offset_w(szL, bigendian, j);
1238 for (i = 8-1; i >= 0; i--) {
1239 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
1240 ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1241 ok = get_vbits8(ai, &vbits8);
1242 vbits64 <<= 8;
1243 vbits64 |= vbits8;
1244 if (!ok) n_addrs_bad++;
1245 pessim64 <<= 8;
1246 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1247 }
1248 res[long_index] = vbits64;
1249 pessim[long_index] = pessim64;
1250 }
1251
1252 /* In the common case, all the addresses involved are valid, so we
1253 just return the computed V bits and have done. */
1254 if (LIKELY(n_addrs_bad == 0))
1255 return;
1256
1257 /* If there's no possibility of getting a partial-loads-ok
1258 exemption, report the error and quit. */
1259 if (!MC_(clo_partial_loads_ok)) {
1260 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1261 return;
1262 }
1263
1264 /* The partial-loads-ok excemption might apply. Find out if it
1265 does. If so, don't report an addressing error, but do return
1266 Undefined for the bytes that are out of range, so as to avoid
1267 false negatives. If it doesn't apply, just report an addressing
1268 error in the usual way. */
1269
1270 /* Some code steps along byte strings in aligned chunks
1271 even when there is only a partially defined word at the end (eg,
1272 optimised strlen). This is allowed by the memory model of
1273 modern machines, since an aligned load cannot span two pages and
1274 thus cannot "partially fault".
1275
1276 Therefore, a load from a partially-addressible place is allowed
1277 if all of the following hold:
1278 - the command-line flag is set [by default, it isn't]
1279 - it's an aligned load
1280 - at least one of the addresses in the word *is* valid
1281
1282 Since this suppresses the addressing error, we avoid false
1283 negatives by marking bytes undefined when they come from an
1284 invalid address.
1285 */
1286
1287 /* "at least one of the addresses is invalid" */
1288 ok = False;
1289 for (j = 0; j < szL; j++)
1290 ok |= pessim[j] != V_BITS64_DEFINED;
1291 tl_assert(ok);
1292
1293 if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1294 /* Exemption applies. Use the previously computed pessimising
1295 value and return the combined result, but don't flag an
1296 addressing error. The pessimising value is Defined for valid
1297 addresses and Undefined for invalid addresses. */
1298 /* for assumption that doing bitwise or implements UifU */
1299 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1300 /* (really need "UifU" here...)
1301 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1302 for (j = szL-1; j >= 0; j--)
1303 res[j] |= pessim[j];
1304 return;
1305 }
1306
1307 /* Exemption doesn't apply. Flag an addressing error in the normal
1308 way. */
1309 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1310 }
1311
1312
1313 static
1314 __attribute__((noinline))
1315 __attribute__((used))
1316 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1317 this function may get called from hand written assembly. */
mc_LOADVn_slow(Addr a,SizeT nBits,Bool bigendian)1318 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1319 {
1320 PROF_EVENT(MCPE_LOADVN_SLOW);
1321
1322 /* ------------ BEGIN semi-fast cases ------------ */
1323 /* These deal quickly-ish with the common auxiliary primary map
1324 cases on 64-bit platforms. Are merely a speedup hack; can be
1325 omitted without loss of correctness/functionality. Note that in
1326 both cases the "sizeof(void*) == 8" causes these cases to be
1327 folded out by compilers on 32-bit platforms. These are derived
1328 from LOADV64 and LOADV32.
1329 */
1330 if (LIKELY(sizeof(void*) == 8
1331 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1332 SecMap* sm = get_secmap_for_reading(a);
1333 UWord sm_off16 = SM_OFF_16(a);
1334 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1335 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1336 return V_BITS64_DEFINED;
1337 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1338 return V_BITS64_UNDEFINED;
1339 /* else fall into the slow case */
1340 }
1341 if (LIKELY(sizeof(void*) == 8
1342 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1343 SecMap* sm = get_secmap_for_reading(a);
1344 UWord sm_off = SM_OFF(a);
1345 UWord vabits8 = sm->vabits8[sm_off];
1346 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1347 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1348 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1349 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1350 /* else fall into slow case */
1351 }
1352 /* ------------ END semi-fast cases ------------ */
1353
1354 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1355 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1356 SSizeT szB = nBits / 8;
1357 SSizeT i; /* Must be signed. */
1358 SizeT n_addrs_bad = 0;
1359 Addr ai;
1360 UChar vbits8;
1361 Bool ok;
1362
1363 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1364
1365 /* Make up a 64-bit result V word, which contains the loaded data
1366 for valid addresses and Defined for invalid addresses. Iterate
1367 over the bytes in the word, from the most significant down to
1368 the least. The vbits to return are calculated into vbits64.
1369 Also compute the pessimising value to be used when
1370 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1371 info can be gleaned from pessim64) but is used as a
1372 cross-check. */
1373 for (i = szB-1; i >= 0; i--) {
1374 PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
1375 ai = a + byte_offset_w(szB, bigendian, i);
1376 ok = get_vbits8(ai, &vbits8);
1377 vbits64 <<= 8;
1378 vbits64 |= vbits8;
1379 if (!ok) n_addrs_bad++;
1380 pessim64 <<= 8;
1381 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1382 }
1383
1384 /* In the common case, all the addresses involved are valid, so we
1385 just return the computed V bits and have done. */
1386 if (LIKELY(n_addrs_bad == 0))
1387 return vbits64;
1388
1389 /* If there's no possibility of getting a partial-loads-ok
1390 exemption, report the error and quit. */
1391 if (!MC_(clo_partial_loads_ok)) {
1392 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1393 return vbits64;
1394 }
1395
1396 /* The partial-loads-ok excemption might apply. Find out if it
1397 does. If so, don't report an addressing error, but do return
1398 Undefined for the bytes that are out of range, so as to avoid
1399 false negatives. If it doesn't apply, just report an addressing
1400 error in the usual way. */
1401
1402 /* Some code steps along byte strings in aligned word-sized chunks
1403 even when there is only a partially defined word at the end (eg,
1404 optimised strlen). This is allowed by the memory model of
1405 modern machines, since an aligned load cannot span two pages and
1406 thus cannot "partially fault". Despite such behaviour being
1407 declared undefined by ANSI C/C++.
1408
1409 Therefore, a load from a partially-addressible place is allowed
1410 if all of the following hold:
1411 - the command-line flag is set [by default, it isn't]
1412 - it's a word-sized, word-aligned load
1413 - at least one of the addresses in the word *is* valid
1414
1415 Since this suppresses the addressing error, we avoid false
1416 negatives by marking bytes undefined when they come from an
1417 invalid address.
1418 */
1419
1420 /* "at least one of the addresses is invalid" */
1421 tl_assert(pessim64 != V_BITS64_DEFINED);
1422
1423 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1424 && n_addrs_bad < VG_WORDSIZE) {
1425 /* Exemption applies. Use the previously computed pessimising
1426 value for vbits64 and return the combined result, but don't
1427 flag an addressing error. The pessimising value is Defined
1428 for valid addresses and Undefined for invalid addresses. */
1429 /* for assumption that doing bitwise or implements UifU */
1430 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1431 /* (really need "UifU" here...)
1432 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1433 vbits64 |= pessim64;
1434 return vbits64;
1435 }
1436
1437 /* Also, in appears that gcc generates string-stepping code in
1438 32-bit chunks on 64 bit platforms. So, also grant an exception
1439 for this case. Note that the first clause of the conditional
1440 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1441 will get folded out in 32 bit builds. */
1442 if (VG_WORDSIZE == 8
1443 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1444 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1445 /* (really need "UifU" here...)
1446 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1447 vbits64 |= pessim64;
1448 /* Mark the upper 32 bits as undefined, just to be on the safe
1449 side. */
1450 vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1451 return vbits64;
1452 }
1453
1454 /* Exemption doesn't apply. Flag an addressing error in the normal
1455 way. */
1456 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1457
1458 return vbits64;
1459 }
1460
1461
1462 static
1463 __attribute__((noinline))
mc_STOREVn_slow(Addr a,SizeT nBits,ULong vbytes,Bool bigendian)1464 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1465 {
1466 SizeT szB = nBits / 8;
1467 SizeT i, n_addrs_bad = 0;
1468 UChar vbits8;
1469 Addr ai;
1470 Bool ok;
1471
1472 PROF_EVENT(MCPE_STOREVN_SLOW);
1473
1474 /* ------------ BEGIN semi-fast cases ------------ */
1475 /* These deal quickly-ish with the common auxiliary primary map
1476 cases on 64-bit platforms. Are merely a speedup hack; can be
1477 omitted without loss of correctness/functionality. Note that in
1478 both cases the "sizeof(void*) == 8" causes these cases to be
1479 folded out by compilers on 32-bit platforms. The logic below
1480 is somewhat similar to some cases extensively commented in
1481 MC_(helperc_STOREV8).
1482 */
1483 if (LIKELY(sizeof(void*) == 8
1484 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1485 SecMap* sm = get_secmap_for_reading(a);
1486 UWord sm_off16 = SM_OFF_16(a);
1487 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1488 if (LIKELY( !is_distinguished_sm(sm) &&
1489 (VA_BITS16_DEFINED == vabits16 ||
1490 VA_BITS16_UNDEFINED == vabits16) )) {
1491 /* Handle common case quickly: a is suitably aligned, */
1492 /* is mapped, and is addressible. */
1493 // Convert full V-bits in register to compact 2-bit form.
1494 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1495 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1496 return;
1497 } else if (V_BITS64_UNDEFINED == vbytes) {
1498 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1499 return;
1500 }
1501 /* else fall into the slow case */
1502 }
1503 /* else fall into the slow case */
1504 }
1505 if (LIKELY(sizeof(void*) == 8
1506 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1507 SecMap* sm = get_secmap_for_reading(a);
1508 UWord sm_off = SM_OFF(a);
1509 UWord vabits8 = sm->vabits8[sm_off];
1510 if (LIKELY( !is_distinguished_sm(sm) &&
1511 (VA_BITS8_DEFINED == vabits8 ||
1512 VA_BITS8_UNDEFINED == vabits8) )) {
1513 /* Handle common case quickly: a is suitably aligned, */
1514 /* is mapped, and is addressible. */
1515 // Convert full V-bits in register to compact 2-bit form.
1516 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1517 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1518 return;
1519 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1520 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1521 return;
1522 }
1523 /* else fall into the slow case */
1524 }
1525 /* else fall into the slow case */
1526 }
1527 /* ------------ END semi-fast cases ------------ */
1528
1529 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1530
1531 /* Dump vbytes in memory, iterating from least to most significant
1532 byte. At the same time establish addressibility of the location. */
1533 for (i = 0; i < szB; i++) {
1534 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1535 ai = a + byte_offset_w(szB, bigendian, i);
1536 vbits8 = vbytes & 0xff;
1537 ok = set_vbits8(ai, vbits8);
1538 if (!ok) n_addrs_bad++;
1539 vbytes >>= 8;
1540 }
1541
1542 /* If an address error has happened, report it. */
1543 if (n_addrs_bad > 0)
1544 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1545 }
1546
1547
1548 /*------------------------------------------------------------*/
1549 /*--- Setting permissions over address ranges. ---*/
1550 /*------------------------------------------------------------*/
1551
set_address_range_perms(Addr a,SizeT lenT,UWord vabits16,UWord dsm_num)1552 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1553 UWord dsm_num )
1554 {
1555 UWord sm_off, sm_off16;
1556 UWord vabits2 = vabits16 & 0x3;
1557 SizeT lenA, lenB, len_to_next_secmap;
1558 Addr aNext;
1559 SecMap* sm;
1560 SecMap** sm_ptr;
1561 SecMap* example_dsm;
1562
1563 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1564
1565 /* Check the V+A bits make sense. */
1566 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1567 VA_BITS16_UNDEFINED == vabits16 ||
1568 VA_BITS16_DEFINED == vabits16);
1569
1570 // This code should never write PDBs; ensure this. (See comment above
1571 // set_vabits2().)
1572 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1573
1574 if (lenT == 0)
1575 return;
1576
1577 if (lenT > 256 * 1024 * 1024) {
1578 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1579 const HChar* s = "unknown???";
1580 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1581 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1582 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1583 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1584 "large range [0x%lx, 0x%lx) (%s)\n",
1585 a, a + lenT, s);
1586 }
1587 }
1588
1589 #ifndef PERF_FAST_SARP
1590 /*------------------ debug-only case ------------------ */
1591 {
1592 // Endianness doesn't matter here because all bytes are being set to
1593 // the same value.
1594 // Nb: We don't have to worry about updating the sec-V-bits table
1595 // after these set_vabits2() calls because this code never writes
1596 // VA_BITS2_PARTDEFINED values.
1597 SizeT i;
1598 for (i = 0; i < lenT; i++) {
1599 set_vabits2(a + i, vabits2);
1600 }
1601 return;
1602 }
1603 #endif
1604
1605 /*------------------ standard handling ------------------ */
1606
1607 /* Get the distinguished secondary that we might want
1608 to use (part of the space-compression scheme). */
1609 example_dsm = &sm_distinguished[dsm_num];
1610
1611 // We have to handle ranges covering various combinations of partial and
1612 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1613 // Cases marked with a '*' are common.
1614 //
1615 // TYPE PARTS USED
1616 // ---- ----------
1617 // * one partial sec-map (p) 1
1618 // - one whole sec-map (P) 2
1619 //
1620 // * two partial sec-maps (pp) 1,3
1621 // - one partial, one whole sec-map (pP) 1,2
1622 // - one whole, one partial sec-map (Pp) 2,3
1623 // - two whole sec-maps (PP) 2,2
1624 //
1625 // * one partial, one whole, one partial (pPp) 1,2,3
1626 // - one partial, two whole (pPP) 1,2,2
1627 // - two whole, one partial (PPp) 2,2,3
1628 // - three whole (PPP) 2,2,2
1629 //
1630 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1631 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1632 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1633 // - N whole (PP...PP) 2,2...2,3
1634
1635 // Break up total length (lenT) into two parts: length in the first
1636 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1637 aNext = start_of_this_sm(a) + SM_SIZE;
1638 len_to_next_secmap = aNext - a;
1639 if ( lenT <= len_to_next_secmap ) {
1640 // Range entirely within one sec-map. Covers almost all cases.
1641 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1642 lenA = lenT;
1643 lenB = 0;
1644 } else if (is_start_of_sm(a)) {
1645 // Range spans at least one whole sec-map, and starts at the beginning
1646 // of a sec-map; skip to Part 2.
1647 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1648 lenA = 0;
1649 lenB = lenT;
1650 goto part2;
1651 } else {
1652 // Range spans two or more sec-maps, first one is partial.
1653 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1654 lenA = len_to_next_secmap;
1655 lenB = lenT - lenA;
1656 }
1657
1658 //------------------------------------------------------------------------
1659 // Part 1: Deal with the first sec_map. Most of the time the range will be
1660 // entirely within a sec_map and this part alone will suffice. Also,
1661 // doing it this way lets us avoid repeatedly testing for the crossing of
1662 // a sec-map boundary within these loops.
1663 //------------------------------------------------------------------------
1664
1665 // If it's distinguished, make it undistinguished if necessary.
1666 sm_ptr = get_secmap_ptr(a);
1667 if (is_distinguished_sm(*sm_ptr)) {
1668 if (*sm_ptr == example_dsm) {
1669 // Sec-map already has the V+A bits that we want, so skip.
1670 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1671 a = aNext;
1672 lenA = 0;
1673 } else {
1674 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1675 *sm_ptr = copy_for_writing(*sm_ptr);
1676 }
1677 }
1678 sm = *sm_ptr;
1679
1680 // 1 byte steps
1681 while (True) {
1682 if (VG_IS_8_ALIGNED(a)) break;
1683 if (lenA < 1) break;
1684 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1685 sm_off = SM_OFF(a);
1686 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1687 a += 1;
1688 lenA -= 1;
1689 }
1690 // 8-aligned, 8 byte steps
1691 while (True) {
1692 if (lenA < 8) break;
1693 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1694 sm_off16 = SM_OFF_16(a);
1695 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1696 a += 8;
1697 lenA -= 8;
1698 }
1699 // 1 byte steps
1700 while (True) {
1701 if (lenA < 1) break;
1702 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1703 sm_off = SM_OFF(a);
1704 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1705 a += 1;
1706 lenA -= 1;
1707 }
1708
1709 // We've finished the first sec-map. Is that it?
1710 if (lenB == 0)
1711 return;
1712
1713 //------------------------------------------------------------------------
1714 // Part 2: Fast-set entire sec-maps at a time.
1715 //------------------------------------------------------------------------
1716 part2:
1717 // 64KB-aligned, 64KB steps.
1718 // Nb: we can reach here with lenB < SM_SIZE
1719 tl_assert(0 == lenA);
1720 while (True) {
1721 if (lenB < SM_SIZE) break;
1722 tl_assert(is_start_of_sm(a));
1723 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1724 sm_ptr = get_secmap_ptr(a);
1725 if (!is_distinguished_sm(*sm_ptr)) {
1726 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1727 // Free the non-distinguished sec-map that we're replacing. This
1728 // case happens moderately often, enough to be worthwhile.
1729 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1730 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1731 }
1732 update_SM_counts(*sm_ptr, example_dsm);
1733 // Make the sec-map entry point to the example DSM
1734 *sm_ptr = example_dsm;
1735 lenB -= SM_SIZE;
1736 a += SM_SIZE;
1737 }
1738
1739 // We've finished the whole sec-maps. Is that it?
1740 if (lenB == 0)
1741 return;
1742
1743 //------------------------------------------------------------------------
1744 // Part 3: Finish off the final partial sec-map, if necessary.
1745 //------------------------------------------------------------------------
1746
1747 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1748
1749 // If it's distinguished, make it undistinguished if necessary.
1750 sm_ptr = get_secmap_ptr(a);
1751 if (is_distinguished_sm(*sm_ptr)) {
1752 if (*sm_ptr == example_dsm) {
1753 // Sec-map already has the V+A bits that we want, so stop.
1754 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1755 return;
1756 } else {
1757 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1758 *sm_ptr = copy_for_writing(*sm_ptr);
1759 }
1760 }
1761 sm = *sm_ptr;
1762
1763 // 8-aligned, 8 byte steps
1764 while (True) {
1765 if (lenB < 8) break;
1766 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1767 sm_off16 = SM_OFF_16(a);
1768 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1769 a += 8;
1770 lenB -= 8;
1771 }
1772 // 1 byte steps
1773 while (True) {
1774 if (lenB < 1) return;
1775 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1776 sm_off = SM_OFF(a);
1777 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1778 a += 1;
1779 lenB -= 1;
1780 }
1781 }
1782
1783
1784 /* --- Set permissions for arbitrary address ranges --- */
1785
MC_(make_mem_noaccess)1786 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1787 {
1788 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1789 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1790 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1791 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1792 ocache_sarp_Clear_Origins ( a, len );
1793 }
1794
make_mem_undefined(Addr a,SizeT len)1795 static void make_mem_undefined ( Addr a, SizeT len )
1796 {
1797 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1798 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1799 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1800 }
1801
MC_(make_mem_undefined_w_otag)1802 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1803 {
1804 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1805 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1806 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1807 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1808 ocache_sarp_Set_Origins ( a, len, otag );
1809 }
1810
1811 static
make_mem_undefined_w_tid_and_okind(Addr a,SizeT len,ThreadId tid,UInt okind)1812 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1813 ThreadId tid, UInt okind )
1814 {
1815 UInt ecu;
1816 ExeContext* here;
1817 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1818 if it is invalid. So no need to do it here. */
1819 tl_assert(okind <= 3);
1820 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1821 tl_assert(here);
1822 ecu = VG_(get_ECU_from_ExeContext)(here);
1823 tl_assert(VG_(is_plausible_ECU)(ecu));
1824 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1825 }
1826
1827 static
mc_new_mem_w_tid_make_ECU(Addr a,SizeT len,ThreadId tid)1828 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1829 {
1830 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1831 }
1832
1833 static
mc_new_mem_w_tid_no_ECU(Addr a,SizeT len,ThreadId tid)1834 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1835 {
1836 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1837 }
1838
MC_(make_mem_defined)1839 void MC_(make_mem_defined) ( Addr a, SizeT len )
1840 {
1841 PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1842 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1843 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1844 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1845 ocache_sarp_Clear_Origins ( a, len );
1846 }
1847
1848 __attribute__((unused))
make_mem_defined_w_tid(Addr a,SizeT len,ThreadId tid)1849 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1850 {
1851 MC_(make_mem_defined)(a, len);
1852 }
1853
1854 /* For each byte in [a,a+len), if the byte is addressable, make it be
1855 defined, but if it isn't addressible, leave it alone. In other
1856 words a version of MC_(make_mem_defined) that doesn't mess with
1857 addressibility. Low-performance implementation. */
make_mem_defined_if_addressable(Addr a,SizeT len)1858 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1859 {
1860 SizeT i;
1861 UChar vabits2;
1862 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1863 for (i = 0; i < len; i++) {
1864 vabits2 = get_vabits2( a+i );
1865 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1866 set_vabits2(a+i, VA_BITS2_DEFINED);
1867 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1868 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1869 }
1870 }
1871 }
1872 }
1873
1874 /* Similarly (needed for mprotect handling ..) */
make_mem_defined_if_noaccess(Addr a,SizeT len)1875 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1876 {
1877 SizeT i;
1878 UChar vabits2;
1879 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1880 for (i = 0; i < len; i++) {
1881 vabits2 = get_vabits2( a+i );
1882 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1883 set_vabits2(a+i, VA_BITS2_DEFINED);
1884 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1885 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1886 }
1887 }
1888 }
1889 }
1890
1891 /* --- Block-copy permissions (needed for implementing realloc() and
1892 sys_mremap). --- */
1893
MC_(copy_address_range_state)1894 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1895 {
1896 SizeT i, j;
1897 UChar vabits2, vabits8;
1898 Bool aligned, nooverlap;
1899
1900 DEBUG("MC_(copy_address_range_state)\n");
1901 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
1902
1903 if (len == 0 || src == dst)
1904 return;
1905
1906 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1907 nooverlap = src+len <= dst || dst+len <= src;
1908
1909 if (nooverlap && aligned) {
1910
1911 /* Vectorised fast case, when no overlap and suitably aligned */
1912 /* vector loop */
1913 i = 0;
1914 while (len >= 4) {
1915 vabits8 = get_vabits8_for_aligned_word32( src+i );
1916 set_vabits8_for_aligned_word32( dst+i, vabits8 );
1917 if (LIKELY(VA_BITS8_DEFINED == vabits8
1918 || VA_BITS8_UNDEFINED == vabits8
1919 || VA_BITS8_NOACCESS == vabits8)) {
1920 /* do nothing */
1921 } else {
1922 /* have to copy secondary map info */
1923 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1924 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1925 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1926 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1927 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1928 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1929 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1930 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1931 }
1932 i += 4;
1933 len -= 4;
1934 }
1935 /* fixup loop */
1936 while (len >= 1) {
1937 vabits2 = get_vabits2( src+i );
1938 set_vabits2( dst+i, vabits2 );
1939 if (VA_BITS2_PARTDEFINED == vabits2) {
1940 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1941 }
1942 i++;
1943 len--;
1944 }
1945
1946 } else {
1947
1948 /* We have to do things the slow way */
1949 if (src < dst) {
1950 for (i = 0, j = len-1; i < len; i++, j--) {
1951 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
1952 vabits2 = get_vabits2( src+j );
1953 set_vabits2( dst+j, vabits2 );
1954 if (VA_BITS2_PARTDEFINED == vabits2) {
1955 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1956 }
1957 }
1958 }
1959
1960 if (src > dst) {
1961 for (i = 0; i < len; i++) {
1962 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
1963 vabits2 = get_vabits2( src+i );
1964 set_vabits2( dst+i, vabits2 );
1965 if (VA_BITS2_PARTDEFINED == vabits2) {
1966 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1967 }
1968 }
1969 }
1970 }
1971
1972 }
1973
1974
1975 /*------------------------------------------------------------*/
1976 /*--- Origin tracking stuff - cache basics ---*/
1977 /*------------------------------------------------------------*/
1978
1979 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1980 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1981
1982 Note that this implementation draws inspiration from the "origin
1983 tracking by value piggybacking" scheme described in "Tracking Bad
1984 Apples: Reporting the Origin of Null and Undefined Value Errors"
1985 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1986 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1987 implemented completely differently.
1988
1989 Origin tags and ECUs -- about the shadow values
1990 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1991
1992 This implementation tracks the defining point of all uninitialised
1993 values using so called "origin tags", which are 32-bit integers,
1994 rather than using the values themselves to encode the origins. The
1995 latter, so-called value piggybacking", is what the OOPSLA07 paper
1996 describes.
1997
1998 Origin tags, as tracked by the machinery below, are 32-bit unsigned
1999 ints (UInts), regardless of the machine's word size. Each tag
2000 comprises an upper 30-bit ECU field and a lower 2-bit
2001 'kind' field. The ECU field is a number given out by m_execontext
2002 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2003 directly as an origin tag (otag), but in fact we want to put
2004 additional information 'kind' field to indicate roughly where the
2005 tag came from. This helps print more understandable error messages
2006 for the user -- it has no other purpose. In summary:
2007
2008 * Both ECUs and origin tags are represented as 32-bit words
2009
2010 * m_execontext and the core-tool interface deal purely in ECUs.
2011 They have no knowledge of origin tags - that is a purely
2012 Memcheck-internal matter.
2013
2014 * all valid ECUs have the lowest 2 bits zero and at least
2015 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2016
2017 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2018 constants defined in mc_include.h.
2019
2020 * to convert an otag back to an ECU, AND it with ~3
2021
2022 One important fact is that no valid otag is zero. A zero otag is
2023 used by the implementation to indicate "no origin", which could
2024 mean that either the value is defined, or it is undefined but the
2025 implementation somehow managed to lose the origin.
2026
2027 The ECU used for memory created by malloc etc is derived from the
2028 stack trace at the time the malloc etc happens. This means the
2029 mechanism can show the exact allocation point for heap-created
2030 uninitialised values.
2031
2032 In contrast, it is simply too expensive to create a complete
2033 backtrace for each stack allocation. Therefore we merely use a
2034 depth-1 backtrace for stack allocations, which can be done once at
2035 translation time, rather than N times at run time. The result of
2036 this is that, for stack created uninitialised values, Memcheck can
2037 only show the allocating function, and not what called it.
2038 Furthermore, compilers tend to move the stack pointer just once at
2039 the start of the function, to allocate all locals, and so in fact
2040 the stack origin almost always simply points to the opening brace
2041 of the function. Net result is, for stack origins, the mechanism
2042 can tell you in which function the undefined value was created, but
2043 that's all. Users will need to carefully check all locals in the
2044 specified function.
2045
2046 Shadowing registers and memory
2047 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2048
2049 Memory is shadowed using a two level cache structure (ocacheL1 and
2050 ocacheL2). Memory references are first directed to ocacheL1. This
2051 is a traditional 2-way set associative cache with 32-byte lines and
2052 approximate LRU replacement within each set.
2053
2054 A naive implementation would require storing one 32 bit otag for
2055 each byte of memory covered, a 4:1 space overhead. Instead, there
2056 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2057 that shows which of the 4 bytes have that shadow value and which
2058 have a shadow value of zero (indicating no origin). Hence a lot of
2059 space is saved, but the cost is that only one different origin per
2060 4 bytes of address space can be represented. This is a source of
2061 imprecision, but how much of a problem it really is remains to be
2062 seen.
2063
2064 A cache line that contains all zeroes ("no origins") contains no
2065 useful information, and can be ejected from the L1 cache "for
2066 free", in the sense that a read miss on the L1 causes a line of
2067 zeroes to be installed. However, ejecting a line containing
2068 nonzeroes risks losing origin information permanently. In order to
2069 prevent such lossage, ejected nonzero lines are placed in a
2070 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2071 lines. This can grow arbitrarily large, and so should ensure that
2072 Memcheck runs out of memory in preference to losing useful origin
2073 info due to cache size limitations.
2074
2075 Shadowing registers is a bit tricky, because the shadow values are
2076 32 bits, regardless of the size of the register. That gives a
2077 problem for registers smaller than 32 bits. The solution is to
2078 find spaces in the guest state that are unused, and use those to
2079 shadow guest state fragments smaller than 32 bits. For example, on
2080 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2081 shadow are allocated for the register's otag, then there are still
2082 12 bytes left over which could be used to shadow 3 other values.
2083
2084 This implies there is some non-obvious mapping from guest state
2085 (start,length) pairs to the relevant shadow offset (for the origin
2086 tags). And it is unfortunately guest-architecture specific. The
2087 mapping is contained in mc_machine.c, which is quite lengthy but
2088 straightforward.
2089
2090 Instrumenting the IR
2091 ~~~~~~~~~~~~~~~~~~~~
2092
2093 Instrumentation is largely straightforward, and done by the
2094 functions schemeE and schemeS in mc_translate.c. These generate
2095 code for handling the origin tags of expressions (E) and statements
2096 (S) respectively. The rather strange names are a reference to the
2097 "compilation schemes" shown in Simon Peyton Jones' book "The
2098 Implementation of Functional Programming Languages" (Prentice Hall,
2099 1987, see
2100 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2101
2102 schemeS merely arranges to move shadow values around the guest
2103 state to track the incoming IR. schemeE is largely trivial too.
2104 The only significant point is how to compute the otag corresponding
2105 to binary (or ternary, quaternary, etc) operator applications. The
2106 rule is simple: just take whichever value is larger (32-bit
2107 unsigned max). Constants get the special value zero. Hence this
2108 rule always propagates a nonzero (known) otag in preference to a
2109 zero (unknown, or more likely, value-is-defined) tag, as we want.
2110 If two different undefined values are inputs to a binary operator
2111 application, then which is propagated is arbitrary, but that
2112 doesn't matter, since the program is erroneous in using either of
2113 the values, and so there's no point in attempting to propagate
2114 both.
2115
2116 Since constants are abstracted to (otag) zero, much of the
2117 instrumentation code can be folded out without difficulty by the
2118 generic post-instrumentation IR cleanup pass, using these rules:
2119 Max32U(0,x) -> x, Max32U(x,0) -> x, Max32(x,y) where x and y are
2120 constants is evaluated at JIT time. And the resulting dead code
2121 removal. In practice this causes surprisingly few Max32Us to
2122 survive through to backend code generation.
2123
2124 Integration with the V-bits machinery
2125 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2126
2127 This is again largely straightforward. Mostly the otag and V bits
2128 stuff are independent. The only point of interaction is when the V
2129 bits instrumenter creates a call to a helper function to report an
2130 uninitialised value error -- in that case it must first use schemeE
2131 to get hold of the origin tag expression for the value, and pass
2132 that to the helper too.
2133
2134 There is the usual stuff to do with setting address range
2135 permissions. When memory is painted undefined, we must also know
2136 the origin tag to paint with, which involves some tedious plumbing,
2137 particularly to do with the fast case stack handlers. When memory
2138 is painted defined or noaccess then the origin tags must be forced
2139 to zero.
2140
2141 One of the goals of the implementation was to ensure that the
2142 non-origin tracking mode isn't slowed down at all. To do this,
2143 various functions to do with memory permissions setting (again,
2144 mostly pertaining to the stack) are duplicated for the with- and
2145 without-otag case.
2146
2147 Dealing with stack redzones, and the NIA cache
2148 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2149
2150 This is one of the few non-obvious parts of the implementation.
2151
2152 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2153 reserved area below the stack pointer, that can be used as scratch
2154 space by compiler generated code for functions. In the Memcheck
2155 sources this is referred to as the "stack redzone". The important
2156 thing here is that such redzones are considered volatile across
2157 function calls and returns. So Memcheck takes care to mark them as
2158 undefined for each call and return, on the afflicted platforms.
2159 Past experience shows this is essential in order to get reliable
2160 messages about uninitialised values that come from the stack.
2161
2162 So the question is, when we paint a redzone undefined, what origin
2163 tag should we use for it? Consider a function f() calling g(). If
2164 we paint the redzone using an otag derived from the ExeContext of
2165 the CALL/BL instruction in f, then any errors in g causing it to
2166 use uninitialised values that happen to lie in the redzone, will be
2167 reported as having their origin in f. Which is highly confusing.
2168
2169 The same applies for returns: if, on a return, we paint the redzone
2170 using a origin tag derived from the ExeContext of the RET/BLR
2171 instruction in g, then any later errors in f causing it to use
2172 uninitialised values in the redzone, will be reported as having
2173 their origin in g. Which is just as confusing.
2174
2175 To do it right, in both cases we need to use an origin tag which
2176 pertains to the instruction which dynamically follows the CALL/BL
2177 or RET/BLR. In short, one derived from the NIA - the "next
2178 instruction address".
2179
2180 To make this work, Memcheck's redzone-painting helper,
2181 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2182 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2183 ExeContext's ECU as the basis for the otag used to paint the
2184 redzone. The expensive part of this is converting an NIA into an
2185 ECU, since this happens once for every call and every return. So
2186 we use a simple 511-line, 2-way set associative cache
2187 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2188 the cost out.
2189
2190 Further background comments
2191 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2192
2193 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2194 > it really just the address of the relevant ExeContext?
2195
2196 Well, it's not the address, but a value which has a 1-1 mapping
2197 with ExeContexts, and is guaranteed not to be zero, since zero
2198 denotes (to memcheck) "unknown origin or defined value". So these
2199 UInts are just numbers starting at 4 and incrementing by 4; each
2200 ExeContext is given a number when it is created. (*** NOTE this
2201 confuses otags and ECUs; see comments above ***).
2202
2203 Making these otags 32-bit regardless of the machine's word size
2204 makes the 64-bit implementation easier (next para). And it doesn't
2205 really limit us in any way, since for the tags to overflow would
2206 require that the program somehow caused 2^30-1 different
2207 ExeContexts to be created, in which case it is probably in deep
2208 trouble. Not to mention V will have soaked up many tens of
2209 gigabytes of memory merely to store them all.
2210
2211 So having 64-bit origins doesn't really buy you anything, and has
2212 the following downsides:
2213
2214 Suppose that instead, an otag is a UWord. This would mean that, on
2215 a 64-bit target,
2216
2217 1. It becomes hard to shadow any element of guest state which is
2218 smaller than 8 bytes. To do so means you'd need to find some
2219 8-byte-sized hole in the guest state which you don't want to
2220 shadow, and use that instead to hold the otag. On ppc64, the
2221 condition code register(s) are split into 20 UChar sized pieces,
2222 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2223 and so that would entail finding 160 bytes somewhere else in the
2224 guest state.
2225
2226 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2227 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2228 same) and so I had to look for 4 untracked otag-sized areas in
2229 the guest state to make that possible.
2230
2231 The same problem exists of course when origin tags are only 32
2232 bits, but it's less extreme.
2233
2234 2. (More compelling) it doubles the size of the origin shadow
2235 memory. Given that the shadow memory is organised as a fixed
2236 size cache, and that accuracy of tracking is limited by origins
2237 falling out the cache due to space conflicts, this isn't good.
2238
2239 > Another question: is the origin tracking perfect, or are there
2240 > cases where it fails to determine an origin?
2241
2242 It is imperfect for at least for the following reasons, and
2243 probably more:
2244
2245 * Insufficient capacity in the origin cache. When a line is
2246 evicted from the cache it is gone forever, and so subsequent
2247 queries for the line produce zero, indicating no origin
2248 information. Interestingly, a line containing all zeroes can be
2249 evicted "free" from the cache, since it contains no useful
2250 information, so there is scope perhaps for some cleverer cache
2251 management schemes. (*** NOTE, with the introduction of the
2252 second level origin tag cache, ocacheL2, this is no longer a
2253 problem. ***)
2254
2255 * The origin cache only stores one otag per 32-bits of address
2256 space, plus 4 bits indicating which of the 4 bytes has that tag
2257 and which are considered defined. The result is that if two
2258 undefined bytes in the same word are stored in memory, the first
2259 stored byte's origin will be lost and replaced by the origin for
2260 the second byte.
2261
2262 * Nonzero origin tags for defined values. Consider a binary
2263 operator application op(x,y). Suppose y is undefined (and so has
2264 a valid nonzero origin tag), and x is defined, but erroneously
2265 has a nonzero origin tag (defined values should have tag zero).
2266 If the erroneous tag has a numeric value greater than y's tag,
2267 then the rule for propagating origin tags though binary
2268 operations, which is simply to take the unsigned max of the two
2269 tags, will erroneously propagate x's tag rather than y's.
2270
2271 * Some obscure uses of x86/amd64 byte registers can cause lossage
2272 or confusion of origins. %AH .. %DH are treated as different
2273 from, and unrelated to, their parent registers, %EAX .. %EDX.
2274 So some weird sequences like
2275
2276 movb undefined-value, %AH
2277 movb defined-value, %AL
2278 .. use %AX or %EAX ..
2279
2280 will cause the origin attributed to %AH to be ignored, since %AL,
2281 %AX, %EAX are treated as the same register, and %AH as a
2282 completely separate one.
2283
2284 But having said all that, it actually seems to work fairly well in
2285 practice.
2286 */
2287
2288 static UWord stats_ocacheL1_find = 0;
2289 static UWord stats_ocacheL1_found_at_1 = 0;
2290 static UWord stats_ocacheL1_found_at_N = 0;
2291 static UWord stats_ocacheL1_misses = 0;
2292 static UWord stats_ocacheL1_lossage = 0;
2293 static UWord stats_ocacheL1_movefwds = 0;
2294
2295 static UWord stats__ocacheL2_refs = 0;
2296 static UWord stats__ocacheL2_misses = 0;
2297 static UWord stats__ocacheL2_n_nodes_max = 0;
2298
2299 /* Cache of 32-bit values, one every 32 bits of address space */
2300
2301 #define OC_BITS_PER_LINE 5
2302 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2303
oc_line_offset(Addr a)2304 static INLINE UWord oc_line_offset ( Addr a ) {
2305 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2306 }
is_valid_oc_tag(Addr tag)2307 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2308 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2309 }
2310
2311 #define OC_LINES_PER_SET 2
2312
2313 #define OC_N_SET_BITS 20
2314 #define OC_N_SETS (1 << OC_N_SET_BITS)
2315
2316 /* These settings give:
2317 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2318 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2319 */
2320
2321 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2322
2323
2324 typedef
2325 struct {
2326 Addr tag;
2327 UInt w32[OC_W32S_PER_LINE];
2328 UChar descr[OC_W32S_PER_LINE];
2329 }
2330 OCacheLine;
2331
2332 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2333 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2334 and 'z' if all the represented tags are zero. */
classify_OCacheLine(OCacheLine * line)2335 static UChar classify_OCacheLine ( OCacheLine* line )
2336 {
2337 UWord i;
2338 if (line->tag == 1/*invalid*/)
2339 return 'e'; /* EMPTY */
2340 tl_assert(is_valid_oc_tag(line->tag));
2341 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2342 tl_assert(0 == ((~0xF) & line->descr[i]));
2343 if (line->w32[i] > 0 && line->descr[i] > 0)
2344 return 'n'; /* NONZERO - contains useful info */
2345 }
2346 return 'z'; /* ZERO - no useful info */
2347 }
2348
2349 typedef
2350 struct {
2351 OCacheLine line[OC_LINES_PER_SET];
2352 }
2353 OCacheSet;
2354
2355 typedef
2356 struct {
2357 OCacheSet set[OC_N_SETS];
2358 }
2359 OCache;
2360
2361 static OCache* ocacheL1 = NULL;
2362 static UWord ocacheL1_event_ctr = 0;
2363
2364 static void init_ocacheL2 ( void ); /* fwds */
init_OCache(void)2365 static void init_OCache ( void )
2366 {
2367 UWord line, set;
2368 tl_assert(MC_(clo_mc_level) >= 3);
2369 tl_assert(ocacheL1 == NULL);
2370 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2371 if (ocacheL1 == NULL) {
2372 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2373 sizeof(OCache) );
2374 }
2375 tl_assert(ocacheL1 != NULL);
2376 for (set = 0; set < OC_N_SETS; set++) {
2377 for (line = 0; line < OC_LINES_PER_SET; line++) {
2378 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2379 }
2380 }
2381 init_ocacheL2();
2382 }
2383
moveLineForwards(OCacheSet * set,UWord lineno)2384 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2385 {
2386 OCacheLine tmp;
2387 stats_ocacheL1_movefwds++;
2388 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2389 tmp = set->line[lineno-1];
2390 set->line[lineno-1] = set->line[lineno];
2391 set->line[lineno] = tmp;
2392 }
2393
zeroise_OCacheLine(OCacheLine * line,Addr tag)2394 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2395 UWord i;
2396 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2397 line->w32[i] = 0; /* NO ORIGIN */
2398 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2399 }
2400 line->tag = tag;
2401 }
2402
2403 //////////////////////////////////////////////////////////////
2404 //// OCache backing store
2405
2406 static OSet* ocacheL2 = NULL;
2407
ocacheL2_malloc(const HChar * cc,SizeT szB)2408 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2409 return VG_(malloc)(cc, szB);
2410 }
ocacheL2_free(void * v)2411 static void ocacheL2_free ( void* v ) {
2412 VG_(free)( v );
2413 }
2414
2415 /* Stats: # nodes currently in tree */
2416 static UWord stats__ocacheL2_n_nodes = 0;
2417
init_ocacheL2(void)2418 static void init_ocacheL2 ( void )
2419 {
2420 tl_assert(!ocacheL2);
2421 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2422 tl_assert(0 == offsetof(OCacheLine,tag));
2423 ocacheL2
2424 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2425 NULL, /* fast cmp */
2426 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2427 stats__ocacheL2_n_nodes = 0;
2428 }
2429
2430 /* Find line with the given tag in the tree, or NULL if not found. */
ocacheL2_find_tag(Addr tag)2431 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2432 {
2433 OCacheLine* line;
2434 tl_assert(is_valid_oc_tag(tag));
2435 stats__ocacheL2_refs++;
2436 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2437 return line;
2438 }
2439
2440 /* Delete the line with the given tag from the tree, if it is present, and
2441 free up the associated memory. */
ocacheL2_del_tag(Addr tag)2442 static void ocacheL2_del_tag ( Addr tag )
2443 {
2444 OCacheLine* line;
2445 tl_assert(is_valid_oc_tag(tag));
2446 stats__ocacheL2_refs++;
2447 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2448 if (line) {
2449 VG_(OSetGen_FreeNode)(ocacheL2, line);
2450 tl_assert(stats__ocacheL2_n_nodes > 0);
2451 stats__ocacheL2_n_nodes--;
2452 }
2453 }
2454
2455 /* Add a copy of the given line to the tree. It must not already be
2456 present. */
ocacheL2_add_line(OCacheLine * line)2457 static void ocacheL2_add_line ( OCacheLine* line )
2458 {
2459 OCacheLine* copy;
2460 tl_assert(is_valid_oc_tag(line->tag));
2461 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2462 *copy = *line;
2463 stats__ocacheL2_refs++;
2464 VG_(OSetGen_Insert)( ocacheL2, copy );
2465 stats__ocacheL2_n_nodes++;
2466 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2467 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2468 }
2469
2470 ////
2471 //////////////////////////////////////////////////////////////
2472
2473 __attribute__((noinline))
find_OCacheLine_SLOW(Addr a)2474 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2475 {
2476 OCacheLine *victim, *inL2;
2477 UChar c;
2478 UWord line;
2479 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2480 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2481 UWord tag = a & tagmask;
2482 tl_assert(setno >= 0 && setno < OC_N_SETS);
2483
2484 /* we already tried line == 0; skip therefore. */
2485 for (line = 1; line < OC_LINES_PER_SET; line++) {
2486 if (ocacheL1->set[setno].line[line].tag == tag) {
2487 if (line == 1) {
2488 stats_ocacheL1_found_at_1++;
2489 } else {
2490 stats_ocacheL1_found_at_N++;
2491 }
2492 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2493 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2494 moveLineForwards( &ocacheL1->set[setno], line );
2495 line--;
2496 }
2497 return &ocacheL1->set[setno].line[line];
2498 }
2499 }
2500
2501 /* A miss. Use the last slot. Implicitly this means we're
2502 ejecting the line in the last slot. */
2503 stats_ocacheL1_misses++;
2504 tl_assert(line == OC_LINES_PER_SET);
2505 line--;
2506 tl_assert(line > 0);
2507
2508 /* First, move the to-be-ejected line to the L2 cache. */
2509 victim = &ocacheL1->set[setno].line[line];
2510 c = classify_OCacheLine(victim);
2511 switch (c) {
2512 case 'e':
2513 /* the line is empty (has invalid tag); ignore it. */
2514 break;
2515 case 'z':
2516 /* line contains zeroes. We must ensure the backing store is
2517 updated accordingly, either by copying the line there
2518 verbatim, or by ensuring it isn't present there. We
2519 chosse the latter on the basis that it reduces the size of
2520 the backing store. */
2521 ocacheL2_del_tag( victim->tag );
2522 break;
2523 case 'n':
2524 /* line contains at least one real, useful origin. Copy it
2525 to the backing store. */
2526 stats_ocacheL1_lossage++;
2527 inL2 = ocacheL2_find_tag( victim->tag );
2528 if (inL2) {
2529 *inL2 = *victim;
2530 } else {
2531 ocacheL2_add_line( victim );
2532 }
2533 break;
2534 default:
2535 tl_assert(0);
2536 }
2537
2538 /* Now we must reload the L1 cache from the backing tree, if
2539 possible. */
2540 tl_assert(tag != victim->tag); /* stay sane */
2541 inL2 = ocacheL2_find_tag( tag );
2542 if (inL2) {
2543 /* We're in luck. It's in the L2. */
2544 ocacheL1->set[setno].line[line] = *inL2;
2545 } else {
2546 /* Missed at both levels of the cache hierarchy. We have to
2547 declare it as full of zeroes (unknown origins). */
2548 stats__ocacheL2_misses++;
2549 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2550 }
2551
2552 /* Move it one forwards */
2553 moveLineForwards( &ocacheL1->set[setno], line );
2554 line--;
2555
2556 return &ocacheL1->set[setno].line[line];
2557 }
2558
find_OCacheLine(Addr a)2559 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2560 {
2561 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2562 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2563 UWord tag = a & tagmask;
2564
2565 stats_ocacheL1_find++;
2566
2567 if (OC_ENABLE_ASSERTIONS) {
2568 tl_assert(setno >= 0 && setno < OC_N_SETS);
2569 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2570 }
2571
2572 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2573 return &ocacheL1->set[setno].line[0];
2574 }
2575
2576 return find_OCacheLine_SLOW( a );
2577 }
2578
set_aligned_word64_Origin_to_undef(Addr a,UInt otag)2579 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2580 {
2581 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2582 //// Set the origins for a+0 .. a+7
2583 { OCacheLine* line;
2584 UWord lineoff = oc_line_offset(a);
2585 if (OC_ENABLE_ASSERTIONS) {
2586 tl_assert(lineoff >= 0
2587 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2588 }
2589 line = find_OCacheLine( a );
2590 line->descr[lineoff+0] = 0xF;
2591 line->descr[lineoff+1] = 0xF;
2592 line->w32[lineoff+0] = otag;
2593 line->w32[lineoff+1] = otag;
2594 }
2595 //// END inlined, specialised version of MC_(helperc_b_store8)
2596 }
2597
2598
2599 /*------------------------------------------------------------*/
2600 /*--- Aligned fast case permission setters, ---*/
2601 /*--- for dealing with stacks ---*/
2602 /*------------------------------------------------------------*/
2603
2604 /*--------------------- 32-bit ---------------------*/
2605
2606 /* Nb: by "aligned" here we mean 4-byte aligned */
2607
make_aligned_word32_undefined(Addr a)2608 static INLINE void make_aligned_word32_undefined ( Addr a )
2609 {
2610 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2611
2612 #ifndef PERF_FAST_STACK2
2613 make_mem_undefined(a, 4);
2614 #else
2615 {
2616 UWord sm_off;
2617 SecMap* sm;
2618
2619 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2620 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2621 make_mem_undefined(a, 4);
2622 return;
2623 }
2624
2625 sm = get_secmap_for_writing_low(a);
2626 sm_off = SM_OFF(a);
2627 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2628 }
2629 #endif
2630 }
2631
2632 static INLINE
make_aligned_word32_undefined_w_otag(Addr a,UInt otag)2633 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2634 {
2635 make_aligned_word32_undefined(a);
2636 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2637 //// Set the origins for a+0 .. a+3
2638 { OCacheLine* line;
2639 UWord lineoff = oc_line_offset(a);
2640 if (OC_ENABLE_ASSERTIONS) {
2641 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2642 }
2643 line = find_OCacheLine( a );
2644 line->descr[lineoff] = 0xF;
2645 line->w32[lineoff] = otag;
2646 }
2647 //// END inlined, specialised version of MC_(helperc_b_store4)
2648 }
2649
2650 static INLINE
make_aligned_word32_noaccess(Addr a)2651 void make_aligned_word32_noaccess ( Addr a )
2652 {
2653 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2654
2655 #ifndef PERF_FAST_STACK2
2656 MC_(make_mem_noaccess)(a, 4);
2657 #else
2658 {
2659 UWord sm_off;
2660 SecMap* sm;
2661
2662 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2663 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2664 MC_(make_mem_noaccess)(a, 4);
2665 return;
2666 }
2667
2668 sm = get_secmap_for_writing_low(a);
2669 sm_off = SM_OFF(a);
2670 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2671
2672 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2673 //// Set the origins for a+0 .. a+3.
2674 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2675 OCacheLine* line;
2676 UWord lineoff = oc_line_offset(a);
2677 if (OC_ENABLE_ASSERTIONS) {
2678 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2679 }
2680 line = find_OCacheLine( a );
2681 line->descr[lineoff] = 0;
2682 }
2683 //// END inlined, specialised version of MC_(helperc_b_store4)
2684 }
2685 #endif
2686 }
2687
2688 /*--------------------- 64-bit ---------------------*/
2689
2690 /* Nb: by "aligned" here we mean 8-byte aligned */
2691
make_aligned_word64_undefined(Addr a)2692 static INLINE void make_aligned_word64_undefined ( Addr a )
2693 {
2694 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2695
2696 #ifndef PERF_FAST_STACK2
2697 make_mem_undefined(a, 8);
2698 #else
2699 {
2700 UWord sm_off16;
2701 SecMap* sm;
2702
2703 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2704 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2705 make_mem_undefined(a, 8);
2706 return;
2707 }
2708
2709 sm = get_secmap_for_writing_low(a);
2710 sm_off16 = SM_OFF_16(a);
2711 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2712 }
2713 #endif
2714 }
2715
2716 static INLINE
make_aligned_word64_undefined_w_otag(Addr a,UInt otag)2717 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2718 {
2719 make_aligned_word64_undefined(a);
2720 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2721 //// Set the origins for a+0 .. a+7
2722 { OCacheLine* line;
2723 UWord lineoff = oc_line_offset(a);
2724 tl_assert(lineoff >= 0
2725 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2726 line = find_OCacheLine( a );
2727 line->descr[lineoff+0] = 0xF;
2728 line->descr[lineoff+1] = 0xF;
2729 line->w32[lineoff+0] = otag;
2730 line->w32[lineoff+1] = otag;
2731 }
2732 //// END inlined, specialised version of MC_(helperc_b_store8)
2733 }
2734
2735 static INLINE
make_aligned_word64_noaccess(Addr a)2736 void make_aligned_word64_noaccess ( Addr a )
2737 {
2738 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2739
2740 #ifndef PERF_FAST_STACK2
2741 MC_(make_mem_noaccess)(a, 8);
2742 #else
2743 {
2744 UWord sm_off16;
2745 SecMap* sm;
2746
2747 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2748 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2749 MC_(make_mem_noaccess)(a, 8);
2750 return;
2751 }
2752
2753 sm = get_secmap_for_writing_low(a);
2754 sm_off16 = SM_OFF_16(a);
2755 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2756
2757 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2758 //// Clear the origins for a+0 .. a+7.
2759 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2760 OCacheLine* line;
2761 UWord lineoff = oc_line_offset(a);
2762 tl_assert(lineoff >= 0
2763 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2764 line = find_OCacheLine( a );
2765 line->descr[lineoff+0] = 0;
2766 line->descr[lineoff+1] = 0;
2767 }
2768 //// END inlined, specialised version of MC_(helperc_b_store8)
2769 }
2770 #endif
2771 }
2772
2773
2774 /*------------------------------------------------------------*/
2775 /*--- Stack pointer adjustment ---*/
2776 /*------------------------------------------------------------*/
2777
2778 #ifdef PERF_FAST_STACK
2779 # define MAYBE_USED
2780 #else
2781 # define MAYBE_USED __attribute__((unused))
2782 #endif
2783
2784 /*--------------- adjustment by 4 bytes ---------------*/
2785
2786 MAYBE_USED
mc_new_mem_stack_4_w_ECU(Addr new_SP,UInt ecu)2787 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2788 {
2789 UInt otag = ecu | MC_OKIND_STACK;
2790 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2791 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2792 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2793 } else {
2794 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2795 }
2796 }
2797
2798 MAYBE_USED
mc_new_mem_stack_4(Addr new_SP)2799 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2800 {
2801 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2802 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2803 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2804 } else {
2805 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2806 }
2807 }
2808
2809 MAYBE_USED
mc_die_mem_stack_4(Addr new_SP)2810 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2811 {
2812 PROF_EVENT(MCPE_DIE_MEM_STACK_4);
2813 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2814 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2815 } else {
2816 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2817 }
2818 }
2819
2820 /*--------------- adjustment by 8 bytes ---------------*/
2821
2822 MAYBE_USED
mc_new_mem_stack_8_w_ECU(Addr new_SP,UInt ecu)2823 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2824 {
2825 UInt otag = ecu | MC_OKIND_STACK;
2826 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2827 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2828 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2829 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2830 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2831 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2832 } else {
2833 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2834 }
2835 }
2836
2837 MAYBE_USED
mc_new_mem_stack_8(Addr new_SP)2838 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2839 {
2840 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2841 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2842 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2843 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2844 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2845 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2846 } else {
2847 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2848 }
2849 }
2850
2851 MAYBE_USED
mc_die_mem_stack_8(Addr new_SP)2852 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2853 {
2854 PROF_EVENT(MCPE_DIE_MEM_STACK_8);
2855 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2856 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2857 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2858 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2859 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2860 } else {
2861 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2862 }
2863 }
2864
2865 /*--------------- adjustment by 12 bytes ---------------*/
2866
2867 MAYBE_USED
mc_new_mem_stack_12_w_ECU(Addr new_SP,UInt ecu)2868 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2869 {
2870 UInt otag = ecu | MC_OKIND_STACK;
2871 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2872 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2873 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2874 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2875 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2876 /* from previous test we don't have 8-alignment at offset +0,
2877 hence must have 8 alignment at offsets +4/-4. Hence safe to
2878 do 4 at +0 and then 8 at +4/. */
2879 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2880 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2881 } else {
2882 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2883 }
2884 }
2885
2886 MAYBE_USED
mc_new_mem_stack_12(Addr new_SP)2887 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2888 {
2889 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2890 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2891 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2892 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2893 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2894 /* from previous test we don't have 8-alignment at offset +0,
2895 hence must have 8 alignment at offsets +4/-4. Hence safe to
2896 do 4 at +0 and then 8 at +4/. */
2897 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2898 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2899 } else {
2900 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2901 }
2902 }
2903
2904 MAYBE_USED
mc_die_mem_stack_12(Addr new_SP)2905 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2906 {
2907 PROF_EVENT(MCPE_DIE_MEM_STACK_12);
2908 /* Note the -12 in the test */
2909 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2910 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2911 -4. */
2912 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2913 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2914 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2915 /* We have 4-alignment at +0, but we don't have 8-alignment at
2916 -12. So we must have 8-alignment at -8. Hence do 4 at -12
2917 and then 8 at -8. */
2918 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2919 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2920 } else {
2921 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2922 }
2923 }
2924
2925 /*--------------- adjustment by 16 bytes ---------------*/
2926
2927 MAYBE_USED
mc_new_mem_stack_16_w_ECU(Addr new_SP,UInt ecu)2928 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2929 {
2930 UInt otag = ecu | MC_OKIND_STACK;
2931 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
2932 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2933 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2934 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2935 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2936 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2937 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2938 Hence do 4 at +0, 8 at +4, 4 at +12. */
2939 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2940 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2941 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2942 } else {
2943 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2944 }
2945 }
2946
2947 MAYBE_USED
mc_new_mem_stack_16(Addr new_SP)2948 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2949 {
2950 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
2951 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2952 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2953 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2954 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2955 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2956 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2957 Hence do 4 at +0, 8 at +4, 4 at +12. */
2958 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2959 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2960 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2961 } else {
2962 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2963 }
2964 }
2965
2966 MAYBE_USED
mc_die_mem_stack_16(Addr new_SP)2967 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2968 {
2969 PROF_EVENT(MCPE_DIE_MEM_STACK_16);
2970 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2971 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2972 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2973 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2974 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2975 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
2976 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2977 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2978 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2979 } else {
2980 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2981 }
2982 }
2983
2984 /*--------------- adjustment by 32 bytes ---------------*/
2985
2986 MAYBE_USED
mc_new_mem_stack_32_w_ECU(Addr new_SP,UInt ecu)2987 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2988 {
2989 UInt otag = ecu | MC_OKIND_STACK;
2990 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
2991 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2992 /* Straightforward */
2993 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2994 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2995 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2996 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2997 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2998 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2999 +0,+28. */
3000 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3001 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3002 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3003 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3004 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3005 } else {
3006 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3007 }
3008 }
3009
3010 MAYBE_USED
mc_new_mem_stack_32(Addr new_SP)3011 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3012 {
3013 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3014 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3015 /* Straightforward */
3016 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3017 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3018 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3019 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3020 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3021 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3022 +0,+28. */
3023 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3024 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3025 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3026 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3027 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3028 } else {
3029 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3030 }
3031 }
3032
3033 MAYBE_USED
mc_die_mem_stack_32(Addr new_SP)3034 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3035 {
3036 PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3037 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3038 /* Straightforward */
3039 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3040 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3041 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3042 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3043 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3044 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3045 4 at -32,-4. */
3046 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3047 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3048 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3049 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3050 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3051 } else {
3052 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3053 }
3054 }
3055
3056 /*--------------- adjustment by 112 bytes ---------------*/
3057
3058 MAYBE_USED
mc_new_mem_stack_112_w_ECU(Addr new_SP,UInt ecu)3059 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3060 {
3061 UInt otag = ecu | MC_OKIND_STACK;
3062 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3063 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3064 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3065 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3066 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3067 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3068 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3069 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3070 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3071 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3072 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3073 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3074 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3075 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3076 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3077 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3078 } else {
3079 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3080 }
3081 }
3082
3083 MAYBE_USED
mc_new_mem_stack_112(Addr new_SP)3084 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3085 {
3086 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3087 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3088 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3089 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3090 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3091 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3092 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3093 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3094 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3095 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3096 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3097 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3098 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3099 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3100 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3101 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3102 } else {
3103 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3104 }
3105 }
3106
3107 MAYBE_USED
mc_die_mem_stack_112(Addr new_SP)3108 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3109 {
3110 PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3111 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3112 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3113 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3114 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3115 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3116 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3117 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3118 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3119 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3120 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3121 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3122 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3123 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3124 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3125 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3126 } else {
3127 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3128 }
3129 }
3130
3131 /*--------------- adjustment by 128 bytes ---------------*/
3132
3133 MAYBE_USED
mc_new_mem_stack_128_w_ECU(Addr new_SP,UInt ecu)3134 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3135 {
3136 UInt otag = ecu | MC_OKIND_STACK;
3137 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3138 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3139 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3140 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3141 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3142 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3143 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3144 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3145 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3146 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3147 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3148 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3149 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3150 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3151 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3152 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3153 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3154 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3155 } else {
3156 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3157 }
3158 }
3159
3160 MAYBE_USED
mc_new_mem_stack_128(Addr new_SP)3161 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3162 {
3163 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3164 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3165 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3166 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3167 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3168 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3169 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3170 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3171 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3172 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3173 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3174 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3175 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3176 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3177 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3178 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3179 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3180 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3181 } else {
3182 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3183 }
3184 }
3185
3186 MAYBE_USED
mc_die_mem_stack_128(Addr new_SP)3187 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3188 {
3189 PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3190 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3191 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3192 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3193 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3194 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3195 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3196 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3197 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3198 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3199 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3200 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3201 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3202 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3203 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3204 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3205 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3206 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3207 } else {
3208 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3209 }
3210 }
3211
3212 /*--------------- adjustment by 144 bytes ---------------*/
3213
3214 MAYBE_USED
mc_new_mem_stack_144_w_ECU(Addr new_SP,UInt ecu)3215 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3216 {
3217 UInt otag = ecu | MC_OKIND_STACK;
3218 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3219 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3220 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3221 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3222 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3223 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3224 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3225 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3226 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3227 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3228 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3229 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3230 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3231 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3232 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3233 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3234 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3235 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3236 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3237 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3238 } else {
3239 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3240 }
3241 }
3242
3243 MAYBE_USED
mc_new_mem_stack_144(Addr new_SP)3244 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3245 {
3246 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3247 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3248 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3249 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3250 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3251 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3252 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3253 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3254 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3255 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3256 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3257 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3258 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3259 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3260 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3261 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3262 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3263 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3264 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3265 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3266 } else {
3267 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3268 }
3269 }
3270
3271 MAYBE_USED
mc_die_mem_stack_144(Addr new_SP)3272 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3273 {
3274 PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3275 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3276 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3277 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3278 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3279 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3280 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3281 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3282 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3283 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3284 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3285 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3286 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3287 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3288 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3289 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3290 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3291 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3292 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3293 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3294 } else {
3295 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3296 }
3297 }
3298
3299 /*--------------- adjustment by 160 bytes ---------------*/
3300
3301 MAYBE_USED
mc_new_mem_stack_160_w_ECU(Addr new_SP,UInt ecu)3302 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3303 {
3304 UInt otag = ecu | MC_OKIND_STACK;
3305 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3306 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3307 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3308 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3309 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3310 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3311 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3312 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3313 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3314 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3315 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3316 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3317 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3318 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3319 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3320 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3321 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3322 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3323 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3324 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3325 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3326 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3327 } else {
3328 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3329 }
3330 }
3331
3332 MAYBE_USED
mc_new_mem_stack_160(Addr new_SP)3333 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3334 {
3335 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3336 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3337 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3338 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3339 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3340 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3341 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3342 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3343 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3344 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3345 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3346 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3347 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3348 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3349 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3350 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3351 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3352 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3353 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3354 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3355 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3356 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3357 } else {
3358 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3359 }
3360 }
3361
3362 MAYBE_USED
mc_die_mem_stack_160(Addr new_SP)3363 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3364 {
3365 PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3366 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3367 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3368 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3369 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3370 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3371 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3372 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3373 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3374 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3375 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3376 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3377 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3378 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3379 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3380 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3381 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3382 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3383 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3384 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3385 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3386 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3387 } else {
3388 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3389 }
3390 }
3391
3392 /*--------------- adjustment by N bytes ---------------*/
3393
mc_new_mem_stack_w_ECU(Addr a,SizeT len,UInt ecu)3394 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3395 {
3396 UInt otag = ecu | MC_OKIND_STACK;
3397 PROF_EVENT(MCPE_NEW_MEM_STACK);
3398 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3399 }
3400
mc_new_mem_stack(Addr a,SizeT len)3401 static void mc_new_mem_stack ( Addr a, SizeT len )
3402 {
3403 PROF_EVENT(MCPE_NEW_MEM_STACK);
3404 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3405 }
3406
mc_die_mem_stack(Addr a,SizeT len)3407 static void mc_die_mem_stack ( Addr a, SizeT len )
3408 {
3409 PROF_EVENT(MCPE_DIE_MEM_STACK);
3410 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3411 }
3412
3413
3414 /* The AMD64 ABI says:
3415
3416 "The 128-byte area beyond the location pointed to by %rsp is considered
3417 to be reserved and shall not be modified by signal or interrupt
3418 handlers. Therefore, functions may use this area for temporary data
3419 that is not needed across function calls. In particular, leaf functions
3420 may use this area for their entire stack frame, rather than adjusting
3421 the stack pointer in the prologue and epilogue. This area is known as
3422 red zone [sic]."
3423
3424 So after any call or return we need to mark this redzone as containing
3425 undefined values.
3426
3427 Consider this: we're in function f. f calls g. g moves rsp down
3428 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3429 defined. g returns. f is buggy and reads from parts of the red zone
3430 that it didn't write on. But because g filled that area in, f is going
3431 to be picking up defined V bits and so any errors from reading bits of
3432 the red zone it didn't write, will be missed. The only solution I could
3433 think of was to make the red zone undefined when g returns to f.
3434
3435 This is in accordance with the ABI, which makes it clear the redzone
3436 is volatile across function calls.
3437
3438 The problem occurs the other way round too: f could fill the RZ up
3439 with defined values and g could mistakenly read them. So the RZ
3440 also needs to be nuked on function calls.
3441 */
3442
3443
3444 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3445 improved so as to have a lower miss rate. */
3446
3447 static UWord stats__nia_cache_queries = 0;
3448 static UWord stats__nia_cache_misses = 0;
3449
3450 typedef
3451 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3452 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3453 WCacheEnt;
3454
3455 #define N_NIA_TO_ECU_CACHE 511
3456
3457 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3458
init_nia_to_ecu_cache(void)3459 static void init_nia_to_ecu_cache ( void )
3460 {
3461 UWord i;
3462 Addr zero_addr = 0;
3463 ExeContext* zero_ec;
3464 UInt zero_ecu;
3465 /* Fill all the slots with an entry for address zero, and the
3466 relevant otags accordingly. Hence the cache is initially filled
3467 with valid data. */
3468 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3469 tl_assert(zero_ec);
3470 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3471 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3472 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3473 nia_to_ecu_cache[i].nia0 = zero_addr;
3474 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3475 nia_to_ecu_cache[i].nia1 = zero_addr;
3476 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3477 }
3478 }
3479
convert_nia_to_ecu(Addr nia)3480 static inline UInt convert_nia_to_ecu ( Addr nia )
3481 {
3482 UWord i;
3483 UInt ecu;
3484 ExeContext* ec;
3485
3486 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3487
3488 stats__nia_cache_queries++;
3489 i = nia % N_NIA_TO_ECU_CACHE;
3490 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3491
3492 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3493 return nia_to_ecu_cache[i].ecu0;
3494
3495 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3496 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3497 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3498 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3499 # undef SWAP
3500 return nia_to_ecu_cache[i].ecu0;
3501 }
3502
3503 stats__nia_cache_misses++;
3504 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3505 tl_assert(ec);
3506 ecu = VG_(get_ECU_from_ExeContext)(ec);
3507 tl_assert(VG_(is_plausible_ECU)(ecu));
3508
3509 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3510 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3511
3512 nia_to_ecu_cache[i].nia0 = nia;
3513 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3514 return ecu;
3515 }
3516
3517
3518 /* Note that this serves both the origin-tracking and
3519 no-origin-tracking modes. We assume that calls to it are
3520 sufficiently infrequent that it isn't worth specialising for the
3521 with/without origin-tracking cases. */
MC_(helperc_MAKE_STACK_UNINIT)3522 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3523 {
3524 UInt otag;
3525 tl_assert(sizeof(UWord) == sizeof(SizeT));
3526 if (0)
3527 VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3528 base, len, nia );
3529
3530 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3531 UInt ecu = convert_nia_to_ecu ( nia );
3532 tl_assert(VG_(is_plausible_ECU)(ecu));
3533 otag = ecu | MC_OKIND_STACK;
3534 } else {
3535 tl_assert(nia == 0);
3536 otag = 0;
3537 }
3538
3539 # if 0
3540 /* Really slow version */
3541 MC_(make_mem_undefined)(base, len, otag);
3542 # endif
3543
3544 # if 0
3545 /* Slow(ish) version, which is fairly easily seen to be correct.
3546 */
3547 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3548 make_aligned_word64_undefined(base + 0, otag);
3549 make_aligned_word64_undefined(base + 8, otag);
3550 make_aligned_word64_undefined(base + 16, otag);
3551 make_aligned_word64_undefined(base + 24, otag);
3552
3553 make_aligned_word64_undefined(base + 32, otag);
3554 make_aligned_word64_undefined(base + 40, otag);
3555 make_aligned_word64_undefined(base + 48, otag);
3556 make_aligned_word64_undefined(base + 56, otag);
3557
3558 make_aligned_word64_undefined(base + 64, otag);
3559 make_aligned_word64_undefined(base + 72, otag);
3560 make_aligned_word64_undefined(base + 80, otag);
3561 make_aligned_word64_undefined(base + 88, otag);
3562
3563 make_aligned_word64_undefined(base + 96, otag);
3564 make_aligned_word64_undefined(base + 104, otag);
3565 make_aligned_word64_undefined(base + 112, otag);
3566 make_aligned_word64_undefined(base + 120, otag);
3567 } else {
3568 MC_(make_mem_undefined)(base, len, otag);
3569 }
3570 # endif
3571
3572 /* Idea is: go fast when
3573 * 8-aligned and length is 128
3574 * the sm is available in the main primary map
3575 * the address range falls entirely with a single secondary map
3576 If all those conditions hold, just update the V+A bits by writing
3577 directly into the vabits array. (If the sm was distinguished, this
3578 will make a copy and then write to it.)
3579 */
3580
3581 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3582 /* Now we know the address range is suitably sized and aligned. */
3583 UWord a_lo = (UWord)(base);
3584 UWord a_hi = (UWord)(base + 128 - 1);
3585 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3586 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3587 // Now we know the entire range is within the main primary map.
3588 SecMap* sm = get_secmap_for_writing_low(a_lo);
3589 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3590 /* Now we know that the entire address range falls within a
3591 single secondary map, and that that secondary 'lives' in
3592 the main primary map. */
3593 if (LIKELY(sm == sm_hi)) {
3594 // Finally, we know that the range is entirely within one secmap.
3595 UWord v_off = SM_OFF(a_lo);
3596 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3597 p[ 0] = VA_BITS16_UNDEFINED;
3598 p[ 1] = VA_BITS16_UNDEFINED;
3599 p[ 2] = VA_BITS16_UNDEFINED;
3600 p[ 3] = VA_BITS16_UNDEFINED;
3601 p[ 4] = VA_BITS16_UNDEFINED;
3602 p[ 5] = VA_BITS16_UNDEFINED;
3603 p[ 6] = VA_BITS16_UNDEFINED;
3604 p[ 7] = VA_BITS16_UNDEFINED;
3605 p[ 8] = VA_BITS16_UNDEFINED;
3606 p[ 9] = VA_BITS16_UNDEFINED;
3607 p[10] = VA_BITS16_UNDEFINED;
3608 p[11] = VA_BITS16_UNDEFINED;
3609 p[12] = VA_BITS16_UNDEFINED;
3610 p[13] = VA_BITS16_UNDEFINED;
3611 p[14] = VA_BITS16_UNDEFINED;
3612 p[15] = VA_BITS16_UNDEFINED;
3613 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3614 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3615 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3616 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3617 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3618 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3619 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3620 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3621 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3622 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3623 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3624 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3625 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3626 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3627 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3628 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3629 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3630 }
3631 return;
3632 }
3633 }
3634 }
3635
3636 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3637 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3638 /* Now we know the address range is suitably sized and aligned. */
3639 UWord a_lo = (UWord)(base);
3640 UWord a_hi = (UWord)(base + 288 - 1);
3641 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3642 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3643 // Now we know the entire range is within the main primary map.
3644 SecMap* sm = get_secmap_for_writing_low(a_lo);
3645 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3646 /* Now we know that the entire address range falls within a
3647 single secondary map, and that that secondary 'lives' in
3648 the main primary map. */
3649 if (LIKELY(sm == sm_hi)) {
3650 // Finally, we know that the range is entirely within one secmap.
3651 UWord v_off = SM_OFF(a_lo);
3652 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3653 p[ 0] = VA_BITS16_UNDEFINED;
3654 p[ 1] = VA_BITS16_UNDEFINED;
3655 p[ 2] = VA_BITS16_UNDEFINED;
3656 p[ 3] = VA_BITS16_UNDEFINED;
3657 p[ 4] = VA_BITS16_UNDEFINED;
3658 p[ 5] = VA_BITS16_UNDEFINED;
3659 p[ 6] = VA_BITS16_UNDEFINED;
3660 p[ 7] = VA_BITS16_UNDEFINED;
3661 p[ 8] = VA_BITS16_UNDEFINED;
3662 p[ 9] = VA_BITS16_UNDEFINED;
3663 p[10] = VA_BITS16_UNDEFINED;
3664 p[11] = VA_BITS16_UNDEFINED;
3665 p[12] = VA_BITS16_UNDEFINED;
3666 p[13] = VA_BITS16_UNDEFINED;
3667 p[14] = VA_BITS16_UNDEFINED;
3668 p[15] = VA_BITS16_UNDEFINED;
3669 p[16] = VA_BITS16_UNDEFINED;
3670 p[17] = VA_BITS16_UNDEFINED;
3671 p[18] = VA_BITS16_UNDEFINED;
3672 p[19] = VA_BITS16_UNDEFINED;
3673 p[20] = VA_BITS16_UNDEFINED;
3674 p[21] = VA_BITS16_UNDEFINED;
3675 p[22] = VA_BITS16_UNDEFINED;
3676 p[23] = VA_BITS16_UNDEFINED;
3677 p[24] = VA_BITS16_UNDEFINED;
3678 p[25] = VA_BITS16_UNDEFINED;
3679 p[26] = VA_BITS16_UNDEFINED;
3680 p[27] = VA_BITS16_UNDEFINED;
3681 p[28] = VA_BITS16_UNDEFINED;
3682 p[29] = VA_BITS16_UNDEFINED;
3683 p[30] = VA_BITS16_UNDEFINED;
3684 p[31] = VA_BITS16_UNDEFINED;
3685 p[32] = VA_BITS16_UNDEFINED;
3686 p[33] = VA_BITS16_UNDEFINED;
3687 p[34] = VA_BITS16_UNDEFINED;
3688 p[35] = VA_BITS16_UNDEFINED;
3689 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3690 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3691 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3692 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3693 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3694 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3695 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3696 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3697 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3698 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3699 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3700 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3701 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3702 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3703 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3704 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3705 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3706 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3707 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3708 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3709 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3710 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3711 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3712 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3713 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3714 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3715 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3716 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3717 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3718 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3719 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3720 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3721 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3722 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3723 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3724 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3725 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3726 }
3727 return;
3728 }
3729 }
3730 }
3731
3732 /* else fall into slow case */
3733 MC_(make_mem_undefined_w_otag)(base, len, otag);
3734 }
3735
3736
3737 /*------------------------------------------------------------*/
3738 /*--- Checking memory ---*/
3739 /*------------------------------------------------------------*/
3740
3741 typedef
3742 enum {
3743 MC_Ok = 5,
3744 MC_AddrErr = 6,
3745 MC_ValueErr = 7
3746 }
3747 MC_ReadResult;
3748
3749
3750 /* Check permissions for address range. If inadequate permissions
3751 exist, *bad_addr is set to the offending address, so the caller can
3752 know what it is. */
3753
3754 /* Returns True if [a .. a+len) is not addressible. Otherwise,
3755 returns False, and if bad_addr is non-NULL, sets *bad_addr to
3756 indicate the lowest failing address. Functions below are
3757 similar. */
MC_(check_mem_is_noaccess)3758 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3759 {
3760 SizeT i;
3761 UWord vabits2;
3762
3763 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
3764 for (i = 0; i < len; i++) {
3765 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
3766 vabits2 = get_vabits2(a);
3767 if (VA_BITS2_NOACCESS != vabits2) {
3768 if (bad_addr != NULL) *bad_addr = a;
3769 return False;
3770 }
3771 a++;
3772 }
3773 return True;
3774 }
3775
is_mem_addressable(Addr a,SizeT len,Addr * bad_addr)3776 static Bool is_mem_addressable ( Addr a, SizeT len,
3777 /*OUT*/Addr* bad_addr )
3778 {
3779 SizeT i;
3780 UWord vabits2;
3781
3782 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
3783 for (i = 0; i < len; i++) {
3784 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
3785 vabits2 = get_vabits2(a);
3786 if (VA_BITS2_NOACCESS == vabits2) {
3787 if (bad_addr != NULL) *bad_addr = a;
3788 return False;
3789 }
3790 a++;
3791 }
3792 return True;
3793 }
3794
is_mem_defined(Addr a,SizeT len,Addr * bad_addr,UInt * otag)3795 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3796 /*OUT*/Addr* bad_addr,
3797 /*OUT*/UInt* otag )
3798 {
3799 SizeT i;
3800 UWord vabits2;
3801
3802 PROF_EVENT(MCPE_IS_MEM_DEFINED);
3803 DEBUG("is_mem_defined\n");
3804
3805 if (otag) *otag = 0;
3806 if (bad_addr) *bad_addr = 0;
3807 for (i = 0; i < len; i++) {
3808 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
3809 vabits2 = get_vabits2(a);
3810 if (VA_BITS2_DEFINED != vabits2) {
3811 // Error! Nb: Report addressability errors in preference to
3812 // definedness errors. And don't report definedeness errors unless
3813 // --undef-value-errors=yes.
3814 if (bad_addr) {
3815 *bad_addr = a;
3816 }
3817 if (VA_BITS2_NOACCESS == vabits2) {
3818 return MC_AddrErr;
3819 }
3820 if (MC_(clo_mc_level) >= 2) {
3821 if (otag && MC_(clo_mc_level) == 3) {
3822 *otag = MC_(helperc_b_load1)( a );
3823 }
3824 return MC_ValueErr;
3825 }
3826 }
3827 a++;
3828 }
3829 return MC_Ok;
3830 }
3831
3832
3833 /* Like is_mem_defined but doesn't give up at the first uninitialised
3834 byte -- the entire range is always checked. This is important for
3835 detecting errors in the case where a checked range strays into
3836 invalid memory, but that fact is not detected by the ordinary
3837 is_mem_defined(), because of an undefined section that precedes the
3838 out of range section, possibly as a result of an alignment hole in
3839 the checked data. This version always checks the entire range and
3840 can report both a definedness and an accessbility error, if
3841 necessary. */
is_mem_defined_comprehensive(Addr a,SizeT len,Bool * errorV,Addr * bad_addrV,UInt * otagV,Bool * errorA,Addr * bad_addrA)3842 static void is_mem_defined_comprehensive (
3843 Addr a, SizeT len,
3844 /*OUT*/Bool* errorV, /* is there a definedness err? */
3845 /*OUT*/Addr* bad_addrV, /* if so where? */
3846 /*OUT*/UInt* otagV, /* and what's its otag? */
3847 /*OUT*/Bool* errorA, /* is there an addressability err? */
3848 /*OUT*/Addr* bad_addrA /* if so where? */
3849 )
3850 {
3851 SizeT i;
3852 UWord vabits2;
3853 Bool already_saw_errV = False;
3854
3855 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
3856 DEBUG("is_mem_defined_comprehensive\n");
3857
3858 tl_assert(!(*errorV || *errorA));
3859
3860 for (i = 0; i < len; i++) {
3861 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
3862 vabits2 = get_vabits2(a);
3863 switch (vabits2) {
3864 case VA_BITS2_DEFINED:
3865 a++;
3866 break;
3867 case VA_BITS2_UNDEFINED:
3868 case VA_BITS2_PARTDEFINED:
3869 if (!already_saw_errV) {
3870 *errorV = True;
3871 *bad_addrV = a;
3872 if (MC_(clo_mc_level) == 3) {
3873 *otagV = MC_(helperc_b_load1)( a );
3874 } else {
3875 *otagV = 0;
3876 }
3877 already_saw_errV = True;
3878 }
3879 a++; /* keep going */
3880 break;
3881 case VA_BITS2_NOACCESS:
3882 *errorA = True;
3883 *bad_addrA = a;
3884 return; /* give up now. */
3885 default:
3886 tl_assert(0);
3887 }
3888 }
3889 }
3890
3891
3892 /* Check a zero-terminated ascii string. Tricky -- don't want to
3893 examine the actual bytes, to find the end, until we're sure it is
3894 safe to do so. */
3895
mc_is_defined_asciiz(Addr a,Addr * bad_addr,UInt * otag)3896 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3897 {
3898 UWord vabits2;
3899
3900 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
3901 DEBUG("mc_is_defined_asciiz\n");
3902
3903 if (otag) *otag = 0;
3904 if (bad_addr) *bad_addr = 0;
3905 while (True) {
3906 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
3907 vabits2 = get_vabits2(a);
3908 if (VA_BITS2_DEFINED != vabits2) {
3909 // Error! Nb: Report addressability errors in preference to
3910 // definedness errors. And don't report definedeness errors unless
3911 // --undef-value-errors=yes.
3912 if (bad_addr) {
3913 *bad_addr = a;
3914 }
3915 if (VA_BITS2_NOACCESS == vabits2) {
3916 return MC_AddrErr;
3917 }
3918 if (MC_(clo_mc_level) >= 2) {
3919 if (otag && MC_(clo_mc_level) == 3) {
3920 *otag = MC_(helperc_b_load1)( a );
3921 }
3922 return MC_ValueErr;
3923 }
3924 }
3925 /* Ok, a is safe to read. */
3926 if (* ((UChar*)a) == 0) {
3927 return MC_Ok;
3928 }
3929 a++;
3930 }
3931 }
3932
3933
3934 /*------------------------------------------------------------*/
3935 /*--- Memory event handlers ---*/
3936 /*------------------------------------------------------------*/
3937
3938 static
check_mem_is_addressable(CorePart part,ThreadId tid,const HChar * s,Addr base,SizeT size)3939 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
3940 Addr base, SizeT size )
3941 {
3942 Addr bad_addr;
3943 Bool ok = is_mem_addressable ( base, size, &bad_addr );
3944
3945 if (!ok) {
3946 switch (part) {
3947 case Vg_CoreSysCall:
3948 MC_(record_memparam_error) ( tid, bad_addr,
3949 /*isAddrErr*/True, s, 0/*otag*/ );
3950 break;
3951
3952 case Vg_CoreSignal:
3953 MC_(record_core_mem_error)( tid, s );
3954 break;
3955
3956 default:
3957 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3958 }
3959 }
3960 }
3961
3962 static
check_mem_is_defined(CorePart part,ThreadId tid,const HChar * s,Addr base,SizeT size)3963 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
3964 Addr base, SizeT size )
3965 {
3966 UInt otag = 0;
3967 Addr bad_addr;
3968 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3969
3970 if (MC_Ok != res) {
3971 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3972
3973 switch (part) {
3974 case Vg_CoreSysCall:
3975 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3976 isAddrErr ? 0 : otag );
3977 break;
3978
3979 case Vg_CoreSysCallArgInMem:
3980 MC_(record_regparam_error) ( tid, s, otag );
3981 break;
3982
3983 /* If we're being asked to jump to a silly address, record an error
3984 message before potentially crashing the entire system. */
3985 case Vg_CoreTranslate:
3986 MC_(record_jump_error)( tid, bad_addr );
3987 break;
3988
3989 default:
3990 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3991 }
3992 }
3993 }
3994
3995 static
check_mem_is_defined_asciiz(CorePart part,ThreadId tid,const HChar * s,Addr str)3996 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3997 const HChar* s, Addr str )
3998 {
3999 MC_ReadResult res;
4000 Addr bad_addr = 0; // shut GCC up
4001 UInt otag = 0;
4002
4003 tl_assert(part == Vg_CoreSysCall);
4004 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4005 if (MC_Ok != res) {
4006 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4007 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4008 isAddrErr ? 0 : otag );
4009 }
4010 }
4011
4012 /* Handling of mmap and mprotect is not as simple as it seems.
4013
4014 The underlying semantics are that memory obtained from mmap is
4015 always initialised, but may be inaccessible. And changes to the
4016 protection of memory do not change its contents and hence not its
4017 definedness state. Problem is we can't model
4018 inaccessible-but-with-some-definedness state; once we mark memory
4019 as inaccessible we lose all info about definedness, and so can't
4020 restore that if it is later made accessible again.
4021
4022 One obvious thing to do is this:
4023
4024 mmap/mprotect NONE -> noaccess
4025 mmap/mprotect other -> defined
4026
4027 The problem case here is: taking accessible memory, writing
4028 uninitialised data to it, mprotecting it NONE and later mprotecting
4029 it back to some accessible state causes the undefinedness to be
4030 lost.
4031
4032 A better proposal is:
4033
4034 (1) mmap NONE -> make noaccess
4035 (2) mmap other -> make defined
4036
4037 (3) mprotect NONE -> # no change
4038 (4) mprotect other -> change any "noaccess" to "defined"
4039
4040 (2) is OK because memory newly obtained from mmap really is defined
4041 (zeroed out by the kernel -- doing anything else would
4042 constitute a massive security hole.)
4043
4044 (1) is OK because the only way to make the memory usable is via
4045 (4), in which case we also wind up correctly marking it all as
4046 defined.
4047
4048 (3) is the weak case. We choose not to change memory state.
4049 (presumably the range is in some mixture of "defined" and
4050 "undefined", viz, accessible but with arbitrary V bits). Doing
4051 nothing means we retain the V bits, so that if the memory is
4052 later mprotected "other", the V bits remain unchanged, so there
4053 can be no false negatives. The bad effect is that if there's
4054 an access in the area, then MC cannot warn; but at least we'll
4055 get a SEGV to show, so it's better than nothing.
4056
4057 Consider the sequence (3) followed by (4). Any memory that was
4058 "defined" or "undefined" previously retains its state (as
4059 required). Any memory that was "noaccess" before can only have
4060 been made that way by (1), and so it's OK to change it to
4061 "defined".
4062
4063 See https://bugs.kde.org/show_bug.cgi?id=205541
4064 and https://bugs.kde.org/show_bug.cgi?id=210268
4065 */
4066 static
mc_new_mem_mmap(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)4067 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4068 ULong di_handle )
4069 {
4070 if (rr || ww || xx) {
4071 /* (2) mmap/mprotect other -> defined */
4072 MC_(make_mem_defined)(a, len);
4073 } else {
4074 /* (1) mmap/mprotect NONE -> noaccess */
4075 MC_(make_mem_noaccess)(a, len);
4076 }
4077 }
4078
4079 static
mc_new_mem_mprotect(Addr a,SizeT len,Bool rr,Bool ww,Bool xx)4080 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4081 {
4082 if (rr || ww || xx) {
4083 /* (4) mprotect other -> change any "noaccess" to "defined" */
4084 make_mem_defined_if_noaccess(a, len);
4085 } else {
4086 /* (3) mprotect NONE -> # no change */
4087 /* do nothing */
4088 }
4089 }
4090
4091
4092 static
mc_new_mem_startup(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)4093 void mc_new_mem_startup( Addr a, SizeT len,
4094 Bool rr, Bool ww, Bool xx, ULong di_handle )
4095 {
4096 // Because code is defined, initialised variables get put in the data
4097 // segment and are defined, and uninitialised variables get put in the
4098 // bss segment and are auto-zeroed (and so defined).
4099 //
4100 // It's possible that there will be padding between global variables.
4101 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4102 // a program uses it, Memcheck will not complain. This is arguably a
4103 // false negative, but it's a grey area -- the behaviour is defined (the
4104 // padding is zeroed) but it's probably not what the user intended. And
4105 // we can't avoid it.
4106 //
4107 // Note: we generally ignore RWX permissions, because we can't track them
4108 // without requiring more than one A bit which would slow things down a
4109 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4110 // So we mark any such pages as "unaddressable".
4111 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4112 a, (ULong)len, rr, ww, xx);
4113 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4114 }
4115
4116 static
mc_post_mem_write(CorePart part,ThreadId tid,Addr a,SizeT len)4117 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4118 {
4119 MC_(make_mem_defined)(a, len);
4120 }
4121
4122
4123 /*------------------------------------------------------------*/
4124 /*--- Register event handlers ---*/
4125 /*------------------------------------------------------------*/
4126
4127 /* Try and get a nonzero origin for the guest state section of thread
4128 tid characterised by (offset,size). Return 0 if nothing to show
4129 for it. */
mb_get_origin_for_guest_offset(ThreadId tid,Int offset,SizeT size)4130 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4131 Int offset, SizeT size )
4132 {
4133 Int sh2off;
4134 UInt area[3];
4135 UInt otag;
4136 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4137 if (sh2off == -1)
4138 return 0; /* This piece of guest state is not tracked */
4139 tl_assert(sh2off >= 0);
4140 tl_assert(0 == (sh2off % 4));
4141 area[0] = 0x31313131;
4142 area[2] = 0x27272727;
4143 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4144 tl_assert(area[0] == 0x31313131);
4145 tl_assert(area[2] == 0x27272727);
4146 otag = area[1];
4147 return otag;
4148 }
4149
4150
4151 /* When some chunk of guest state is written, mark the corresponding
4152 shadow area as valid. This is used to initialise arbitrarily large
4153 chunks of guest state, hence the _SIZE value, which has to be as
4154 big as the biggest guest state.
4155 */
mc_post_reg_write(CorePart part,ThreadId tid,PtrdiffT offset,SizeT size)4156 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4157 PtrdiffT offset, SizeT size)
4158 {
4159 # define MAX_REG_WRITE_SIZE 1712
4160 UChar area[MAX_REG_WRITE_SIZE];
4161 tl_assert(size <= MAX_REG_WRITE_SIZE);
4162 VG_(memset)(area, V_BITS8_DEFINED, size);
4163 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4164 # undef MAX_REG_WRITE_SIZE
4165 }
4166
4167 static
mc_post_reg_write_clientcall(ThreadId tid,PtrdiffT offset,SizeT size,Addr f)4168 void mc_post_reg_write_clientcall ( ThreadId tid,
4169 PtrdiffT offset, SizeT size, Addr f)
4170 {
4171 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4172 }
4173
4174 /* Look at the definedness of the guest's shadow state for
4175 [offset, offset+len). If any part of that is undefined, record
4176 a parameter error.
4177 */
mc_pre_reg_read(CorePart part,ThreadId tid,const HChar * s,PtrdiffT offset,SizeT size)4178 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4179 PtrdiffT offset, SizeT size)
4180 {
4181 Int i;
4182 Bool bad;
4183 UInt otag;
4184
4185 UChar area[16];
4186 tl_assert(size <= 16);
4187
4188 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4189
4190 bad = False;
4191 for (i = 0; i < size; i++) {
4192 if (area[i] != V_BITS8_DEFINED) {
4193 bad = True;
4194 break;
4195 }
4196 }
4197
4198 if (!bad)
4199 return;
4200
4201 /* We've found some undefinedness. See if we can also find an
4202 origin for it. */
4203 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4204 MC_(record_regparam_error) ( tid, s, otag );
4205 }
4206
4207
4208 /*------------------------------------------------------------*/
4209 /*--- Register-memory event handlers ---*/
4210 /*------------------------------------------------------------*/
4211
mc_copy_mem_to_reg(CorePart part,ThreadId tid,Addr a,PtrdiffT guest_state_offset,SizeT size)4212 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4213 PtrdiffT guest_state_offset, SizeT size )
4214 {
4215 SizeT i;
4216 UChar vbits8;
4217 Int offset;
4218 UInt d32;
4219
4220 /* Slow loop. */
4221 for (i = 0; i < size; i++) {
4222 get_vbits8( a+i, &vbits8 );
4223 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4224 1, &vbits8 );
4225 }
4226
4227 if (MC_(clo_mc_level) != 3)
4228 return;
4229
4230 /* Track origins. */
4231 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4232 if (offset == -1)
4233 return;
4234
4235 switch (size) {
4236 case 1:
4237 d32 = MC_(helperc_b_load1)( a );
4238 break;
4239 case 2:
4240 d32 = MC_(helperc_b_load2)( a );
4241 break;
4242 case 4:
4243 d32 = MC_(helperc_b_load4)( a );
4244 break;
4245 case 8:
4246 d32 = MC_(helperc_b_load8)( a );
4247 break;
4248 case 16:
4249 d32 = MC_(helperc_b_load16)( a );
4250 break;
4251 case 32:
4252 d32 = MC_(helperc_b_load32)( a );
4253 break;
4254 default:
4255 tl_assert(0);
4256 }
4257
4258 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
4259 }
4260
mc_copy_reg_to_mem(CorePart part,ThreadId tid,PtrdiffT guest_state_offset,Addr a,SizeT size)4261 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4262 PtrdiffT guest_state_offset, Addr a,
4263 SizeT size )
4264 {
4265 SizeT i;
4266 UChar vbits8;
4267 Int offset;
4268 UInt d32;
4269
4270 /* Slow loop. */
4271 for (i = 0; i < size; i++) {
4272 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4273 guest_state_offset+i, 1 );
4274 set_vbits8( a+i, vbits8 );
4275 }
4276
4277 if (MC_(clo_mc_level) != 3)
4278 return;
4279
4280 /* Track origins. */
4281 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4282 if (offset == -1)
4283 return;
4284
4285 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4286 switch (size) {
4287 case 1:
4288 MC_(helperc_b_store1)( a, d32 );
4289 break;
4290 case 2:
4291 MC_(helperc_b_store2)( a, d32 );
4292 break;
4293 case 4:
4294 MC_(helperc_b_store4)( a, d32 );
4295 break;
4296 case 8:
4297 MC_(helperc_b_store8)( a, d32 );
4298 break;
4299 case 16:
4300 MC_(helperc_b_store16)( a, d32 );
4301 break;
4302 case 32:
4303 MC_(helperc_b_store32)( a, d32 );
4304 break;
4305 default:
4306 tl_assert(0);
4307 }
4308 }
4309
4310
4311 /*------------------------------------------------------------*/
4312 /*--- Some static assertions ---*/
4313 /*------------------------------------------------------------*/
4314
4315 /* The handwritten assembly helpers below have baked-in assumptions
4316 about various constant values. These assertions attempt to make
4317 that a bit safer by checking those values and flagging changes that
4318 would make the assembly invalid. Not perfect but it's better than
4319 nothing. */
4320
4321 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4322
4323 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA);
4324 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4325
4326 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000);
4327 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4328
4329 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4330 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4331
4332 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4333 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4334
4335 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4336 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4337
4338 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4339 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4340
4341
4342 /*------------------------------------------------------------*/
4343 /*--- Functions called directly from generated code: ---*/
4344 /*--- Load/store handlers. ---*/
4345 /*------------------------------------------------------------*/
4346
4347 /* Types: LOADV32, LOADV16, LOADV8 are:
4348 UWord fn ( Addr a )
4349 so they return 32-bits on 32-bit machines and 64-bits on
4350 64-bit machines. Addr has the same size as a host word.
4351
4352 LOADV64 is always ULong fn ( Addr a )
4353
4354 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4355 are a UWord, and for STOREV64 they are a ULong.
4356 */
4357
4358 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4359 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4360 primary map. This is all very tricky (and important!), so let's
4361 work through the maths by hand (below), *and* assert for these
4362 values at startup. */
4363 #define MASK(_szInBytes) \
4364 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4365
4366 /* MASK only exists so as to define this macro. */
4367 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4368 ((_a) & MASK((_szInBits>>3)))
4369
4370 /* On a 32-bit machine:
4371
4372 N_PRIMARY_BITS == 16, so
4373 N_PRIMARY_MAP == 0x10000, so
4374 N_PRIMARY_MAP-1 == 0xFFFF, so
4375 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4376
4377 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4378 = ~ ( 0xFFFF | 0xFFFF0000 )
4379 = ~ 0xFFFF'FFFF
4380 = 0
4381
4382 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4383 = ~ ( 0xFFFE | 0xFFFF0000 )
4384 = ~ 0xFFFF'FFFE
4385 = 1
4386
4387 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4388 = ~ ( 0xFFFC | 0xFFFF0000 )
4389 = ~ 0xFFFF'FFFC
4390 = 3
4391
4392 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4393 = ~ ( 0xFFF8 | 0xFFFF0000 )
4394 = ~ 0xFFFF'FFF8
4395 = 7
4396
4397 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4398 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4399 the 1-byte alignment case, it is always a zero value, since MASK(1)
4400 is zero. All as expected.
4401
4402 On a 64-bit machine, it's more complex, since we're testing
4403 simultaneously for misalignment and for the address being at or
4404 above 64G:
4405
4406 N_PRIMARY_BITS == 20, so
4407 N_PRIMARY_MAP == 0x100000, so
4408 N_PRIMARY_MAP-1 == 0xFFFFF, so
4409 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4410
4411 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4412 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4413 = ~ 0xF'FFFF'FFFF
4414 = 0xFFFF'FFF0'0000'0000
4415
4416 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4417 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4418 = ~ 0xF'FFFF'FFFE
4419 = 0xFFFF'FFF0'0000'0001
4420
4421 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4422 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4423 = ~ 0xF'FFFF'FFFC
4424 = 0xFFFF'FFF0'0000'0003
4425
4426 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4427 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4428 = ~ 0xF'FFFF'FFF8
4429 = 0xFFFF'FFF0'0000'0007
4430 */
4431
4432 /*------------------------------------------------------------*/
4433 /*--- LOADV256 and LOADV128 ---*/
4434 /*------------------------------------------------------------*/
4435
4436 static INLINE
mc_LOADV_128_or_256(ULong * res,Addr a,SizeT nBits,Bool isBigEndian)4437 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4438 Addr a, SizeT nBits, Bool isBigEndian )
4439 {
4440 PROF_EVENT(MCPE_LOADV_128_OR_256);
4441
4442 #ifndef PERF_FAST_LOADV
4443 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4444 return;
4445 #else
4446 {
4447 UWord sm_off16, vabits16, j;
4448 UWord nBytes = nBits / 8;
4449 UWord nULongs = nBytes / 8;
4450 SecMap* sm;
4451
4452 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4453 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4454 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4455 return;
4456 }
4457
4458 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4459 suitably aligned, is mapped, and addressible. */
4460 for (j = 0; j < nULongs; j++) {
4461 sm = get_secmap_for_reading_low(a + 8*j);
4462 sm_off16 = SM_OFF_16(a + 8*j);
4463 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4464
4465 // Convert V bits from compact memory form to expanded
4466 // register form.
4467 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4468 res[j] = V_BITS64_DEFINED;
4469 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4470 res[j] = V_BITS64_UNDEFINED;
4471 } else {
4472 /* Slow case: some block of 8 bytes are not all-defined or
4473 all-undefined. */
4474 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4475 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4476 return;
4477 }
4478 }
4479 return;
4480 }
4481 #endif
4482 }
4483
MC_(helperc_LOADV256be)4484 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4485 {
4486 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4487 }
MC_(helperc_LOADV256le)4488 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4489 {
4490 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4491 }
4492
MC_(helperc_LOADV128be)4493 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4494 {
4495 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4496 }
MC_(helperc_LOADV128le)4497 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4498 {
4499 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4500 }
4501
4502 /*------------------------------------------------------------*/
4503 /*--- LOADV64 ---*/
4504 /*------------------------------------------------------------*/
4505
4506 static INLINE
mc_LOADV64(Addr a,Bool isBigEndian)4507 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4508 {
4509 PROF_EVENT(MCPE_LOADV64);
4510
4511 #ifndef PERF_FAST_LOADV
4512 return mc_LOADVn_slow( a, 64, isBigEndian );
4513 #else
4514 {
4515 UWord sm_off16, vabits16;
4516 SecMap* sm;
4517
4518 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4519 PROF_EVENT(MCPE_LOADV64_SLOW1);
4520 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4521 }
4522
4523 sm = get_secmap_for_reading_low(a);
4524 sm_off16 = SM_OFF_16(a);
4525 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4526
4527 // Handle common case quickly: a is suitably aligned, is mapped, and
4528 // addressible.
4529 // Convert V bits from compact memory form to expanded register form.
4530 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4531 return V_BITS64_DEFINED;
4532 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4533 return V_BITS64_UNDEFINED;
4534 } else {
4535 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4536 PROF_EVENT(MCPE_LOADV64_SLOW2);
4537 return mc_LOADVn_slow( a, 64, isBigEndian );
4538 }
4539 }
4540 #endif
4541 }
4542
4543 // Generic for all platforms
MC_(helperc_LOADV64be)4544 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4545 {
4546 return mc_LOADV64(a, True);
4547 }
4548
4549 // Non-generic assembly for arm32-linux
4550 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4551 && defined(VGP_arm_linux)
4552 __asm__( /* Derived from the 32 bit assembly helper */
4553 ".text \n"
4554 ".align 2 \n"
4555 ".global vgMemCheck_helperc_LOADV64le \n"
4556 ".type vgMemCheck_helperc_LOADV64le, %function \n"
4557 "vgMemCheck_helperc_LOADV64le: \n"
4558 " tst r0, #7 \n"
4559 " movw r3, #:lower16:primary_map \n"
4560 " bne .LLV64LEc4 \n" // if misaligned
4561 " lsr r2, r0, #16 \n"
4562 " movt r3, #:upper16:primary_map \n"
4563 " ldr r2, [r3, r2, lsl #2] \n"
4564 " uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000
4565 " movw r3, #0xAAAA \n"
4566 " lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0
4567 " ldrh r1, [r2, r1] \n"
4568 " cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED
4569 " bne .LLV64LEc0 \n" // if !all_defined
4570 " mov r1, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4571 " mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4572 " bx lr \n"
4573 ".LLV64LEc0: \n"
4574 " movw r3, #0x5555 \n"
4575 " cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED
4576 " bne .LLV64LEc4 \n" // if !all_undefined
4577 " mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4578 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4579 " bx lr \n"
4580 ".LLV64LEc4: \n"
4581 " push {r4, lr} \n"
4582 " mov r2, #0 \n"
4583 " mov r1, #64 \n"
4584 " bl mc_LOADVn_slow \n"
4585 " pop {r4, pc} \n"
4586 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
4587 ".previous\n"
4588 );
4589
4590 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4591 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4592 __asm__(
4593 ".text\n"
4594 ".align 16\n"
4595 ".global vgMemCheck_helperc_LOADV64le\n"
4596 ".type vgMemCheck_helperc_LOADV64le, @function\n"
4597 "vgMemCheck_helperc_LOADV64le:\n"
4598 " test $0x7, %eax\n"
4599 " jne .LLV64LE2\n" /* jump if not aligned */
4600 " mov %eax, %ecx\n"
4601 " movzwl %ax, %edx\n"
4602 " shr $0x10, %ecx\n"
4603 " mov primary_map(,%ecx,4), %ecx\n"
4604 " shr $0x3, %edx\n"
4605 " movzwl (%ecx,%edx,2), %edx\n"
4606 " cmp $0xaaaa, %edx\n"
4607 " jne .LLV64LE1\n" /* jump if not all defined */
4608 " xor %eax, %eax\n" /* return 0 in edx:eax */
4609 " xor %edx, %edx\n"
4610 " ret\n"
4611 ".LLV64LE1:\n"
4612 " cmp $0x5555, %edx\n"
4613 " jne .LLV64LE2\n" /* jump if not all undefined */
4614 " or $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
4615 " or $0xffffffff, %edx\n"
4616 " ret\n"
4617 ".LLV64LE2:\n"
4618 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 64, 0) */
4619 " mov $64, %edx\n"
4620 " jmp mc_LOADVn_slow\n"
4621 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
4622 ".previous\n"
4623 );
4624
4625 #else
4626 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
MC_(helperc_LOADV64le)4627 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4628 {
4629 return mc_LOADV64(a, False);
4630 }
4631 #endif
4632
4633 /*------------------------------------------------------------*/
4634 /*--- STOREV64 ---*/
4635 /*------------------------------------------------------------*/
4636
4637 static INLINE
mc_STOREV64(Addr a,ULong vbits64,Bool isBigEndian)4638 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4639 {
4640 PROF_EVENT(MCPE_STOREV64);
4641
4642 #ifndef PERF_FAST_STOREV
4643 // XXX: this slow case seems to be marginally faster than the fast case!
4644 // Investigate further.
4645 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4646 #else
4647 {
4648 UWord sm_off16, vabits16;
4649 SecMap* sm;
4650
4651 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4652 PROF_EVENT(MCPE_STOREV64_SLOW1);
4653 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4654 return;
4655 }
4656
4657 sm = get_secmap_for_reading_low(a);
4658 sm_off16 = SM_OFF_16(a);
4659 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4660
4661 // To understand the below cleverness, see the extensive comments
4662 // in MC_(helperc_STOREV8).
4663 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4664 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4665 return;
4666 }
4667 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4668 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4669 return;
4670 }
4671 PROF_EVENT(MCPE_STOREV64_SLOW2);
4672 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4673 return;
4674 }
4675 if (V_BITS64_UNDEFINED == vbits64) {
4676 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4677 return;
4678 }
4679 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4680 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4681 return;
4682 }
4683 PROF_EVENT(MCPE_STOREV64_SLOW3);
4684 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4685 return;
4686 }
4687
4688 PROF_EVENT(MCPE_STOREV64_SLOW4);
4689 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4690 }
4691 #endif
4692 }
4693
MC_(helperc_STOREV64be)4694 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4695 {
4696 mc_STOREV64(a, vbits64, True);
4697 }
MC_(helperc_STOREV64le)4698 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4699 {
4700 mc_STOREV64(a, vbits64, False);
4701 }
4702
4703 /*------------------------------------------------------------*/
4704 /*--- LOADV32 ---*/
4705 /*------------------------------------------------------------*/
4706
4707 static INLINE
mc_LOADV32(Addr a,Bool isBigEndian)4708 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4709 {
4710 PROF_EVENT(MCPE_LOADV32);
4711
4712 #ifndef PERF_FAST_LOADV
4713 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4714 #else
4715 {
4716 UWord sm_off, vabits8;
4717 SecMap* sm;
4718
4719 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4720 PROF_EVENT(MCPE_LOADV32_SLOW1);
4721 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4722 }
4723
4724 sm = get_secmap_for_reading_low(a);
4725 sm_off = SM_OFF(a);
4726 vabits8 = sm->vabits8[sm_off];
4727
4728 // Handle common case quickly: a is suitably aligned, is mapped, and the
4729 // entire word32 it lives in is addressible.
4730 // Convert V bits from compact memory form to expanded register form.
4731 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4732 // Almost certainly not necessary, but be paranoid.
4733 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4734 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4735 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4736 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4737 } else {
4738 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4739 PROF_EVENT(MCPE_LOADV32_SLOW2);
4740 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4741 }
4742 }
4743 #endif
4744 }
4745
4746 // Generic for all platforms
MC_(helperc_LOADV32be)4747 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4748 {
4749 return mc_LOADV32(a, True);
4750 }
4751
4752 // Non-generic assembly for arm32-linux
4753 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4754 && defined(VGP_arm_linux)
4755 __asm__( /* Derived from NCode template */
4756 ".text \n"
4757 ".align 2 \n"
4758 ".global vgMemCheck_helperc_LOADV32le \n"
4759 ".type vgMemCheck_helperc_LOADV32le, %function \n"
4760 "vgMemCheck_helperc_LOADV32le: \n"
4761 " tst r0, #3 \n" // 1
4762 " movw r3, #:lower16:primary_map \n" // 1
4763 " bne .LLV32LEc4 \n" // 2 if misaligned
4764 " lsr r2, r0, #16 \n" // 3
4765 " movt r3, #:upper16:primary_map \n" // 3
4766 " ldr r2, [r3, r2, lsl #2] \n" // 4
4767 " uxth r1, r0 \n" // 4
4768 " ldrb r1, [r2, r1, lsr #2] \n" // 5
4769 " cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED
4770 " bne .LLV32LEc0 \n" // 7 if !all_defined
4771 " mov r0, #0x0 \n" // 8 0x0 == V_BITS32_DEFINED
4772 " bx lr \n" // 9
4773 ".LLV32LEc0: \n"
4774 " cmp r1, #0x55 \n" // 0x55 == VA_BITS8_UNDEFINED
4775 " bne .LLV32LEc4 \n" // if !all_undefined
4776 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4777 " bx lr \n"
4778 ".LLV32LEc4: \n"
4779 " push {r4, lr} \n"
4780 " mov r2, #0 \n"
4781 " mov r1, #32 \n"
4782 " bl mc_LOADVn_slow \n"
4783 " pop {r4, pc} \n"
4784 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
4785 ".previous\n"
4786 );
4787
4788 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4789 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4790 __asm__(
4791 ".text\n"
4792 ".align 16\n"
4793 ".global vgMemCheck_helperc_LOADV32le\n"
4794 ".type vgMemCheck_helperc_LOADV32le, @function\n"
4795 "vgMemCheck_helperc_LOADV32le:\n"
4796 " test $0x3, %eax\n"
4797 " jnz .LLV32LE2\n" /* jump if misaligned */
4798 " mov %eax, %edx\n"
4799 " shr $16, %edx\n"
4800 " mov primary_map(,%edx,4), %ecx\n"
4801 " movzwl %ax, %edx\n"
4802 " shr $2, %edx\n"
4803 " movzbl (%ecx,%edx,1), %edx\n"
4804 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
4805 " jne .LLV32LE1\n" /* jump if not completely defined */
4806 " xor %eax, %eax\n" /* else return V_BITS32_DEFINED */
4807 " ret\n"
4808 ".LLV32LE1:\n"
4809 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
4810 " jne .LLV32LE2\n" /* jump if not completely undefined */
4811 " or $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
4812 " ret\n"
4813 ".LLV32LE2:\n"
4814 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 32, 0) */
4815 " mov $32, %edx\n"
4816 " jmp mc_LOADVn_slow\n"
4817 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
4818 ".previous\n"
4819 );
4820
4821 #else
4822 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
MC_(helperc_LOADV32le)4823 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4824 {
4825 return mc_LOADV32(a, False);
4826 }
4827 #endif
4828
4829 /*------------------------------------------------------------*/
4830 /*--- STOREV32 ---*/
4831 /*------------------------------------------------------------*/
4832
4833 static INLINE
mc_STOREV32(Addr a,UWord vbits32,Bool isBigEndian)4834 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4835 {
4836 PROF_EVENT(MCPE_STOREV32);
4837
4838 #ifndef PERF_FAST_STOREV
4839 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4840 #else
4841 {
4842 UWord sm_off, vabits8;
4843 SecMap* sm;
4844
4845 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4846 PROF_EVENT(MCPE_STOREV32_SLOW1);
4847 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4848 return;
4849 }
4850
4851 sm = get_secmap_for_reading_low(a);
4852 sm_off = SM_OFF(a);
4853 vabits8 = sm->vabits8[sm_off];
4854
4855 // To understand the below cleverness, see the extensive comments
4856 // in MC_(helperc_STOREV8).
4857 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
4858 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
4859 return;
4860 }
4861 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4862 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4863 return;
4864 }
4865 PROF_EVENT(MCPE_STOREV32_SLOW2);
4866 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4867 return;
4868 }
4869 if (V_BITS32_UNDEFINED == vbits32) {
4870 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4871 return;
4872 }
4873 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4874 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4875 return;
4876 }
4877 PROF_EVENT(MCPE_STOREV32_SLOW3);
4878 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4879 return;
4880 }
4881
4882 PROF_EVENT(MCPE_STOREV32_SLOW4);
4883 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4884 }
4885 #endif
4886 }
4887
MC_(helperc_STOREV32be)4888 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4889 {
4890 mc_STOREV32(a, vbits32, True);
4891 }
MC_(helperc_STOREV32le)4892 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4893 {
4894 mc_STOREV32(a, vbits32, False);
4895 }
4896
4897 /*------------------------------------------------------------*/
4898 /*--- LOADV16 ---*/
4899 /*------------------------------------------------------------*/
4900
4901 static INLINE
mc_LOADV16(Addr a,Bool isBigEndian)4902 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4903 {
4904 PROF_EVENT(MCPE_LOADV16);
4905
4906 #ifndef PERF_FAST_LOADV
4907 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4908 #else
4909 {
4910 UWord sm_off, vabits8;
4911 SecMap* sm;
4912
4913 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4914 PROF_EVENT(MCPE_LOADV16_SLOW1);
4915 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4916 }
4917
4918 sm = get_secmap_for_reading_low(a);
4919 sm_off = SM_OFF(a);
4920 vabits8 = sm->vabits8[sm_off];
4921 // Handle common case quickly: a is suitably aligned, is mapped, and is
4922 // addressible.
4923 // Convert V bits from compact memory form to expanded register form
4924 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
4925 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4926 else {
4927 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4928 // the two sub-bytes.
4929 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4930 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
4931 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4932 else {
4933 /* Slow case: the two bytes are not all-defined or all-undefined. */
4934 PROF_EVENT(MCPE_LOADV16_SLOW2);
4935 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4936 }
4937 }
4938 }
4939 #endif
4940 }
4941
4942 // Generic for all platforms
MC_(helperc_LOADV16be)4943 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4944 {
4945 return mc_LOADV16(a, True);
4946 }
4947
4948 // Non-generic assembly for arm32-linux
4949 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4950 && defined(VGP_arm_linux)
4951 __asm__( /* Derived from NCode template */
4952 ".text \n"
4953 ".align 2 \n"
4954 ".global vgMemCheck_helperc_LOADV16le \n"
4955 ".type vgMemCheck_helperc_LOADV16le, %function \n"
4956 "vgMemCheck_helperc_LOADV16le: \n" //
4957 " tst r0, #1 \n" //
4958 " bne .LLV16LEc12 \n" // if misaligned
4959 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
4960 " movw r3, #:lower16:primary_map \n" //
4961 " uxth r1, r0 \n" // r1 = sec-map-offB
4962 " movt r3, #:upper16:primary_map \n" //
4963 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
4964 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
4965 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
4966 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
4967 ".LLV16LEh9: \n" //
4968 " mov r0, #0xFFFFFFFF \n" //
4969 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
4970 " bx lr \n" //
4971 ".LLV16LEc0: \n" //
4972 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
4973 " bne .LLV16LEc4 \n" //
4974 ".LLV16LEc2: \n" //
4975 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
4976 " bx lr \n" //
4977 ".LLV16LEc4: \n" //
4978 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
4979 // Extract the relevant 4 bits and inspect.
4980 " and r2, r0, #2 \n" // addr & 2
4981 " add r2, r2, r2 \n" // 2 * (addr & 2)
4982 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
4983 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
4984
4985 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
4986 " beq .LLV16LEh9 \n" //
4987
4988 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
4989 " beq .LLV16LEc2 \n" //
4990
4991 ".LLV16LEc12: \n" //
4992 " push {r4, lr} \n" //
4993 " mov r2, #0 \n" //
4994 " mov r1, #16 \n" //
4995 " bl mc_LOADVn_slow \n" //
4996 " pop {r4, pc} \n" //
4997 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
4998 ".previous\n"
4999 );
5000
5001 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5002 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5003 __asm__(
5004 ".text\n"
5005 ".align 16\n"
5006 ".global vgMemCheck_helperc_LOADV16le\n"
5007 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5008 "vgMemCheck_helperc_LOADV16le:\n"
5009 " test $0x1, %eax\n"
5010 " jne .LLV16LE5\n" /* jump if not aligned */
5011 " mov %eax, %edx\n"
5012 " shr $0x10, %edx\n"
5013 " mov primary_map(,%edx,4), %ecx\n"
5014 " movzwl %ax, %edx\n"
5015 " shr $0x2, %edx\n"
5016 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5017 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5018 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5019 ".LLV16LE1:\n"
5020 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5021 " ret\n"
5022 ".LLV16LE2:\n"
5023 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5024 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5025 ".LLV16LE3:\n"
5026 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5027 " ret\n"
5028 ".LLV16LE4:\n"
5029 " mov %eax, %ecx\n"
5030 " and $0x2, %ecx\n"
5031 " add %ecx, %ecx\n"
5032 " sar %cl, %edx\n"
5033 " and $0xf, %edx\n"
5034 " cmp $0xa, %edx\n"
5035 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5036 " cmp $0x5, %edx\n"
5037 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5038 ".LLV16LE5:\n"
5039 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5040 " mov $16, %edx\n"
5041 " jmp mc_LOADVn_slow\n"
5042 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5043 ".previous\n"
5044 );
5045
5046 #else
5047 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
MC_(helperc_LOADV16le)5048 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5049 {
5050 return mc_LOADV16(a, False);
5051 }
5052 #endif
5053
5054 /*------------------------------------------------------------*/
5055 /*--- STOREV16 ---*/
5056 /*------------------------------------------------------------*/
5057
5058 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5059 static INLINE
accessible_vabits4_in_vabits8(Addr a,UChar vabits8)5060 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5061 {
5062 UInt shift;
5063 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
5064 shift = (a & 2) << 1; // shift by 0 or 4
5065 vabits8 >>= shift; // shift the four bits to the bottom
5066 // check 2 x vabits2 != VA_BITS2_NOACCESS
5067 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5068 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
5069 }
5070
5071 static INLINE
mc_STOREV16(Addr a,UWord vbits16,Bool isBigEndian)5072 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5073 {
5074 PROF_EVENT(MCPE_STOREV16);
5075
5076 #ifndef PERF_FAST_STOREV
5077 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5078 #else
5079 {
5080 UWord sm_off, vabits8;
5081 SecMap* sm;
5082
5083 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5084 PROF_EVENT(MCPE_STOREV16_SLOW1);
5085 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5086 return;
5087 }
5088
5089 sm = get_secmap_for_reading_low(a);
5090 sm_off = SM_OFF(a);
5091 vabits8 = sm->vabits8[sm_off];
5092
5093 // To understand the below cleverness, see the extensive comments
5094 // in MC_(helperc_STOREV8).
5095 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5096 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5097 return;
5098 }
5099 if (!is_distinguished_sm(sm)
5100 && accessible_vabits4_in_vabits8(a, vabits8)) {
5101 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5102 &(sm->vabits8[sm_off]) );
5103 return;
5104 }
5105 PROF_EVENT(MCPE_STOREV16_SLOW2);
5106 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5107 }
5108 if (V_BITS16_UNDEFINED == vbits16) {
5109 if (vabits8 == VA_BITS8_UNDEFINED) {
5110 return;
5111 }
5112 if (!is_distinguished_sm(sm)
5113 && accessible_vabits4_in_vabits8(a, vabits8)) {
5114 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5115 &(sm->vabits8[sm_off]) );
5116 return;
5117 }
5118 PROF_EVENT(MCPE_STOREV16_SLOW3);
5119 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5120 return;
5121 }
5122
5123 PROF_EVENT(MCPE_STOREV16_SLOW4);
5124 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5125 }
5126 #endif
5127 }
5128
5129
MC_(helperc_STOREV16be)5130 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5131 {
5132 mc_STOREV16(a, vbits16, True);
5133 }
MC_(helperc_STOREV16le)5134 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5135 {
5136 mc_STOREV16(a, vbits16, False);
5137 }
5138
5139 /*------------------------------------------------------------*/
5140 /*--- LOADV8 ---*/
5141 /*------------------------------------------------------------*/
5142
5143 /* Note: endianness is irrelevant for size == 1 */
5144
5145 // Non-generic assembly for arm32-linux
5146 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5147 && defined(VGP_arm_linux)
5148 __asm__( /* Derived from NCode template */
5149 ".text \n"
5150 ".align 2 \n"
5151 ".global vgMemCheck_helperc_LOADV8 \n"
5152 ".type vgMemCheck_helperc_LOADV8, %function \n"
5153 "vgMemCheck_helperc_LOADV8: \n" //
5154 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5155 " movw r3, #:lower16:primary_map \n" //
5156 " uxth r1, r0 \n" // r1 = sec-map-offB
5157 " movt r3, #:upper16:primary_map \n" //
5158 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5159 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5160 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5161 " bne .LLV8c0 \n" // no, goto .LLV8c0
5162 ".LLV8h9: \n" //
5163 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5164 " bx lr \n" //
5165 ".LLV8c0: \n" //
5166 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5167 " bne .LLV8c4 \n" //
5168 ".LLV8c2: \n" //
5169 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5170 " bx lr \n" //
5171 ".LLV8c4: \n" //
5172 // r1 holds sec-map-VABITS8
5173 // r0 holds the address. Extract the relevant 2 bits and inspect.
5174 " and r2, r0, #3 \n" // addr & 3
5175 " add r2, r2, r2 \n" // 2 * (addr & 3)
5176 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5177 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5178
5179 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5180 " beq .LLV8h9 \n" //
5181
5182 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5183 " beq .LLV8c2 \n" //
5184
5185 " push {r4, lr} \n" //
5186 " mov r2, #0 \n" //
5187 " mov r1, #8 \n" //
5188 " bl mc_LOADVn_slow \n" //
5189 " pop {r4, pc} \n" //
5190 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5191 ".previous\n"
5192 );
5193
5194 /* Non-generic assembly for x86-linux */
5195 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5196 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5197 __asm__(
5198 ".text\n"
5199 ".align 16\n"
5200 ".global vgMemCheck_helperc_LOADV8\n"
5201 ".type vgMemCheck_helperc_LOADV8, @function\n"
5202 "vgMemCheck_helperc_LOADV8:\n"
5203 " mov %eax, %edx\n"
5204 " shr $0x10, %edx\n"
5205 " mov primary_map(,%edx,4), %ecx\n"
5206 " movzwl %ax, %edx\n"
5207 " shr $0x2, %edx\n"
5208 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5209 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5210 " jne .LLV8LE2\n" /* jump if not defined */
5211 ".LLV8LE1:\n"
5212 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5213 " ret\n"
5214 ".LLV8LE2:\n"
5215 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5216 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5217 ".LLV8LE3:\n"
5218 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5219 " ret\n"
5220 ".LLV8LE4:\n"
5221 " mov %eax, %ecx\n"
5222 " and $0x3, %ecx\n"
5223 " add %ecx, %ecx\n"
5224 " sar %cl, %edx\n"
5225 " and $0x3, %edx\n"
5226 " cmp $0x2, %edx\n"
5227 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5228 " cmp $0x1, %edx\n"
5229 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5230 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5231 " mov $0x8, %edx\n"
5232 " jmp mc_LOADVn_slow\n"
5233 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5234 ".previous\n"
5235 );
5236
5237 #else
5238 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5239 VG_REGPARM(1)
MC_(helperc_LOADV8)5240 UWord MC_(helperc_LOADV8) ( Addr a )
5241 {
5242 PROF_EVENT(MCPE_LOADV8);
5243
5244 #ifndef PERF_FAST_LOADV
5245 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5246 #else
5247 {
5248 UWord sm_off, vabits8;
5249 SecMap* sm;
5250
5251 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5252 PROF_EVENT(MCPE_LOADV8_SLOW1);
5253 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5254 }
5255
5256 sm = get_secmap_for_reading_low(a);
5257 sm_off = SM_OFF(a);
5258 vabits8 = sm->vabits8[sm_off];
5259 // Convert V bits from compact memory form to expanded register form
5260 // Handle common case quickly: a is mapped, and the entire
5261 // word32 it lives in is addressible.
5262 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
5263 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5264 else {
5265 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5266 // the single byte.
5267 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5268 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
5269 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5270 else {
5271 /* Slow case: the byte is not all-defined or all-undefined. */
5272 PROF_EVENT(MCPE_LOADV8_SLOW2);
5273 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5274 }
5275 }
5276 }
5277 #endif
5278 }
5279 #endif
5280
5281 /*------------------------------------------------------------*/
5282 /*--- STOREV8 ---*/
5283 /*------------------------------------------------------------*/
5284
5285 VG_REGPARM(2)
MC_(helperc_STOREV8)5286 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5287 {
5288 PROF_EVENT(MCPE_STOREV8);
5289
5290 #ifndef PERF_FAST_STOREV
5291 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5292 #else
5293 {
5294 UWord sm_off, vabits8;
5295 SecMap* sm;
5296
5297 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5298 PROF_EVENT(MCPE_STOREV8_SLOW1);
5299 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5300 return;
5301 }
5302
5303 sm = get_secmap_for_reading_low(a);
5304 sm_off = SM_OFF(a);
5305 vabits8 = sm->vabits8[sm_off];
5306
5307 // Clevernesses to speed up storing V bits.
5308 // The 64/32/16 bit cases also have similar clevernesses, but it
5309 // works a little differently to the code below.
5310 //
5311 // Cleverness 1: sometimes we don't have to write the shadow memory at
5312 // all, if we can tell that what we want to write is the same as what is
5313 // already there. These cases are marked below as "defined on defined" and
5314 // "undefined on undefined".
5315 //
5316 // Cleverness 2:
5317 // We also avoid to call mc_STOREVn_slow if the V bits can directly
5318 // be written in the secondary map. V bits can be directly written
5319 // if 4 conditions are respected:
5320 // * The address for which V bits are written is naturally aligned
5321 // on 1 byte for STOREV8 (this is always true)
5322 // on 2 bytes for STOREV16
5323 // on 4 bytes for STOREV32
5324 // on 8 bytes for STOREV64.
5325 // * V bits being written are either fully defined or fully undefined.
5326 // (for partially defined V bits, V bits cannot be directly written,
5327 // as the secondary vbits table must be maintained).
5328 // * the secmap is not distinguished (distinguished maps cannot be
5329 // modified).
5330 // * the memory corresponding to the V bits being written is
5331 // accessible (if one or more bytes are not accessible,
5332 // we must call mc_STOREVn_slow in order to report accessibility
5333 // errors).
5334 // Note that for STOREV32 and STOREV64, it is too expensive
5335 // to verify the accessibility of each byte for the benefit it
5336 // brings. Instead, a quicker check is done by comparing to
5337 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5338 // but misses some opportunity of direct modifications.
5339 // Checking each byte accessibility was measured for
5340 // STOREV32+perf tests and was slowing down all perf tests.
5341 // The cases corresponding to cleverness 2 are marked below as
5342 // "direct mod".
5343 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5344 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5345 return; // defined on defined
5346 }
5347 if (!is_distinguished_sm(sm)
5348 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5349 // direct mod
5350 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5351 &(sm->vabits8[sm_off]) );
5352 return;
5353 }
5354 PROF_EVENT(MCPE_STOREV8_SLOW2);
5355 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5356 return;
5357 }
5358 if (V_BITS8_UNDEFINED == vbits8) {
5359 if (vabits8 == VA_BITS8_UNDEFINED) {
5360 return; // undefined on undefined
5361 }
5362 if (!is_distinguished_sm(sm)
5363 && (VA_BITS2_NOACCESS
5364 != extract_vabits2_from_vabits8(a, vabits8))) {
5365 // direct mod
5366 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5367 &(sm->vabits8[sm_off]) );
5368 return;
5369 }
5370 PROF_EVENT(MCPE_STOREV8_SLOW3);
5371 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5372 return;
5373 }
5374
5375 // Partially defined word
5376 PROF_EVENT(MCPE_STOREV8_SLOW4);
5377 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5378 }
5379 #endif
5380 }
5381
5382
5383 /*------------------------------------------------------------*/
5384 /*--- Functions called directly from generated code: ---*/
5385 /*--- Value-check failure handlers. ---*/
5386 /*------------------------------------------------------------*/
5387
5388 /* Call these ones when an origin is available ... */
5389 VG_REGPARM(1)
MC_(helperc_value_check0_fail_w_o)5390 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5391 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5392 }
5393
5394 VG_REGPARM(1)
MC_(helperc_value_check1_fail_w_o)5395 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5396 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5397 }
5398
5399 VG_REGPARM(1)
MC_(helperc_value_check4_fail_w_o)5400 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5401 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5402 }
5403
5404 VG_REGPARM(1)
MC_(helperc_value_check8_fail_w_o)5405 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5406 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5407 }
5408
5409 VG_REGPARM(2)
MC_(helperc_value_checkN_fail_w_o)5410 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5411 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5412 }
5413
5414 /* ... and these when an origin isn't available. */
5415
5416 VG_REGPARM(0)
MC_(helperc_value_check0_fail_no_o)5417 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5418 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5419 }
5420
5421 VG_REGPARM(0)
MC_(helperc_value_check1_fail_no_o)5422 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5423 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5424 }
5425
5426 VG_REGPARM(0)
MC_(helperc_value_check4_fail_no_o)5427 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5428 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5429 }
5430
5431 VG_REGPARM(0)
MC_(helperc_value_check8_fail_no_o)5432 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5433 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5434 }
5435
5436 VG_REGPARM(1)
MC_(helperc_value_checkN_fail_no_o)5437 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5438 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5439 }
5440
5441
5442 /*------------------------------------------------------------*/
5443 /*--- Metadata get/set functions, for client requests. ---*/
5444 /*------------------------------------------------------------*/
5445
5446 // Nb: this expands the V+A bits out into register-form V bits, even though
5447 // they're in memory. This is for backward compatibility, and because it's
5448 // probably what the user wants.
5449
5450 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5451 error [no longer used], 3 == addressing error. */
5452 /* Nb: We used to issue various definedness/addressability errors from here,
5453 but we took them out because they ranged from not-very-helpful to
5454 downright annoying, and they complicated the error data structures. */
mc_get_or_set_vbits_for_client(Addr a,Addr vbits,SizeT szB,Bool setting,Bool is_client_request)5455 static Int mc_get_or_set_vbits_for_client (
5456 Addr a,
5457 Addr vbits,
5458 SizeT szB,
5459 Bool setting, /* True <=> set vbits, False <=> get vbits */
5460 Bool is_client_request /* True <=> real user request
5461 False <=> internal call from gdbserver */
5462 )
5463 {
5464 SizeT i;
5465 Bool ok;
5466 UChar vbits8;
5467
5468 /* Check that arrays are addressible before doing any getting/setting.
5469 vbits to be checked only for real user request. */
5470 for (i = 0; i < szB; i++) {
5471 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5472 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5473 return 3;
5474 }
5475 }
5476
5477 /* Do the copy */
5478 if (setting) {
5479 /* setting */
5480 for (i = 0; i < szB; i++) {
5481 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5482 tl_assert(ok);
5483 }
5484 } else {
5485 /* getting */
5486 for (i = 0; i < szB; i++) {
5487 ok = get_vbits8(a + i, &vbits8);
5488 tl_assert(ok);
5489 ((UChar*)vbits)[i] = vbits8;
5490 }
5491 if (is_client_request)
5492 // The bytes in vbits[] have now been set, so mark them as such.
5493 MC_(make_mem_defined)(vbits, szB);
5494 }
5495
5496 return 1;
5497 }
5498
5499
5500 /*------------------------------------------------------------*/
5501 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5502 /*------------------------------------------------------------*/
5503
5504 /* For the memory leak detector, say whether an entire 64k chunk of
5505 address space is possibly in use, or not. If in doubt return
5506 True.
5507 */
MC_(is_within_valid_secondary)5508 Bool MC_(is_within_valid_secondary) ( Addr a )
5509 {
5510 SecMap* sm = maybe_get_secmap_for ( a );
5511 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5512 /* Definitely not in use. */
5513 return False;
5514 } else {
5515 return True;
5516 }
5517 }
5518
5519
5520 /* For the memory leak detector, say whether or not a given word
5521 address is to be regarded as valid. */
MC_(is_valid_aligned_word)5522 Bool MC_(is_valid_aligned_word) ( Addr a )
5523 {
5524 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5525 tl_assert(VG_IS_WORD_ALIGNED(a));
5526 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5527 return False;
5528 if (sizeof(UWord) == 8) {
5529 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5530 return False;
5531 }
5532 if (UNLIKELY(MC_(in_ignored_range)(a)))
5533 return False;
5534 else
5535 return True;
5536 }
5537
5538
5539 /*------------------------------------------------------------*/
5540 /*--- Initialisation ---*/
5541 /*------------------------------------------------------------*/
5542
init_shadow_memory(void)5543 static void init_shadow_memory ( void )
5544 {
5545 Int i;
5546 SecMap* sm;
5547
5548 tl_assert(V_BIT_UNDEFINED == 1);
5549 tl_assert(V_BIT_DEFINED == 0);
5550 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5551 tl_assert(V_BITS8_DEFINED == 0);
5552
5553 /* Build the 3 distinguished secondaries */
5554 sm = &sm_distinguished[SM_DIST_NOACCESS];
5555 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5556
5557 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5558 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5559
5560 sm = &sm_distinguished[SM_DIST_DEFINED];
5561 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5562
5563 /* Set up the primary map. */
5564 /* These entries gradually get overwritten as the used address
5565 space expands. */
5566 for (i = 0; i < N_PRIMARY_MAP; i++)
5567 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5568
5569 /* Auxiliary primary maps */
5570 init_auxmap_L1_L2();
5571
5572 /* auxmap_size = auxmap_used = 0;
5573 no ... these are statically initialised */
5574
5575 /* Secondary V bit table */
5576 secVBitTable = createSecVBitTable();
5577 }
5578
5579
5580 /*------------------------------------------------------------*/
5581 /*--- Sanity check machinery (permanently engaged) ---*/
5582 /*------------------------------------------------------------*/
5583
mc_cheap_sanity_check(void)5584 static Bool mc_cheap_sanity_check ( void )
5585 {
5586 n_sanity_cheap++;
5587 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5588 /* Check for sane operating level */
5589 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5590 return False;
5591 /* nothing else useful we can rapidly check */
5592 return True;
5593 }
5594
mc_expensive_sanity_check(void)5595 static Bool mc_expensive_sanity_check ( void )
5596 {
5597 Int i;
5598 Word n_secmaps_found;
5599 SecMap* sm;
5600 const HChar* errmsg;
5601 Bool bad = False;
5602
5603 if (0) VG_(printf)("expensive sanity check\n");
5604 if (0) return True;
5605
5606 n_sanity_expensive++;
5607 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5608
5609 /* Check for sane operating level */
5610 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5611 return False;
5612
5613 /* Check that the 3 distinguished SMs are still as they should be. */
5614
5615 /* Check noaccess DSM. */
5616 sm = &sm_distinguished[SM_DIST_NOACCESS];
5617 for (i = 0; i < SM_CHUNKS; i++)
5618 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5619 bad = True;
5620
5621 /* Check undefined DSM. */
5622 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5623 for (i = 0; i < SM_CHUNKS; i++)
5624 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5625 bad = True;
5626
5627 /* Check defined DSM. */
5628 sm = &sm_distinguished[SM_DIST_DEFINED];
5629 for (i = 0; i < SM_CHUNKS; i++)
5630 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5631 bad = True;
5632
5633 if (bad) {
5634 VG_(printf)("memcheck expensive sanity: "
5635 "distinguished_secondaries have changed\n");
5636 return False;
5637 }
5638
5639 /* If we're not checking for undefined value errors, the secondary V bit
5640 * table should be empty. */
5641 if (MC_(clo_mc_level) == 1) {
5642 if (0 != VG_(OSetGen_Size)(secVBitTable))
5643 return False;
5644 }
5645
5646 /* check the auxiliary maps, very thoroughly */
5647 n_secmaps_found = 0;
5648 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5649 if (errmsg) {
5650 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5651 return False;
5652 }
5653
5654 /* n_secmaps_found is now the number referred to by the auxiliary
5655 primary map. Now add on the ones referred to by the main
5656 primary map. */
5657 for (i = 0; i < N_PRIMARY_MAP; i++) {
5658 if (primary_map[i] == NULL) {
5659 bad = True;
5660 } else {
5661 if (!is_distinguished_sm(primary_map[i]))
5662 n_secmaps_found++;
5663 }
5664 }
5665
5666 /* check that the number of secmaps issued matches the number that
5667 are reachable (iow, no secmap leaks) */
5668 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5669 bad = True;
5670
5671 if (bad) {
5672 VG_(printf)("memcheck expensive sanity: "
5673 "apparent secmap leakage\n");
5674 return False;
5675 }
5676
5677 if (bad) {
5678 VG_(printf)("memcheck expensive sanity: "
5679 "auxmap covers wrong address space\n");
5680 return False;
5681 }
5682
5683 /* there is only one pointer to each secmap (expensive) */
5684
5685 return True;
5686 }
5687
5688 /*------------------------------------------------------------*/
5689 /*--- Command line args ---*/
5690 /*------------------------------------------------------------*/
5691
5692 /* 31 Aug 2015: Vectorised code is now so widespread that
5693 --partial-loads-ok needs to be enabled by default on all platforms.
5694 Not doing so causes lots of false errors. */
5695 Bool MC_(clo_partial_loads_ok) = True;
5696 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
5697 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
5698 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
5699 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
5700 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
5701 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
5702 UInt MC_(clo_leak_check_heuristics) = H2S(LchStdString)
5703 | H2S( LchLength64)
5704 | H2S( LchNewArray)
5705 | H2S( LchMultipleInheritance);
5706 Bool MC_(clo_workaround_gcc296_bugs) = False;
5707 Int MC_(clo_malloc_fill) = -1;
5708 Int MC_(clo_free_fill) = -1;
5709 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_and_free;
5710 Int MC_(clo_mc_level) = 2;
5711 Bool MC_(clo_show_mismatched_frees) = True;
5712 Bool MC_(clo_expensive_definedness_checks) = False;
5713
5714 static const HChar * MC_(parse_leak_heuristics_tokens) =
5715 "-,stdstring,length64,newarray,multipleinheritance";
5716 /* The first heuristic value (LchNone) has no keyword, as this is
5717 a fake heuristic used to collect the blocks found without any
5718 heuristic. */
5719
mc_process_cmd_line_options(const HChar * arg)5720 static Bool mc_process_cmd_line_options(const HChar* arg)
5721 {
5722 const HChar* tmp_str;
5723 Int tmp_show;
5724
5725 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5726
5727 /* Set MC_(clo_mc_level):
5728 1 = A bit tracking only
5729 2 = A and V bit tracking, but no V bit origins
5730 3 = A and V bit tracking, and V bit origins
5731
5732 Do this by inspecting --undef-value-errors= and
5733 --track-origins=. Reject the case --undef-value-errors=no
5734 --track-origins=yes as meaningless.
5735 */
5736 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
5737 if (MC_(clo_mc_level) == 3) {
5738 goto bad_level;
5739 } else {
5740 MC_(clo_mc_level) = 1;
5741 return True;
5742 }
5743 }
5744 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
5745 if (MC_(clo_mc_level) == 1)
5746 MC_(clo_mc_level) = 2;
5747 return True;
5748 }
5749 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
5750 if (MC_(clo_mc_level) == 3)
5751 MC_(clo_mc_level) = 2;
5752 return True;
5753 }
5754 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
5755 if (MC_(clo_mc_level) == 1) {
5756 goto bad_level;
5757 } else {
5758 MC_(clo_mc_level) = 3;
5759 return True;
5760 }
5761 }
5762
5763 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
5764 else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
5765 MC_(parse_leak_kinds_tokens),
5766 MC_(clo_error_for_leak_kinds)) {}
5767 else if VG_USET_CLO(arg, "--show-leak-kinds",
5768 MC_(parse_leak_kinds_tokens),
5769 MC_(clo_show_leak_kinds)) {}
5770 else if VG_USET_CLO(arg, "--leak-check-heuristics",
5771 MC_(parse_leak_heuristics_tokens),
5772 MC_(clo_leak_check_heuristics)) {}
5773 else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
5774 if (tmp_show) {
5775 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
5776 } else {
5777 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
5778 }
5779 }
5780 else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
5781 if (tmp_show) {
5782 MC_(clo_show_leak_kinds) |= R2S(Possible);
5783 } else {
5784 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
5785 }
5786 }
5787 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
5788 MC_(clo_workaround_gcc296_bugs)) {}
5789
5790 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
5791 0, 10*1000*1000*1000LL) {}
5792
5793 else if VG_BINT_CLO(arg, "--freelist-big-blocks",
5794 MC_(clo_freelist_big_blocks),
5795 0, 10*1000*1000*1000LL) {}
5796
5797 else if VG_XACT_CLO(arg, "--leak-check=no",
5798 MC_(clo_leak_check), LC_Off) {}
5799 else if VG_XACT_CLO(arg, "--leak-check=summary",
5800 MC_(clo_leak_check), LC_Summary) {}
5801 else if VG_XACT_CLO(arg, "--leak-check=yes",
5802 MC_(clo_leak_check), LC_Full) {}
5803 else if VG_XACT_CLO(arg, "--leak-check=full",
5804 MC_(clo_leak_check), LC_Full) {}
5805
5806 else if VG_XACT_CLO(arg, "--leak-resolution=low",
5807 MC_(clo_leak_resolution), Vg_LowRes) {}
5808 else if VG_XACT_CLO(arg, "--leak-resolution=med",
5809 MC_(clo_leak_resolution), Vg_MedRes) {}
5810 else if VG_XACT_CLO(arg, "--leak-resolution=high",
5811 MC_(clo_leak_resolution), Vg_HighRes) {}
5812
5813 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
5814 Bool ok = parse_ignore_ranges(tmp_str);
5815 if (!ok) {
5816 VG_(message)(Vg_DebugMsg,
5817 "ERROR: --ignore-ranges: "
5818 "invalid syntax, or end <= start in range\n");
5819 return False;
5820 }
5821 if (gIgnoredAddressRanges) {
5822 UInt i;
5823 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
5824 UWord val = IAR_INVALID;
5825 UWord key_min = ~(UWord)0;
5826 UWord key_max = (UWord)0;
5827 VG_(indexRangeMap)( &key_min, &key_max, &val,
5828 gIgnoredAddressRanges, i );
5829 tl_assert(key_min <= key_max);
5830 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
5831 if (key_max - key_min > limit && val == IAR_CommandLine) {
5832 VG_(message)(Vg_DebugMsg,
5833 "ERROR: --ignore-ranges: suspiciously large range:\n");
5834 VG_(message)(Vg_DebugMsg,
5835 " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
5836 key_max - key_min + 1);
5837 return False;
5838 }
5839 }
5840 }
5841 }
5842
5843 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
5844 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
5845
5846 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
5847 MC_(clo_keep_stacktraces), KS_alloc) {}
5848 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
5849 MC_(clo_keep_stacktraces), KS_free) {}
5850 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
5851 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
5852 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
5853 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
5854 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
5855 MC_(clo_keep_stacktraces), KS_none) {}
5856
5857 else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
5858 MC_(clo_show_mismatched_frees)) {}
5859 else if VG_BOOL_CLO(arg, "--expensive-definedness-checks",
5860 MC_(clo_expensive_definedness_checks)) {}
5861
5862 else
5863 return VG_(replacement_malloc_process_cmd_line_option)(arg);
5864
5865 return True;
5866
5867
5868 bad_level:
5869 VG_(fmsg_bad_option)(arg,
5870 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
5871 }
5872
mc_print_usage(void)5873 static void mc_print_usage(void)
5874 {
5875 VG_(printf)(
5876 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
5877 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
5878 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
5879 " [definite,possible]\n"
5880 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
5881 " [definite,possible]\n"
5882 " where kind is one of:\n"
5883 " definite indirect possible reachable all none\n"
5884 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
5885 " improving leak search false positive [all]\n"
5886 " where heur is one of:\n"
5887 " stdstring length64 newarray multipleinheritance all none\n"
5888 " --show-reachable=yes same as --show-leak-kinds=all\n"
5889 " --show-reachable=no --show-possibly-lost=yes\n"
5890 " same as --show-leak-kinds=definite,possible\n"
5891 " --show-reachable=no --show-possibly-lost=no\n"
5892 " same as --show-leak-kinds=definite\n"
5893 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
5894 " --track-origins=no|yes show origins of undefined values? [no]\n"
5895 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
5896 " --expensive-definedness-checks=no|yes\n"
5897 " Use extra-precise definedness tracking [no]\n"
5898 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
5899 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
5900 " --workaround-gcc296-bugs=no|yes self explanatory [no]\n"
5901 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
5902 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
5903 " --free-fill=<hexnumber> fill free'd areas with given value\n"
5904 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
5905 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
5906 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
5907 );
5908 }
5909
mc_print_debug_usage(void)5910 static void mc_print_debug_usage(void)
5911 {
5912 VG_(printf)(
5913 " (none)\n"
5914 );
5915 }
5916
5917
5918 /*------------------------------------------------------------*/
5919 /*--- Client blocks ---*/
5920 /*------------------------------------------------------------*/
5921
5922 /* Client block management:
5923
5924 This is managed as an expanding array of client block descriptors.
5925 Indices of live descriptors are issued to the client, so it can ask
5926 to free them later. Therefore we cannot slide live entries down
5927 over dead ones. Instead we must use free/inuse flags and scan for
5928 an empty slot at allocation time. This in turn means allocation is
5929 relatively expensive, so we hope this does not happen too often.
5930
5931 An unused block has start == size == 0
5932 */
5933
5934 /* type CGenBlock is defined in mc_include.h */
5935
5936 /* This subsystem is self-initialising. */
5937 static UWord cgb_size = 0;
5938 static UWord cgb_used = 0;
5939 static CGenBlock* cgbs = NULL;
5940
5941 /* Stats for this subsystem. */
5942 static ULong cgb_used_MAX = 0; /* Max in use. */
5943 static ULong cgb_allocs = 0; /* Number of allocs. */
5944 static ULong cgb_discards = 0; /* Number of discards. */
5945 static ULong cgb_search = 0; /* Number of searches. */
5946
5947
5948 /* Get access to the client block array. */
MC_(get_ClientBlock_array)5949 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5950 /*OUT*/UWord* nBlocks )
5951 {
5952 *blocks = cgbs;
5953 *nBlocks = cgb_used;
5954 }
5955
5956
5957 static
alloc_client_block(void)5958 Int alloc_client_block ( void )
5959 {
5960 UWord i, sz_new;
5961 CGenBlock* cgbs_new;
5962
5963 cgb_allocs++;
5964
5965 for (i = 0; i < cgb_used; i++) {
5966 cgb_search++;
5967 if (cgbs[i].start == 0 && cgbs[i].size == 0)
5968 return i;
5969 }
5970
5971 /* Not found. Try to allocate one at the end. */
5972 if (cgb_used < cgb_size) {
5973 cgb_used++;
5974 return cgb_used-1;
5975 }
5976
5977 /* Ok, we have to allocate a new one. */
5978 tl_assert(cgb_used == cgb_size);
5979 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5980
5981 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5982 for (i = 0; i < cgb_used; i++)
5983 cgbs_new[i] = cgbs[i];
5984
5985 if (cgbs != NULL)
5986 VG_(free)( cgbs );
5987 cgbs = cgbs_new;
5988
5989 cgb_size = sz_new;
5990 cgb_used++;
5991 if (cgb_used > cgb_used_MAX)
5992 cgb_used_MAX = cgb_used;
5993 return cgb_used-1;
5994 }
5995
5996
show_client_block_stats(void)5997 static void show_client_block_stats ( void )
5998 {
5999 VG_(message)(Vg_DebugMsg,
6000 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6001 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6002 );
6003 }
print_monitor_help(void)6004 static void print_monitor_help ( void )
6005 {
6006 VG_(gdb_printf)
6007 (
6008 "\n"
6009 "memcheck monitor commands:\n"
6010 " xb <addr> [<len>]\n"
6011 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6012 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6013 " Then prints the bytes values below the corresponding validity bits\n"
6014 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6015 " Example: xb 0x8049c78 10\n"
6016 " get_vbits <addr> [<len>]\n"
6017 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6018 " make_memory [noaccess|undefined\n"
6019 " |defined|Definedifaddressable] <addr> [<len>]\n"
6020 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6021 " check_memory [addressable|defined] <addr> [<len>]\n"
6022 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6023 " and outputs a description of <addr>\n"
6024 " leak_check [full*|summary]\n"
6025 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6026 " [heuristics heur1,heur2,...]\n"
6027 " [increased*|changed|any]\n"
6028 " [unlimited*|limited <max_loss_records_output>]\n"
6029 " * = defaults\n"
6030 " where kind is one of:\n"
6031 " definite indirect possible reachable all none\n"
6032 " where heur is one of:\n"
6033 " stdstring length64 newarray multipleinheritance all none*\n"
6034 " Examples: leak_check\n"
6035 " leak_check summary any\n"
6036 " leak_check full kinds indirect,possible\n"
6037 " leak_check full reachable any limited 100\n"
6038 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6039 " [unlimited*|limited <max_blocks>]\n"
6040 " [heuristics heur1,heur2,...]\n"
6041 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6042 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6043 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6044 " * = defaults\n"
6045 " who_points_at <addr> [<len>]\n"
6046 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6047 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6048 " with len > 1, will also show \"interior pointers\")\n"
6049 "\n");
6050 }
6051
6052 /* Print szB bytes at address, with a format similar to the gdb command
6053 x /<szB>xb address.
6054 res[i] == 1 indicates the corresponding byte is addressable. */
gdb_xb(Addr address,SizeT szB,Int res[])6055 static void gdb_xb (Addr address, SizeT szB, Int res[])
6056 {
6057 UInt i;
6058
6059 for (i = 0; i < szB; i++) {
6060 UInt bnr = i % 8;
6061 if (bnr == 0) {
6062 if (i != 0)
6063 VG_(printf) ("\n"); // Terminate previous line
6064 VG_(printf) ("%p:", (void*)(address+i));
6065 }
6066 if (res[i] == 1)
6067 VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6068 else
6069 VG_(printf) ("\t0x??");
6070 }
6071 VG_(printf) ("\n"); // Terminate previous line
6072 }
6073
6074
6075 /* Returns the address of the next non space character,
6076 or address of the string terminator. */
next_non_space(HChar * s)6077 static HChar* next_non_space (HChar *s)
6078 {
6079 while (*s && *s == ' ')
6080 s++;
6081 return s;
6082 }
6083
6084 /* Parse an integer slice, i.e. a single integer or a range of integer.
6085 Syntax is:
6086 <integer>[..<integer> ]
6087 (spaces are allowed before and/or after ..).
6088 Return True if range correctly parsed, False otherwise. */
VG_(parse_slice)6089 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6090 UInt *from, UInt *to)
6091 {
6092 HChar* wl;
6093 HChar *endptr;
6094 endptr = NULL;////
6095 wl = VG_(strtok_r) (s, " ", saveptr);
6096
6097 /* slice must start with an integer. */
6098 if (wl == NULL) {
6099 VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6100 return False;
6101 }
6102 *from = VG_(strtoull10) (wl, &endptr);
6103 if (endptr == wl) {
6104 VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6105 return False;
6106 }
6107
6108 if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6109 /* wl token is an integer terminating the string
6110 or else next token does not start with .
6111 In both cases, the slice is a single integer. */
6112 *to = *from;
6113 return True;
6114 }
6115
6116 if (*endptr == '\0') {
6117 // iii .. => get the next token
6118 wl = VG_(strtok_r) (NULL, " .", saveptr);
6119 } else {
6120 // It must be iii..
6121 if (*endptr != '.' && *(endptr+1) != '.') {
6122 VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6123 return False;
6124 }
6125 if ( *(endptr+2) == ' ') {
6126 // It must be iii.. jjj => get the next token
6127 wl = VG_(strtok_r) (NULL, " .", saveptr);
6128 } else {
6129 // It must be iii..jjj
6130 wl = endptr+2;
6131 }
6132 }
6133
6134 *to = VG_(strtoull10) (wl, &endptr);
6135 if (*endptr != '\0') {
6136 VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6137 return False;
6138 }
6139
6140 if (*from > *to) {
6141 VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6142 "in slice <from>..<to>\n");
6143 return False;
6144 }
6145
6146 return True;
6147 }
6148
6149 /* return True if request recognised, False otherwise */
handle_gdb_monitor_command(ThreadId tid,HChar * req)6150 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6151 {
6152 HChar* wcmd;
6153 HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
6154 HChar *ssaveptr;
6155
6156 VG_(strcpy) (s, req);
6157
6158 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6159 /* NB: if possible, avoid introducing a new command below which
6160 starts with the same first letter(s) as an already existing
6161 command. This ensures a shorter abbreviation for the user. */
6162 switch (VG_(keyword_id)
6163 ("help get_vbits leak_check make_memory check_memory "
6164 "block_list who_points_at xb",
6165 wcmd, kwd_report_duplicated_matches)) {
6166 case -2: /* multiple matches */
6167 return True;
6168 case -1: /* not found */
6169 return False;
6170 case 0: /* help */
6171 print_monitor_help();
6172 return True;
6173 case 1: { /* get_vbits */
6174 Addr address;
6175 SizeT szB = 1;
6176 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6177 UChar vbits;
6178 Int i;
6179 Int unaddressable = 0;
6180 for (i = 0; i < szB; i++) {
6181 Int res = mc_get_or_set_vbits_for_client
6182 (address+i, (Addr) &vbits, 1,
6183 False, /* get them */
6184 False /* is client request */ );
6185 /* we are before the first character on next line, print a \n. */
6186 if ((i % 32) == 0 && i != 0)
6187 VG_(printf) ("\n");
6188 /* we are before the next block of 4 starts, print a space. */
6189 else if ((i % 4) == 0 && i != 0)
6190 VG_(printf) (" ");
6191 if (res == 1) {
6192 VG_(printf) ("%02x", vbits);
6193 } else {
6194 tl_assert(3 == res);
6195 unaddressable++;
6196 VG_(printf) ("__");
6197 }
6198 }
6199 VG_(printf) ("\n");
6200 if (unaddressable) {
6201 VG_(printf)
6202 ("Address %p len %lu has %d bytes unaddressable\n",
6203 (void *)address, szB, unaddressable);
6204 }
6205 }
6206 return True;
6207 }
6208 case 2: { /* leak_check */
6209 Int err = 0;
6210 LeakCheckParams lcp;
6211 HChar* kw;
6212
6213 lcp.mode = LC_Full;
6214 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
6215 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6216 lcp.heuristics = 0;
6217 lcp.deltamode = LCD_Increased;
6218 lcp.max_loss_records_output = 999999999;
6219 lcp.requested_by_monitor_command = True;
6220
6221 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6222 kw != NULL;
6223 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6224 switch (VG_(keyword_id)
6225 ("full summary "
6226 "kinds reachable possibleleak definiteleak "
6227 "heuristics "
6228 "increased changed any "
6229 "unlimited limited ",
6230 kw, kwd_report_all)) {
6231 case -2: err++; break;
6232 case -1: err++; break;
6233 case 0: /* full */
6234 lcp.mode = LC_Full; break;
6235 case 1: /* summary */
6236 lcp.mode = LC_Summary; break;
6237 case 2: { /* kinds */
6238 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6239 if (wcmd == NULL
6240 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6241 True/*allow_all*/,
6242 wcmd,
6243 &lcp.show_leak_kinds)) {
6244 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6245 err++;
6246 }
6247 break;
6248 }
6249 case 3: /* reachable */
6250 lcp.show_leak_kinds = MC_(all_Reachedness)();
6251 break;
6252 case 4: /* possibleleak */
6253 lcp.show_leak_kinds
6254 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6255 break;
6256 case 5: /* definiteleak */
6257 lcp.show_leak_kinds = R2S(Unreached);
6258 break;
6259 case 6: { /* heuristics */
6260 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6261 if (wcmd == NULL
6262 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6263 True,/*allow_all*/
6264 wcmd,
6265 &lcp.heuristics)) {
6266 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6267 err++;
6268 }
6269 break;
6270 }
6271 case 7: /* increased */
6272 lcp.deltamode = LCD_Increased; break;
6273 case 8: /* changed */
6274 lcp.deltamode = LCD_Changed; break;
6275 case 9: /* any */
6276 lcp.deltamode = LCD_Any; break;
6277 case 10: /* unlimited */
6278 lcp.max_loss_records_output = 999999999; break;
6279 case 11: { /* limited */
6280 Int int_value;
6281 const HChar* endptr;
6282
6283 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6284 if (wcmd == NULL) {
6285 int_value = 0;
6286 endptr = "empty"; /* to report an error below */
6287 } else {
6288 HChar *the_end;
6289 int_value = VG_(strtoll10) (wcmd, &the_end);
6290 endptr = the_end;
6291 }
6292 if (*endptr != '\0')
6293 VG_(gdb_printf) ("missing or malformed integer value\n");
6294 else if (int_value > 0)
6295 lcp.max_loss_records_output = (UInt) int_value;
6296 else
6297 VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6298 " got %d\n", int_value);
6299 break;
6300 }
6301 default:
6302 tl_assert (0);
6303 }
6304 }
6305 if (!err)
6306 MC_(detect_memory_leaks)(tid, &lcp);
6307 return True;
6308 }
6309
6310 case 3: { /* make_memory */
6311 Addr address;
6312 SizeT szB = 1;
6313 Int kwdid = VG_(keyword_id)
6314 ("noaccess undefined defined Definedifaddressable",
6315 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6316 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6317 return True;
6318 switch (kwdid) {
6319 case -2: break;
6320 case -1: break;
6321 case 0: MC_(make_mem_noaccess) (address, szB); break;
6322 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6323 MC_OKIND_USER ); break;
6324 case 2: MC_(make_mem_defined) ( address, szB ); break;
6325 case 3: make_mem_defined_if_addressable ( address, szB ); break;;
6326 default: tl_assert(0);
6327 }
6328 return True;
6329 }
6330
6331 case 4: { /* check_memory */
6332 Addr address;
6333 SizeT szB = 1;
6334 Addr bad_addr;
6335 UInt okind;
6336 const HChar* src;
6337 UInt otag;
6338 UInt ecu;
6339 ExeContext* origin_ec;
6340 MC_ReadResult res;
6341
6342 Int kwdid = VG_(keyword_id)
6343 ("addressable defined",
6344 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6345 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6346 return True;
6347 switch (kwdid) {
6348 case -2: break;
6349 case -1: break;
6350 case 0: /* addressable */
6351 if (is_mem_addressable ( address, szB, &bad_addr ))
6352 VG_(printf) ("Address %p len %lu addressable\n",
6353 (void *)address, szB);
6354 else
6355 VG_(printf)
6356 ("Address %p len %lu not addressable:\nbad address %p\n",
6357 (void *)address, szB, (void *) bad_addr);
6358 MC_(pp_describe_addr) (address);
6359 break;
6360 case 1: /* defined */
6361 res = is_mem_defined ( address, szB, &bad_addr, &otag );
6362 if (MC_AddrErr == res)
6363 VG_(printf)
6364 ("Address %p len %lu not addressable:\nbad address %p\n",
6365 (void *)address, szB, (void *) bad_addr);
6366 else if (MC_ValueErr == res) {
6367 okind = otag & 3;
6368 switch (okind) {
6369 case MC_OKIND_STACK:
6370 src = " was created by a stack allocation"; break;
6371 case MC_OKIND_HEAP:
6372 src = " was created by a heap allocation"; break;
6373 case MC_OKIND_USER:
6374 src = " was created by a client request"; break;
6375 case MC_OKIND_UNKNOWN:
6376 src = ""; break;
6377 default: tl_assert(0);
6378 }
6379 VG_(printf)
6380 ("Address %p len %lu not defined:\n"
6381 "Uninitialised value at %p%s\n",
6382 (void *)address, szB, (void *) bad_addr, src);
6383 ecu = otag & ~3;
6384 if (VG_(is_plausible_ECU)(ecu)) {
6385 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6386 VG_(pp_ExeContext)( origin_ec );
6387 }
6388 }
6389 else
6390 VG_(printf) ("Address %p len %lu defined\n",
6391 (void *)address, szB);
6392 MC_(pp_describe_addr) (address);
6393 break;
6394 default: tl_assert(0);
6395 }
6396 return True;
6397 }
6398
6399 case 5: { /* block_list */
6400 HChar* wl;
6401 HChar *the_end;
6402 UInt lr_nr_from = 0;
6403 UInt lr_nr_to = 0;
6404
6405 if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6406 UInt limit_blocks = 999999999;
6407 Int int_value;
6408 UInt heuristics = 0;
6409
6410 for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6411 wl != NULL;
6412 wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6413 switch (VG_(keyword_id) ("unlimited limited heuristics ",
6414 wl, kwd_report_all)) {
6415 case -2: return True;
6416 case -1: return True;
6417 case 0: /* unlimited */
6418 limit_blocks = 999999999; break;
6419 case 1: /* limited */
6420 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6421 if (wcmd == NULL) {
6422 VG_(gdb_printf) ("missing integer value\n");
6423 return True;
6424 }
6425 int_value = VG_(strtoll10) (wcmd, &the_end);
6426 if (*the_end != '\0') {
6427 VG_(gdb_printf) ("malformed integer value\n");
6428 return True;
6429 }
6430 if (int_value <= 0) {
6431 VG_(gdb_printf) ("max_blocks must be >= 1,"
6432 " got %d\n", int_value);
6433 return True;
6434 }
6435 limit_blocks = (UInt) int_value;
6436 break;
6437 case 2: /* heuristics */
6438 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6439 if (wcmd == NULL
6440 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6441 True,/*allow_all*/
6442 wcmd,
6443 &heuristics)) {
6444 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6445 return True;
6446 }
6447 break;
6448 default:
6449 tl_assert (0);
6450 }
6451 }
6452 /* substract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6453 is 1 more than the index in lr_array. */
6454 if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6455 lr_nr_to-1,
6456 limit_blocks,
6457 heuristics))
6458 VG_(gdb_printf) ("invalid loss record nr\n");
6459 }
6460 return True;
6461 }
6462
6463 case 6: { /* who_points_at */
6464 Addr address;
6465 SizeT szB = 1;
6466
6467 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6468 return True;
6469 if (address == (Addr) 0) {
6470 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6471 return True;
6472 }
6473 MC_(who_points_at) (address, szB);
6474 return True;
6475 }
6476
6477 case 7: { /* xb */
6478 Addr address;
6479 SizeT szB = 1;
6480 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6481 UChar vbits[8];
6482 Int res[8];
6483 Int i;
6484 Int unaddressable = 0;
6485 for (i = 0; i < szB; i++) {
6486 Int bnr = i % 8;
6487 res[bnr] = mc_get_or_set_vbits_for_client
6488 (address+i, (Addr) &vbits[bnr], 1,
6489 False, /* get them */
6490 False /* is client request */ );
6491 /* We going to print the first vabits of a new line.
6492 Terminate the previous line if needed: prints a line with the
6493 address and the data. */
6494 if (bnr == 0) {
6495 if (i != 0) {
6496 VG_(printf) ("\n");
6497 gdb_xb (address + i - 8, 8, res);
6498 }
6499 VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6500 }
6501 if (res[bnr] == 1) {
6502 VG_(printf) ("\t %02x", vbits[bnr]);
6503 } else {
6504 tl_assert(3 == res[bnr]);
6505 unaddressable++;
6506 VG_(printf) ("\t __");
6507 }
6508 }
6509 VG_(printf) ("\n");
6510 if (szB % 8 == 0 && szB > 0)
6511 gdb_xb (address + szB - 8, 8, res);
6512 else
6513 gdb_xb (address + szB - szB % 8, szB % 8, res);
6514 if (unaddressable) {
6515 VG_(printf)
6516 ("Address %p len %lu has %d bytes unaddressable\n",
6517 (void *)address, szB, unaddressable);
6518 }
6519 }
6520 return True;
6521 }
6522
6523 default:
6524 tl_assert(0);
6525 return False;
6526 }
6527 }
6528
6529 /*------------------------------------------------------------*/
6530 /*--- Client requests ---*/
6531 /*------------------------------------------------------------*/
6532
mc_handle_client_request(ThreadId tid,UWord * arg,UWord * ret)6533 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6534 {
6535 Int i;
6536 Addr bad_addr;
6537
6538 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6539 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6540 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6541 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6542 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6543 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6544 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6545 && VG_USERREQ__MEMPOOL_FREE != arg[0]
6546 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6547 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
6548 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
6549 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
6550 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
6551 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6552 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
6553 return False;
6554
6555 switch (arg[0]) {
6556 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
6557 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
6558 if (!ok)
6559 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
6560 *ret = ok ? (UWord)NULL : bad_addr;
6561 break;
6562 }
6563
6564 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
6565 Bool errorV = False;
6566 Addr bad_addrV = 0;
6567 UInt otagV = 0;
6568 Bool errorA = False;
6569 Addr bad_addrA = 0;
6570 is_mem_defined_comprehensive(
6571 arg[1], arg[2],
6572 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
6573 );
6574 if (errorV) {
6575 MC_(record_user_error) ( tid, bad_addrV,
6576 /*isAddrErr*/False, otagV );
6577 }
6578 if (errorA) {
6579 MC_(record_user_error) ( tid, bad_addrA,
6580 /*isAddrErr*/True, 0 );
6581 }
6582 /* Return the lower of the two erring addresses, if any. */
6583 *ret = 0;
6584 if (errorV && !errorA) {
6585 *ret = bad_addrV;
6586 }
6587 if (!errorV && errorA) {
6588 *ret = bad_addrA;
6589 }
6590 if (errorV && errorA) {
6591 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
6592 }
6593 break;
6594 }
6595
6596 case VG_USERREQ__DO_LEAK_CHECK: {
6597 LeakCheckParams lcp;
6598
6599 if (arg[1] == 0)
6600 lcp.mode = LC_Full;
6601 else if (arg[1] == 1)
6602 lcp.mode = LC_Summary;
6603 else {
6604 VG_(message)(Vg_UserMsg,
6605 "Warning: unknown memcheck leak search mode\n");
6606 lcp.mode = LC_Full;
6607 }
6608
6609 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6610 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6611 lcp.heuristics = MC_(clo_leak_check_heuristics);
6612
6613 if (arg[2] == 0)
6614 lcp.deltamode = LCD_Any;
6615 else if (arg[2] == 1)
6616 lcp.deltamode = LCD_Increased;
6617 else if (arg[2] == 2)
6618 lcp.deltamode = LCD_Changed;
6619 else {
6620 VG_(message)
6621 (Vg_UserMsg,
6622 "Warning: unknown memcheck leak search deltamode\n");
6623 lcp.deltamode = LCD_Any;
6624 }
6625 lcp.max_loss_records_output = 999999999;
6626 lcp.requested_by_monitor_command = False;
6627
6628 MC_(detect_memory_leaks)(tid, &lcp);
6629 *ret = 0; /* return value is meaningless */
6630 break;
6631 }
6632
6633 case VG_USERREQ__MAKE_MEM_NOACCESS:
6634 MC_(make_mem_noaccess) ( arg[1], arg[2] );
6635 *ret = -1;
6636 break;
6637
6638 case VG_USERREQ__MAKE_MEM_UNDEFINED:
6639 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
6640 MC_OKIND_USER );
6641 *ret = -1;
6642 break;
6643
6644 case VG_USERREQ__MAKE_MEM_DEFINED:
6645 MC_(make_mem_defined) ( arg[1], arg[2] );
6646 *ret = -1;
6647 break;
6648
6649 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
6650 make_mem_defined_if_addressable ( arg[1], arg[2] );
6651 *ret = -1;
6652 break;
6653
6654 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
6655 if (arg[1] != 0 && arg[2] != 0) {
6656 i = alloc_client_block();
6657 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
6658 cgbs[i].start = arg[1];
6659 cgbs[i].size = arg[2];
6660 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
6661 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
6662 *ret = i;
6663 } else
6664 *ret = -1;
6665 break;
6666
6667 case VG_USERREQ__DISCARD: /* discard */
6668 if (cgbs == NULL
6669 || arg[2] >= cgb_used ||
6670 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
6671 *ret = 1;
6672 } else {
6673 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
6674 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
6675 VG_(free)(cgbs[arg[2]].desc);
6676 cgb_discards++;
6677 *ret = 0;
6678 }
6679 break;
6680
6681 case VG_USERREQ__GET_VBITS:
6682 *ret = mc_get_or_set_vbits_for_client
6683 ( arg[1], arg[2], arg[3],
6684 False /* get them */,
6685 True /* is client request */ );
6686 break;
6687
6688 case VG_USERREQ__SET_VBITS:
6689 *ret = mc_get_or_set_vbits_for_client
6690 ( arg[1], arg[2], arg[3],
6691 True /* set them */,
6692 True /* is client request */ );
6693 break;
6694
6695 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
6696 UWord** argp = (UWord**)arg;
6697 // MC_(bytes_leaked) et al were set by the last leak check (or zero
6698 // if no prior leak checks performed).
6699 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
6700 *argp[2] = MC_(bytes_dubious);
6701 *argp[3] = MC_(bytes_reachable);
6702 *argp[4] = MC_(bytes_suppressed);
6703 // there is no argp[5]
6704 //*argp[5] = MC_(bytes_indirect);
6705 // XXX need to make *argp[1-4] defined; currently done in the
6706 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
6707 *ret = 0;
6708 return True;
6709 }
6710 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
6711 UWord** argp = (UWord**)arg;
6712 // MC_(blocks_leaked) et al were set by the last leak check (or zero
6713 // if no prior leak checks performed).
6714 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
6715 *argp[2] = MC_(blocks_dubious);
6716 *argp[3] = MC_(blocks_reachable);
6717 *argp[4] = MC_(blocks_suppressed);
6718 // there is no argp[5]
6719 //*argp[5] = MC_(blocks_indirect);
6720 // XXX need to make *argp[1-4] defined; currently done in the
6721 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
6722 *ret = 0;
6723 return True;
6724 }
6725 case VG_USERREQ__MALLOCLIKE_BLOCK: {
6726 Addr p = (Addr)arg[1];
6727 SizeT sizeB = arg[2];
6728 UInt rzB = arg[3];
6729 Bool is_zeroed = (Bool)arg[4];
6730
6731 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
6732 MC_AllocCustom, MC_(malloc_list) );
6733 if (rzB > 0) {
6734 MC_(make_mem_noaccess) ( p - rzB, rzB);
6735 MC_(make_mem_noaccess) ( p + sizeB, rzB);
6736 }
6737 return True;
6738 }
6739 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
6740 Addr p = (Addr)arg[1];
6741 SizeT oldSizeB = arg[2];
6742 SizeT newSizeB = arg[3];
6743 UInt rzB = arg[4];
6744
6745 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
6746 return True;
6747 }
6748 case VG_USERREQ__FREELIKE_BLOCK: {
6749 Addr p = (Addr)arg[1];
6750 UInt rzB = arg[2];
6751
6752 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
6753 return True;
6754 }
6755
6756 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
6757 HChar* s = (HChar*)arg[1];
6758 Addr dst = (Addr) arg[2];
6759 Addr src = (Addr) arg[3];
6760 SizeT len = (SizeT)arg[4];
6761 MC_(record_overlap_error)(tid, s, src, dst, len);
6762 return True;
6763 }
6764
6765 case VG_USERREQ__CREATE_MEMPOOL: {
6766 Addr pool = (Addr)arg[1];
6767 UInt rzB = arg[2];
6768 Bool is_zeroed = (Bool)arg[3];
6769
6770 MC_(create_mempool) ( pool, rzB, is_zeroed );
6771 return True;
6772 }
6773
6774 case VG_USERREQ__DESTROY_MEMPOOL: {
6775 Addr pool = (Addr)arg[1];
6776
6777 MC_(destroy_mempool) ( pool );
6778 return True;
6779 }
6780
6781 case VG_USERREQ__MEMPOOL_ALLOC: {
6782 Addr pool = (Addr)arg[1];
6783 Addr addr = (Addr)arg[2];
6784 UInt size = arg[3];
6785
6786 MC_(mempool_alloc) ( tid, pool, addr, size );
6787 return True;
6788 }
6789
6790 case VG_USERREQ__MEMPOOL_FREE: {
6791 Addr pool = (Addr)arg[1];
6792 Addr addr = (Addr)arg[2];
6793
6794 MC_(mempool_free) ( pool, addr );
6795 return True;
6796 }
6797
6798 case VG_USERREQ__MEMPOOL_TRIM: {
6799 Addr pool = (Addr)arg[1];
6800 Addr addr = (Addr)arg[2];
6801 UInt size = arg[3];
6802
6803 MC_(mempool_trim) ( pool, addr, size );
6804 return True;
6805 }
6806
6807 case VG_USERREQ__MOVE_MEMPOOL: {
6808 Addr poolA = (Addr)arg[1];
6809 Addr poolB = (Addr)arg[2];
6810
6811 MC_(move_mempool) ( poolA, poolB );
6812 return True;
6813 }
6814
6815 case VG_USERREQ__MEMPOOL_CHANGE: {
6816 Addr pool = (Addr)arg[1];
6817 Addr addrA = (Addr)arg[2];
6818 Addr addrB = (Addr)arg[3];
6819 UInt size = arg[4];
6820
6821 MC_(mempool_change) ( pool, addrA, addrB, size );
6822 return True;
6823 }
6824
6825 case VG_USERREQ__MEMPOOL_EXISTS: {
6826 Addr pool = (Addr)arg[1];
6827
6828 *ret = (UWord) MC_(mempool_exists) ( pool );
6829 return True;
6830 }
6831
6832 case VG_USERREQ__GDB_MONITOR_COMMAND: {
6833 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
6834 if (handled)
6835 *ret = 1;
6836 else
6837 *ret = 0;
6838 return handled;
6839 }
6840
6841 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
6842 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
6843 Bool addRange
6844 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
6845 Bool ok
6846 = modify_ignore_ranges(addRange, arg[1], arg[2]);
6847 *ret = ok ? 1 : 0;
6848 return True;
6849 }
6850
6851 default:
6852 VG_(message)(
6853 Vg_UserMsg,
6854 "Warning: unknown memcheck client request code %llx\n",
6855 (ULong)arg[0]
6856 );
6857 return False;
6858 }
6859 return True;
6860 }
6861
6862
6863 /*------------------------------------------------------------*/
6864 /*--- Crude profiling machinery. ---*/
6865 /*------------------------------------------------------------*/
6866
6867 // We track a number of interesting events (using PROF_EVENT)
6868 // if MC_PROFILE_MEMORY is defined.
6869
6870 #ifdef MC_PROFILE_MEMORY
6871
6872 ULong MC_(event_ctr)[MCPE_LAST];
6873
6874 /* Event counter names. Use the name of the function that increases the
6875 event counter. Drop any MC_() and mc_ prefices. */
6876 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
6877 [MCPE_LOADVN_SLOW] = "LOADVn_slow",
6878 [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
6879 [MCPE_STOREVN_SLOW] = "STOREVn_slow",
6880 [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
6881 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
6882 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
6883 "make_aligned_word32_undefined_slow",
6884 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
6885 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
6886 "make_aligned_word64_undefined_slow",
6887 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
6888 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
6889 "make_aligned_word32_noaccess_slow",
6890 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
6891 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
6892 "make_aligned_word64_noaccess_slow",
6893 [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
6894 [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
6895 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
6896 [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
6897 [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
6898 [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
6899 [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
6900 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
6901 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
6902 [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
6903 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
6904 [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
6905 [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
6906 [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
6907 [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
6908 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
6909 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
6910 "is_mem_defined_comprehensive(loop)",
6911 [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
6912 [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
6913 [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
6914 [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
6915 [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
6916 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
6917 "set_address_range_perms(single-secmap)",
6918 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
6919 "set_address_range_perms(startof-secmap)",
6920 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
6921 "set_address_range_perms(multiple-secmaps)",
6922 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
6923 "set_address_range_perms(dist-sm1)",
6924 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
6925 "set_address_range_perms(dist-sm2)",
6926 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
6927 "set_address_range_perms(dist-sm1-quick)",
6928 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
6929 "set_address_range_perms(dist-sm2-quick)",
6930 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
6931 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
6932 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
6933 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
6934 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
6935 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
6936 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
6937 "set_address_range_perms(loop64K-free-dist-sm)",
6938 [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
6939 [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
6940 [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
6941 [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
6942 [MCPE_LOADV64] = "LOADV64",
6943 [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
6944 [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
6945 [MCPE_STOREV64] = "STOREV64",
6946 [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
6947 [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
6948 [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
6949 [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
6950 [MCPE_LOADV32] = "LOADV32",
6951 [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
6952 [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
6953 [MCPE_STOREV32] = "STOREV32",
6954 [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
6955 [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
6956 [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
6957 [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
6958 [MCPE_LOADV16] = "LOADV16",
6959 [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
6960 [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
6961 [MCPE_STOREV16] = "STOREV16",
6962 [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
6963 [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
6964 [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
6965 [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
6966 [MCPE_LOADV8] = "LOADV8",
6967 [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
6968 [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
6969 [MCPE_STOREV8] = "STOREV8",
6970 [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
6971 [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
6972 [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
6973 [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
6974 [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
6975 [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
6976 [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
6977 [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
6978 [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
6979 [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
6980 [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
6981 [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
6982 [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
6983 [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
6984 [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
6985 [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
6986 [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
6987 [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
6988 [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
6989 [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
6990 [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
6991 [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
6992 [MCPE_NEW_MEM_STACK] = "new_mem_stack",
6993 [MCPE_DIE_MEM_STACK] = "die_mem_stack",
6994 };
6995
init_prof_mem(void)6996 static void init_prof_mem ( void )
6997 {
6998 Int i, name_count = 0;
6999
7000 for (i = 0; i < MCPE_LAST; i++) {
7001 MC_(event_ctr)[i] = 0;
7002 if (MC_(event_ctr_name)[i] != NULL)
7003 ++name_count;
7004 }
7005
7006 /* Make sure every profiling event has a name */
7007 tl_assert(name_count == MCPE_LAST);
7008 }
7009
done_prof_mem(void)7010 static void done_prof_mem ( void )
7011 {
7012 Int i, n;
7013 Bool spaced = False;
7014 for (i = n = 0; i < MCPE_LAST; i++) {
7015 if (!spaced && (n % 10) == 0) {
7016 VG_(printf)("\n");
7017 spaced = True;
7018 }
7019 if (MC_(event_ctr)[i] > 0) {
7020 spaced = False;
7021 ++n;
7022 VG_(printf)( "prof mem event %3d: %11llu %s\n",
7023 i, MC_(event_ctr)[i],
7024 MC_(event_ctr_name)[i]);
7025 }
7026 }
7027 }
7028
7029 #else
7030
init_prof_mem(void)7031 static void init_prof_mem ( void ) { }
done_prof_mem(void)7032 static void done_prof_mem ( void ) { }
7033
7034 #endif
7035
7036
7037 /*------------------------------------------------------------*/
7038 /*--- Origin tracking stuff ---*/
7039 /*------------------------------------------------------------*/
7040
7041 /*--------------------------------------------*/
7042 /*--- Origin tracking: load handlers ---*/
7043 /*--------------------------------------------*/
7044
merge_origins(UInt or1,UInt or2)7045 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7046 return or1 > or2 ? or1 : or2;
7047 }
7048
MC_(helperc_b_load1)7049 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7050 OCacheLine* line;
7051 UChar descr;
7052 UWord lineoff = oc_line_offset(a);
7053 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7054
7055 if (OC_ENABLE_ASSERTIONS) {
7056 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7057 }
7058
7059 line = find_OCacheLine( a );
7060
7061 descr = line->descr[lineoff];
7062 if (OC_ENABLE_ASSERTIONS) {
7063 tl_assert(descr < 0x10);
7064 }
7065
7066 if (LIKELY(0 == (descr & (1 << byteoff)))) {
7067 return 0;
7068 } else {
7069 return line->w32[lineoff];
7070 }
7071 }
7072
MC_(helperc_b_load2)7073 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7074 OCacheLine* line;
7075 UChar descr;
7076 UWord lineoff, byteoff;
7077
7078 if (UNLIKELY(a & 1)) {
7079 /* Handle misaligned case, slowly. */
7080 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
7081 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
7082 return merge_origins(oLo, oHi);
7083 }
7084
7085 lineoff = oc_line_offset(a);
7086 byteoff = a & 3; /* 0 or 2 */
7087
7088 if (OC_ENABLE_ASSERTIONS) {
7089 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7090 }
7091 line = find_OCacheLine( a );
7092
7093 descr = line->descr[lineoff];
7094 if (OC_ENABLE_ASSERTIONS) {
7095 tl_assert(descr < 0x10);
7096 }
7097
7098 if (LIKELY(0 == (descr & (3 << byteoff)))) {
7099 return 0;
7100 } else {
7101 return line->w32[lineoff];
7102 }
7103 }
7104
MC_(helperc_b_load4)7105 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7106 OCacheLine* line;
7107 UChar descr;
7108 UWord lineoff;
7109
7110 if (UNLIKELY(a & 3)) {
7111 /* Handle misaligned case, slowly. */
7112 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
7113 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
7114 return merge_origins(oLo, oHi);
7115 }
7116
7117 lineoff = oc_line_offset(a);
7118 if (OC_ENABLE_ASSERTIONS) {
7119 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7120 }
7121
7122 line = find_OCacheLine( a );
7123
7124 descr = line->descr[lineoff];
7125 if (OC_ENABLE_ASSERTIONS) {
7126 tl_assert(descr < 0x10);
7127 }
7128
7129 if (LIKELY(0 == descr)) {
7130 return 0;
7131 } else {
7132 return line->w32[lineoff];
7133 }
7134 }
7135
MC_(helperc_b_load8)7136 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7137 OCacheLine* line;
7138 UChar descrLo, descrHi, descr;
7139 UWord lineoff;
7140
7141 if (UNLIKELY(a & 7)) {
7142 /* Handle misaligned case, slowly. */
7143 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
7144 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
7145 return merge_origins(oLo, oHi);
7146 }
7147
7148 lineoff = oc_line_offset(a);
7149 if (OC_ENABLE_ASSERTIONS) {
7150 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7151 }
7152
7153 line = find_OCacheLine( a );
7154
7155 descrLo = line->descr[lineoff + 0];
7156 descrHi = line->descr[lineoff + 1];
7157 descr = descrLo | descrHi;
7158 if (OC_ENABLE_ASSERTIONS) {
7159 tl_assert(descr < 0x10);
7160 }
7161
7162 if (LIKELY(0 == descr)) {
7163 return 0; /* both 32-bit chunks are defined */
7164 } else {
7165 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
7166 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
7167 return merge_origins(oLo, oHi);
7168 }
7169 }
7170
MC_(helperc_b_load16)7171 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7172 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
7173 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
7174 UInt oBoth = merge_origins(oLo, oHi);
7175 return (UWord)oBoth;
7176 }
7177
MC_(helperc_b_load32)7178 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7179 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
7180 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
7181 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
7182 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
7183 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
7184 merge_origins(oQ2, oQ3));
7185 return (UWord)oAll;
7186 }
7187
7188
7189 /*--------------------------------------------*/
7190 /*--- Origin tracking: store handlers ---*/
7191 /*--------------------------------------------*/
7192
MC_(helperc_b_store1)7193 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7194 OCacheLine* line;
7195 UWord lineoff = oc_line_offset(a);
7196 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7197
7198 if (OC_ENABLE_ASSERTIONS) {
7199 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7200 }
7201
7202 line = find_OCacheLine( a );
7203
7204 if (d32 == 0) {
7205 line->descr[lineoff] &= ~(1 << byteoff);
7206 } else {
7207 line->descr[lineoff] |= (1 << byteoff);
7208 line->w32[lineoff] = d32;
7209 }
7210 }
7211
MC_(helperc_b_store2)7212 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7213 OCacheLine* line;
7214 UWord lineoff, byteoff;
7215
7216 if (UNLIKELY(a & 1)) {
7217 /* Handle misaligned case, slowly. */
7218 MC_(helperc_b_store1)( a + 0, d32 );
7219 MC_(helperc_b_store1)( a + 1, d32 );
7220 return;
7221 }
7222
7223 lineoff = oc_line_offset(a);
7224 byteoff = a & 3; /* 0 or 2 */
7225
7226 if (OC_ENABLE_ASSERTIONS) {
7227 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7228 }
7229
7230 line = find_OCacheLine( a );
7231
7232 if (d32 == 0) {
7233 line->descr[lineoff] &= ~(3 << byteoff);
7234 } else {
7235 line->descr[lineoff] |= (3 << byteoff);
7236 line->w32[lineoff] = d32;
7237 }
7238 }
7239
MC_(helperc_b_store4)7240 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7241 OCacheLine* line;
7242 UWord lineoff;
7243
7244 if (UNLIKELY(a & 3)) {
7245 /* Handle misaligned case, slowly. */
7246 MC_(helperc_b_store2)( a + 0, d32 );
7247 MC_(helperc_b_store2)( a + 2, d32 );
7248 return;
7249 }
7250
7251 lineoff = oc_line_offset(a);
7252 if (OC_ENABLE_ASSERTIONS) {
7253 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7254 }
7255
7256 line = find_OCacheLine( a );
7257
7258 if (d32 == 0) {
7259 line->descr[lineoff] = 0;
7260 } else {
7261 line->descr[lineoff] = 0xF;
7262 line->w32[lineoff] = d32;
7263 }
7264 }
7265
MC_(helperc_b_store8)7266 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7267 OCacheLine* line;
7268 UWord lineoff;
7269
7270 if (UNLIKELY(a & 7)) {
7271 /* Handle misaligned case, slowly. */
7272 MC_(helperc_b_store4)( a + 0, d32 );
7273 MC_(helperc_b_store4)( a + 4, d32 );
7274 return;
7275 }
7276
7277 lineoff = oc_line_offset(a);
7278 if (OC_ENABLE_ASSERTIONS) {
7279 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7280 }
7281
7282 line = find_OCacheLine( a );
7283
7284 if (d32 == 0) {
7285 line->descr[lineoff + 0] = 0;
7286 line->descr[lineoff + 1] = 0;
7287 } else {
7288 line->descr[lineoff + 0] = 0xF;
7289 line->descr[lineoff + 1] = 0xF;
7290 line->w32[lineoff + 0] = d32;
7291 line->w32[lineoff + 1] = d32;
7292 }
7293 }
7294
MC_(helperc_b_store16)7295 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7296 MC_(helperc_b_store8)( a + 0, d32 );
7297 MC_(helperc_b_store8)( a + 8, d32 );
7298 }
7299
MC_(helperc_b_store32)7300 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7301 MC_(helperc_b_store8)( a + 0, d32 );
7302 MC_(helperc_b_store8)( a + 8, d32 );
7303 MC_(helperc_b_store8)( a + 16, d32 );
7304 MC_(helperc_b_store8)( a + 24, d32 );
7305 }
7306
7307
7308 /*--------------------------------------------*/
7309 /*--- Origin tracking: sarp handlers ---*/
7310 /*--------------------------------------------*/
7311
7312 __attribute__((noinline))
ocache_sarp_Set_Origins(Addr a,UWord len,UInt otag)7313 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
7314 if ((a & 1) && len >= 1) {
7315 MC_(helperc_b_store1)( a, otag );
7316 a++;
7317 len--;
7318 }
7319 if ((a & 2) && len >= 2) {
7320 MC_(helperc_b_store2)( a, otag );
7321 a += 2;
7322 len -= 2;
7323 }
7324 if (len >= 4)
7325 tl_assert(0 == (a & 3));
7326 while (len >= 4) {
7327 MC_(helperc_b_store4)( a, otag );
7328 a += 4;
7329 len -= 4;
7330 }
7331 if (len >= 2) {
7332 MC_(helperc_b_store2)( a, otag );
7333 a += 2;
7334 len -= 2;
7335 }
7336 if (len >= 1) {
7337 MC_(helperc_b_store1)( a, otag );
7338 //a++;
7339 len--;
7340 }
7341 tl_assert(len == 0);
7342 }
7343
7344 __attribute__((noinline))
ocache_sarp_Clear_Origins(Addr a,UWord len)7345 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
7346 if ((a & 1) && len >= 1) {
7347 MC_(helperc_b_store1)( a, 0 );
7348 a++;
7349 len--;
7350 }
7351 if ((a & 2) && len >= 2) {
7352 MC_(helperc_b_store2)( a, 0 );
7353 a += 2;
7354 len -= 2;
7355 }
7356 if (len >= 4)
7357 tl_assert(0 == (a & 3));
7358 while (len >= 4) {
7359 MC_(helperc_b_store4)( a, 0 );
7360 a += 4;
7361 len -= 4;
7362 }
7363 if (len >= 2) {
7364 MC_(helperc_b_store2)( a, 0 );
7365 a += 2;
7366 len -= 2;
7367 }
7368 if (len >= 1) {
7369 MC_(helperc_b_store1)( a, 0 );
7370 //a++;
7371 len--;
7372 }
7373 tl_assert(len == 0);
7374 }
7375
7376
7377 /*------------------------------------------------------------*/
7378 /*--- Setup and finalisation ---*/
7379 /*------------------------------------------------------------*/
7380
mc_post_clo_init(void)7381 static void mc_post_clo_init ( void )
7382 {
7383 /* If we've been asked to emit XML, mash around various other
7384 options so as to constrain the output somewhat. */
7385 if (VG_(clo_xml)) {
7386 /* Extract as much info as possible from the leak checker. */
7387 MC_(clo_leak_check) = LC_Full;
7388 }
7389
7390 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
7391 VG_(message)(Vg_UserMsg,
7392 "Warning: --freelist-big-blocks value %lld has no effect\n"
7393 "as it is >= to --freelist-vol value %lld\n",
7394 MC_(clo_freelist_big_blocks),
7395 MC_(clo_freelist_vol));
7396
7397 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
7398
7399 if (MC_(clo_mc_level) == 3) {
7400 /* We're doing origin tracking. */
7401 # ifdef PERF_FAST_STACK
7402 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
7403 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
7404 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
7405 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
7406 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
7407 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
7408 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
7409 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
7410 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
7411 # endif
7412 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
7413 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
7414 } else {
7415 /* Not doing origin tracking */
7416 # ifdef PERF_FAST_STACK
7417 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
7418 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
7419 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
7420 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
7421 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
7422 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
7423 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
7424 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
7425 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
7426 # endif
7427 VG_(track_new_mem_stack) ( mc_new_mem_stack );
7428 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
7429 }
7430
7431 // We assume that brk()/sbrk() does not initialise new memory. Is this
7432 // accurate? John Reiser says:
7433 //
7434 // 0) sbrk() can *decrease* process address space. No zero fill is done
7435 // for a decrease, not even the fragment on the high end of the last page
7436 // that is beyond the new highest address. For maximum safety and
7437 // portability, then the bytes in the last page that reside above [the
7438 // new] sbrk(0) should be considered to be uninitialized, but in practice
7439 // it is exceedingly likely that they will retain their previous
7440 // contents.
7441 //
7442 // 1) If an increase is large enough to require new whole pages, then
7443 // those new whole pages (like all new pages) are zero-filled by the
7444 // operating system. So if sbrk(0) already is page aligned, then
7445 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7446 //
7447 // 2) Any increase that lies within an existing allocated page is not
7448 // changed. So if (x = sbrk(0)) is not page aligned, then
7449 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7450 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7451 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7452 // of them come along for the ride because the operating system deals
7453 // only in whole pages. Again, for maximum safety and portability, then
7454 // anything that lives above [the new] sbrk(0) should be considered
7455 // uninitialized, but in practice will retain previous contents [zero in
7456 // this case.]"
7457 //
7458 // In short:
7459 //
7460 // A key property of sbrk/brk is that new whole pages that are supplied
7461 // by the operating system *do* get initialized to zero.
7462 //
7463 // As for the portability of all this:
7464 //
7465 // sbrk and brk are not POSIX. However, any system that is a derivative
7466 // of *nix has sbrk and brk because there are too many softwares (such as
7467 // the Bourne shell) which rely on the traditional memory map (.text,
7468 // .data+.bss, stack) and the existence of sbrk/brk.
7469 //
7470 // So we should arguably observe all this. However:
7471 // - The current inaccuracy has caused maybe one complaint in seven years(?)
7472 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7473 // doubt most programmers know the above information.
7474 // So I'm not terribly unhappy with marking it as undefined. --njn.
7475 //
7476 // [More: I think most of what John said only applies to sbrk(). It seems
7477 // that brk() always deals in whole pages. And since this event deals
7478 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7479 // just mark all memory it allocates as defined.]
7480 //
7481 # if !defined(VGO_solaris)
7482 if (MC_(clo_mc_level) == 3)
7483 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
7484 else
7485 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
7486 # else
7487 // On Solaris, brk memory has to be marked as defined, otherwise we get
7488 // many false positives.
7489 VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
7490 # endif
7491
7492 /* This origin tracking cache is huge (~100M), so only initialise
7493 if we need it. */
7494 if (MC_(clo_mc_level) >= 3) {
7495 init_OCache();
7496 tl_assert(ocacheL1 != NULL);
7497 tl_assert(ocacheL2 != NULL);
7498 } else {
7499 tl_assert(ocacheL1 == NULL);
7500 tl_assert(ocacheL2 == NULL);
7501 }
7502
7503 MC_(chunk_poolalloc) = VG_(newPA)
7504 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
7505 1000,
7506 VG_(malloc),
7507 "mc.cMC.1 (MC_Chunk pools)",
7508 VG_(free));
7509
7510 /* Do not check definedness of guest state if --undef-value-errors=no */
7511 if (MC_(clo_mc_level) >= 2)
7512 VG_(track_pre_reg_read) ( mc_pre_reg_read );
7513 }
7514
print_SM_info(const HChar * type,Int n_SMs)7515 static void print_SM_info(const HChar* type, Int n_SMs)
7516 {
7517 VG_(message)(Vg_DebugMsg,
7518 " memcheck: SMs: %s = %d (%luk, %luM)\n",
7519 type,
7520 n_SMs,
7521 n_SMs * sizeof(SecMap) / 1024UL,
7522 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
7523 }
7524
mc_print_stats(void)7525 static void mc_print_stats (void)
7526 {
7527 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
7528
7529 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
7530 VG_(free_queue_volume), VG_(free_queue_length));
7531 VG_(message)(Vg_DebugMsg,
7532 " memcheck: sanity checks: %d cheap, %d expensive\n",
7533 n_sanity_cheap, n_sanity_expensive );
7534 VG_(message)(Vg_DebugMsg,
7535 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
7536 n_auxmap_L2_nodes,
7537 n_auxmap_L2_nodes * 64,
7538 n_auxmap_L2_nodes / 16 );
7539 VG_(message)(Vg_DebugMsg,
7540 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
7541 n_auxmap_L1_searches, n_auxmap_L1_cmps,
7542 (10ULL * n_auxmap_L1_cmps)
7543 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
7544 );
7545 VG_(message)(Vg_DebugMsg,
7546 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
7547 n_auxmap_L2_searches, n_auxmap_L2_nodes
7548 );
7549
7550 print_SM_info("n_issued ", n_issued_SMs);
7551 print_SM_info("n_deissued ", n_deissued_SMs);
7552 print_SM_info("max_noaccess ", max_noaccess_SMs);
7553 print_SM_info("max_undefined", max_undefined_SMs);
7554 print_SM_info("max_defined ", max_defined_SMs);
7555 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
7556
7557 // Three DSMs, plus the non-DSM ones
7558 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
7559 // The 3*sizeof(Word) bytes is the AVL node metadata size.
7560 // The VG_ROUNDUP is because the OSet pool allocator will/must align
7561 // the elements on pointer size.
7562 // Note that the pool allocator has some additional small overhead
7563 // which is not counted in the below.
7564 // Hardwiring this logic sucks, but I don't see how else to do it.
7565 max_secVBit_szB = max_secVBit_nodes *
7566 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
7567 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
7568
7569 VG_(message)(Vg_DebugMsg,
7570 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
7571 max_secVBit_nodes, max_secVBit_szB / 1024,
7572 max_secVBit_szB / (1024 * 1024));
7573 VG_(message)(Vg_DebugMsg,
7574 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
7575 sec_vbits_new_nodes + sec_vbits_updates,
7576 sec_vbits_new_nodes, sec_vbits_updates );
7577 VG_(message)(Vg_DebugMsg,
7578 " memcheck: max shadow mem size: %luk, %luM\n",
7579 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
7580
7581 if (MC_(clo_mc_level) >= 3) {
7582 VG_(message)(Vg_DebugMsg,
7583 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
7584 stats_ocacheL1_find,
7585 stats_ocacheL1_misses,
7586 stats_ocacheL1_lossage );
7587 VG_(message)(Vg_DebugMsg,
7588 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
7589 stats_ocacheL1_find - stats_ocacheL1_misses
7590 - stats_ocacheL1_found_at_1
7591 - stats_ocacheL1_found_at_N,
7592 stats_ocacheL1_found_at_1 );
7593 VG_(message)(Vg_DebugMsg,
7594 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
7595 stats_ocacheL1_found_at_N,
7596 stats_ocacheL1_movefwds );
7597 VG_(message)(Vg_DebugMsg,
7598 " ocacheL1: %'12lu sizeB %'12d useful\n",
7599 (SizeT)sizeof(OCache),
7600 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
7601 VG_(message)(Vg_DebugMsg,
7602 " ocacheL2: %'12lu refs %'12lu misses\n",
7603 stats__ocacheL2_refs,
7604 stats__ocacheL2_misses );
7605 VG_(message)(Vg_DebugMsg,
7606 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
7607 stats__ocacheL2_n_nodes_max,
7608 stats__ocacheL2_n_nodes );
7609 VG_(message)(Vg_DebugMsg,
7610 " niacache: %'12lu refs %'12lu misses\n",
7611 stats__nia_cache_queries, stats__nia_cache_misses);
7612 } else {
7613 tl_assert(ocacheL1 == NULL);
7614 tl_assert(ocacheL2 == NULL);
7615 }
7616 }
7617
7618
mc_fini(Int exitcode)7619 static void mc_fini ( Int exitcode )
7620 {
7621 MC_(print_malloc_stats)();
7622
7623 if (MC_(clo_leak_check) != LC_Off) {
7624 LeakCheckParams lcp;
7625 lcp.mode = MC_(clo_leak_check);
7626 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7627 lcp.heuristics = MC_(clo_leak_check_heuristics);
7628 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7629 lcp.deltamode = LCD_Any;
7630 lcp.max_loss_records_output = 999999999;
7631 lcp.requested_by_monitor_command = False;
7632 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
7633 } else {
7634 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7635 VG_(umsg)(
7636 "For a detailed leak analysis, rerun with: --leak-check=full\n"
7637 "\n"
7638 );
7639 }
7640 }
7641
7642 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7643 VG_(message)(Vg_UserMsg,
7644 "For counts of detected and suppressed errors, rerun with: -v\n");
7645 }
7646
7647 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
7648 && MC_(clo_mc_level) == 2) {
7649 VG_(message)(Vg_UserMsg,
7650 "Use --track-origins=yes to see where "
7651 "uninitialised values come from\n");
7652 }
7653
7654 /* Print a warning if any client-request generated ignore-ranges
7655 still exist. It would be reasonable to expect that a properly
7656 written program would remove any such ranges before exiting, and
7657 since they are a bit on the dangerous side, let's comment. By
7658 contrast ranges which are specified on the command line normally
7659 pertain to hardware mapped into the address space, and so we
7660 can't expect the client to have got rid of them. */
7661 if (gIgnoredAddressRanges) {
7662 UInt i, nBad = 0;
7663 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
7664 UWord val = IAR_INVALID;
7665 UWord key_min = ~(UWord)0;
7666 UWord key_max = (UWord)0;
7667 VG_(indexRangeMap)( &key_min, &key_max, &val,
7668 gIgnoredAddressRanges, i );
7669 if (val != IAR_ClientReq)
7670 continue;
7671 /* Print the offending range. Also, if it is the first,
7672 print a banner before it. */
7673 nBad++;
7674 if (nBad == 1) {
7675 VG_(umsg)(
7676 "WARNING: exiting program has the following client-requested\n"
7677 "WARNING: address error disablement range(s) still in force,\n"
7678 "WARNING: "
7679 "possibly as a result of some mistake in the use of the\n"
7680 "WARNING: "
7681 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
7682 );
7683 }
7684 VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
7685 i, key_min, key_max, showIARKind(val));
7686 }
7687 }
7688
7689 done_prof_mem();
7690
7691 if (VG_(clo_stats))
7692 mc_print_stats();
7693
7694 if (0) {
7695 VG_(message)(Vg_DebugMsg,
7696 "------ Valgrind's client block stats follow ---------------\n" );
7697 show_client_block_stats();
7698 }
7699 }
7700
7701 /* mark the given addr/len unaddressable for watchpoint implementation
7702 The PointKind will be handled at access time */
mc_mark_unaddressable_for_watchpoint(PointKind kind,Bool insert,Addr addr,SizeT len)7703 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
7704 Addr addr, SizeT len)
7705 {
7706 /* GDBTD this is somewhat fishy. We might rather have to save the previous
7707 accessibility and definedness in gdbserver so as to allow restoring it
7708 properly. Currently, we assume that the user only watches things
7709 which are properly addressable and defined */
7710 if (insert)
7711 MC_(make_mem_noaccess) (addr, len);
7712 else
7713 MC_(make_mem_defined) (addr, len);
7714 return True;
7715 }
7716
mc_pre_clo_init(void)7717 static void mc_pre_clo_init(void)
7718 {
7719 VG_(details_name) ("Memcheck");
7720 VG_(details_version) (NULL);
7721 VG_(details_description) ("a memory error detector");
7722 VG_(details_copyright_author)(
7723 "Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.");
7724 VG_(details_bug_reports_to) (VG_BUGS_TO);
7725 VG_(details_avg_translation_sizeB) ( 640 );
7726
7727 VG_(basic_tool_funcs) (mc_post_clo_init,
7728 MC_(instrument),
7729 mc_fini);
7730
7731 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
7732
7733
7734 VG_(needs_core_errors) ();
7735 VG_(needs_tool_errors) (MC_(eq_Error),
7736 MC_(before_pp_Error),
7737 MC_(pp_Error),
7738 True,/*show TIDs for errors*/
7739 MC_(update_Error_extra),
7740 MC_(is_recognised_suppression),
7741 MC_(read_extra_suppression_info),
7742 MC_(error_matches_suppression),
7743 MC_(get_error_name),
7744 MC_(get_extra_suppression_info),
7745 MC_(print_extra_suppression_use),
7746 MC_(update_extra_suppression_use));
7747 VG_(needs_libc_freeres) ();
7748 VG_(needs_command_line_options)(mc_process_cmd_line_options,
7749 mc_print_usage,
7750 mc_print_debug_usage);
7751 VG_(needs_client_requests) (mc_handle_client_request);
7752 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
7753 mc_expensive_sanity_check);
7754 VG_(needs_print_stats) (mc_print_stats);
7755 VG_(needs_info_location) (MC_(pp_describe_addr));
7756 VG_(needs_malloc_replacement) (MC_(malloc),
7757 MC_(__builtin_new),
7758 MC_(__builtin_vec_new),
7759 MC_(memalign),
7760 MC_(calloc),
7761 MC_(free),
7762 MC_(__builtin_delete),
7763 MC_(__builtin_vec_delete),
7764 MC_(realloc),
7765 MC_(malloc_usable_size),
7766 MC_MALLOC_DEFAULT_REDZONE_SZB );
7767 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
7768
7769 VG_(needs_xml_output) ();
7770
7771 VG_(track_new_mem_startup) ( mc_new_mem_startup );
7772
7773 // Handling of mmap and mprotect isn't simple (well, it is simple,
7774 // but the justification isn't.) See comments above, just prior to
7775 // mc_new_mem_mmap.
7776 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
7777 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
7778
7779 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
7780
7781 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
7782 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
7783 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
7784
7785 /* Defer the specification of the new_mem_stack functions to the
7786 post_clo_init function, since we need to first parse the command
7787 line before deciding which set to use. */
7788
7789 # ifdef PERF_FAST_STACK
7790 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
7791 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
7792 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
7793 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
7794 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
7795 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
7796 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
7797 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
7798 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
7799 # endif
7800 VG_(track_die_mem_stack) ( mc_die_mem_stack );
7801
7802 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
7803
7804 VG_(track_pre_mem_read) ( check_mem_is_defined );
7805 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
7806 VG_(track_pre_mem_write) ( check_mem_is_addressable );
7807 VG_(track_post_mem_write) ( mc_post_mem_write );
7808
7809 VG_(track_post_reg_write) ( mc_post_reg_write );
7810 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
7811
7812 if (MC_(clo_mc_level) >= 2) {
7813 VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
7814 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
7815 }
7816
7817 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
7818
7819 init_shadow_memory();
7820 // MC_(chunk_poolalloc) must be allocated in post_clo_init
7821 tl_assert(MC_(chunk_poolalloc) == NULL);
7822 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
7823 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
7824 init_prof_mem();
7825
7826 tl_assert( mc_expensive_sanity_check() );
7827
7828 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
7829 tl_assert(sizeof(UWord) == sizeof(Addr));
7830 // Call me paranoid. I don't care.
7831 tl_assert(sizeof(void*) == sizeof(Addr));
7832
7833 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
7834 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
7835
7836 /* This is small. Always initialise it. */
7837 init_nia_to_ecu_cache();
7838
7839 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
7840 if we need to, since the command line args haven't been
7841 processed yet. Hence defer it to mc_post_clo_init. */
7842 tl_assert(ocacheL1 == NULL);
7843 tl_assert(ocacheL2 == NULL);
7844
7845 /* Check some important stuff. See extensive comments above
7846 re UNALIGNED_OR_HIGH for background. */
7847 # if VG_WORDSIZE == 4
7848 tl_assert(sizeof(void*) == 4);
7849 tl_assert(sizeof(Addr) == 4);
7850 tl_assert(sizeof(UWord) == 4);
7851 tl_assert(sizeof(Word) == 4);
7852 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
7853 tl_assert(MASK(1) == 0UL);
7854 tl_assert(MASK(2) == 1UL);
7855 tl_assert(MASK(4) == 3UL);
7856 tl_assert(MASK(8) == 7UL);
7857 # else
7858 tl_assert(VG_WORDSIZE == 8);
7859 tl_assert(sizeof(void*) == 8);
7860 tl_assert(sizeof(Addr) == 8);
7861 tl_assert(sizeof(UWord) == 8);
7862 tl_assert(sizeof(Word) == 8);
7863 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
7864 tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
7865 tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
7866 tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
7867 tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
7868 # endif
7869 }
7870
7871 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
7872
7873 /*--------------------------------------------------------------------*/
7874 /*--- end mc_main.c ---*/
7875 /*--------------------------------------------------------------------*/
7876