1 
2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
5 /*---                                                    mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of MemCheck, a heavyweight Valgrind tool for
10    detecting memory errors.
11 
12    Copyright (C) 2000-2013 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_gdbserver.h"
36 #include "pub_tool_poolalloc.h"
37 #include "pub_tool_hashtable.h"     // For mc_include.h
38 #include "pub_tool_libcbase.h"
39 #include "pub_tool_libcassert.h"
40 #include "pub_tool_libcprint.h"
41 #include "pub_tool_machine.h"
42 #include "pub_tool_mallocfree.h"
43 #include "pub_tool_options.h"
44 #include "pub_tool_oset.h"
45 #include "pub_tool_rangemap.h"
46 #include "pub_tool_replacemalloc.h"
47 #include "pub_tool_tooliface.h"
48 #include "pub_tool_threadstate.h"
49 
50 #include "mc_include.h"
51 #include "memcheck.h"   /* for client requests */
52 
53 
54 /* Set to 1 to do a little more sanity checking */
55 #define VG_DEBUG_MEMORY 0
56 
57 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
58 
59 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
60 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
61 
62 
63 /*------------------------------------------------------------*/
64 /*--- Fast-case knobs                                      ---*/
65 /*------------------------------------------------------------*/
66 
67 // Comment these out to disable the fast cases (don't just set them to zero).
68 
69 #define PERF_FAST_LOADV    1
70 #define PERF_FAST_STOREV   1
71 
72 #define PERF_FAST_SARP     1
73 
74 #define PERF_FAST_STACK    1
75 #define PERF_FAST_STACK2   1
76 
77 /* Change this to 1 to enable assertions on origin tracking cache fast
78    paths */
79 #define OC_ENABLE_ASSERTIONS 0
80 
81 
82 /*------------------------------------------------------------*/
83 /*--- Comments on the origin tracking implementation       ---*/
84 /*------------------------------------------------------------*/
85 
86 /* See detailed comment entitled
87    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
88    which is contained further on in this file. */
89 
90 
91 /*------------------------------------------------------------*/
92 /*--- V bits and A bits                                    ---*/
93 /*------------------------------------------------------------*/
94 
95 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
96    thinks the corresponding value bit is defined.  And every memory byte
97    has an A bit, which tracks whether Memcheck thinks the program can access
98    it safely (ie. it's mapped, and has at least one of the RWX permission bits
99    set).  So every N-bit register is shadowed with N V bits, and every memory
100    byte is shadowed with 8 V bits and one A bit.
101 
102    In the implementation, we use two forms of compression (compressed V bits
103    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
104    for memory.
105 
106    Memcheck also tracks extra information about each heap block that is
107    allocated, for detecting memory leaks and other purposes.
108 */
109 
110 /*------------------------------------------------------------*/
111 /*--- Basic A/V bitmap representation.                     ---*/
112 /*------------------------------------------------------------*/
113 
114 /* All reads and writes are checked against a memory map (a.k.a. shadow
115    memory), which records the state of all memory in the process.
116 
117    On 32-bit machines the memory map is organised as follows.
118    The top 16 bits of an address are used to index into a top-level
119    map table, containing 65536 entries.  Each entry is a pointer to a
120    second-level map, which records the accessibility and validity
121    permissions for the 65536 bytes indexed by the lower 16 bits of the
122    address.  Each byte is represented by two bits (details are below).  So
123    each second-level map contains 16384 bytes.  This two-level arrangement
124    conveniently divides the 4G address space into 64k lumps, each of size 64k
125    bytes.
126 
127    All entries in the primary (top-level) map must point to a valid
128    secondary (second-level) map.  Since many of the 64kB chunks will
129    have the same status for every bit -- ie. noaccess (for unused
130    address space) or entirely addressable and defined (for code segments) --
131    there are three distinguished secondary maps, which indicate 'noaccess',
132    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
133    map entry points to the relevant distinguished map.  In practice,
134    typically more than half of the addressable memory is represented with
135    the 'undefined' or 'defined' distinguished secondary map, so it gives a
136    good saving.  It also lets us set the V+A bits of large address regions
137    quickly in set_address_range_perms().
138 
139    On 64-bit machines it's more complicated.  If we followed the same basic
140    scheme we'd have a four-level table which would require too many memory
141    accesses.  So instead the top-level map table has 2^20 entries (indexed
142    using bits 16..35 of the address);  this covers the bottom 64GB.  Any
143    accesses above 64GB are handled with a slow, sparse auxiliary table.
144    Valgrind's address space manager tries very hard to keep things below
145    this 64GB barrier so that performance doesn't suffer too much.
146 
147    Note that this file has a lot of different functions for reading and
148    writing shadow memory.  Only a couple are strictly necessary (eg.
149    get_vabits2 and set_vabits2), most are just specialised for specific
150    common cases to improve performance.
151 
152    Aside: the V+A bits are less precise than they could be -- we have no way
153    of marking memory as read-only.  It would be great if we could add an
154    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
155    which requires 2.3 bits to hold, and there's no way to do that elegantly
156    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
157    seem worth it.
158 */
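
/* The following is a small illustrative sketch -- not used by the tool --
   of how the two-level scheme described above decomposes an address.  The
   helper names and the example address are invented for illustration; the
   real lookups are done by get_secmap_low_ptr() and friends further down
   this file. */
#if 0
static UWord example_primary_index ( Addr a )
{
   /* 32-bit scheme: the top 16 bits select one of 65536 primary-map
      entries.  (On 64-bit targets, bits 16..35 select one of 2^20
      entries, covering the first 64GB; anything above that goes via the
      auxiliary maps auxmap_L1/auxmap_L2 below.) */
   return a >> 16;
}
static UWord example_secondary_offset ( Addr a )
{
   /* The bottom 16 bits locate the byte within its 64kB chunk.  Since
      each byte of memory is described by just 2 bits, four bytes share
      one shadow byte, hence the >> 2. */
   return (a & 0xFFFF) >> 2;
}
/* Example: a == 0x40018 gives primary index 0x4 and shadow-byte offset
   (0x0018 >> 2) == 6 within that chunk's secondary map. */
#endif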
159 
160 /* --------------- Basic configuration --------------- */
161 
162 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
163 
164 #if VG_WORDSIZE == 4
165 
166 /* cover the entire address space */
167 #  define N_PRIMARY_BITS  16
168 
169 #else
170 
171 /* Just handle the first 64G fast and the rest via auxiliary
172    primaries.  If you change this, Memcheck will assert at startup.
173    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
174 #  define N_PRIMARY_BITS  20
175 
176 #endif
177 
178 
179 /* Do not change this. */
180 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
181 
182 /* Do not change this. */
183 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
184 
185 
186 /* --------------- Secondary maps --------------- */
187 
188 // Each byte of memory conceptually has an A bit, which indicates its
189 // addressability, and 8 V bits, which indicate its definedness.
190 //
191 // But because very few bytes are partially defined, we can use a nice
192 // compression scheme to reduce the size of shadow memory.  Each byte of
193 // memory has 2 bits which indicate its state (ie. V+A bits):
194 //
195 //   00:  noaccess    (unaddressable but treated as fully defined)
196 //   01:  undefined   (addressable and fully undefined)
197 //   10:  defined     (addressable and fully defined)
198 //   11:  partdefined (addressable and partially defined)
199 //
200 // In the "partdefined" case, we use a secondary table to store the V bits.
201 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
202 // bits.
203 //
204 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
205 // four bytes (32 bits) of memory are in each chunk.  Hence the name
206 // "vabits8".  This lets us get the V+A bits for four bytes at a time
207 // easily (without having to do any shifting and/or masking), and that is a
208 // very common operation.  (Note that although each vabits8 chunk
209 // is 8 bits in size, it represents 32 bits of memory.)
210 //
211 // The representation is "inverse" little-endian... each 4 bytes of
212 // memory is represented by a 1 byte value, where:
213 //
214 // - the status of byte (a+0) is held in bits [1..0]
215 // - the status of byte (a+1) is held in bits [3..2]
216 // - the status of byte (a+2) is held in bits [5..4]
217 // - the status of byte (a+3) is held in bits [7..6]
218 //
219 // It's "inverse" because endianness normally describes a mapping from
220 // value bits to memory addresses;  in this case the mapping is inverted.
221 // Ie. instead of particular value bits being held in certain addresses, in
222 // this case certain addresses are represented by particular value bits.
223 // See insert_vabits2_into_vabits8() for an example.
224 //
225 // But note that we don't compress the V bits stored in registers;  they
226 // need to be explicit to make the shadow operations possible.  Therefore
227 // when moving values between registers and memory we need to convert
228 // between the expanded in-register format and the compressed in-memory
229 // format.  This isn't so difficult, it just requires careful attention in a
230 // few places.
231 
232 // These represent eight bits of memory.
233 #define VA_BITS2_NOACCESS     0x0      // 00b
234 #define VA_BITS2_UNDEFINED    0x1      // 01b
235 #define VA_BITS2_DEFINED      0x2      // 10b
236 #define VA_BITS2_PARTDEFINED  0x3      // 11b
237 
238 // These represent 16 bits of memory.
239 #define VA_BITS4_NOACCESS     0x0      // 00_00b
240 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
241 #define VA_BITS4_DEFINED      0xa      // 10_10b
242 
243 // These represent 32 bits of memory.
244 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
245 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
246 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
247 
248 // These represent 64 bits of memory.
249 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
250 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
251 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
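
// A worked example of the "inverse" packing described above (illustrative
// only).  Suppose the four bytes of a 4-aligned group have these states:
//
//   byte a+0  defined      -> VA_BITS2_DEFINED     (10b) in bits [1..0]
//   byte a+1  undefined    -> VA_BITS2_UNDEFINED   (01b) in bits [3..2]
//   byte a+2  noaccess     -> VA_BITS2_NOACCESS    (00b) in bits [5..4]
//   byte a+3  partdefined  -> VA_BITS2_PARTDEFINED (11b) in bits [7..6]
//
// The resulting vabits8 value is 11_00_01_10b == 0xc6.  Written out as a
// C expression (a sketch, not used anywhere):
#if 0
static const UChar example_vabits8 = (VA_BITS2_DEFINED     << 0)  /* a+0 */
                                   | (VA_BITS2_UNDEFINED   << 2)  /* a+1 */
                                   | (VA_BITS2_NOACCESS    << 4)  /* a+2 */
                                   | (VA_BITS2_PARTDEFINED << 6); /* a+3 */
#endif
// Note also that VA_BITS8_DEFINED (0xaa) is simply VA_BITS2_DEFINED
// replicated into all four fields, and likewise for the other
// VA_BITS4/8/16 constants above.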
252 
253 
254 #define SM_CHUNKS             16384
255 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
256 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
257 
258 // Paranoia:  it's critical for performance that the requested inlining
259 // occurs.  So try extra hard.
260 #define INLINE    inline __attribute__((always_inline))
261 
262 static INLINE Addr start_of_this_sm ( Addr a ) {
263    return (a & (~SM_MASK));
264 }
265 static INLINE Bool is_start_of_sm ( Addr a ) {
266    return (start_of_this_sm(a) == a);
267 }
268 
269 typedef
270    struct {
271       UChar vabits8[SM_CHUNKS];
272    }
273    SecMap;
274 
275 // 3 distinguished secondary maps, one for no-access, one for
276 // accessible but undefined, and one for accessible and defined.
277 // Distinguished secondaries may never be modified.
278 #define SM_DIST_NOACCESS   0
279 #define SM_DIST_UNDEFINED  1
280 #define SM_DIST_DEFINED    2
281 
282 static SecMap sm_distinguished[3];
283 
284 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
285    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
286 }
287 
288 // Forward declaration
289 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
290 
291 /* dist_sm points to one of our three distinguished secondaries.  Make
292    a copy of it so that we can write to it.
293 */
294 static SecMap* copy_for_writing ( SecMap* dist_sm )
295 {
296    SecMap* new_sm;
297    tl_assert(dist_sm == &sm_distinguished[0]
298           || dist_sm == &sm_distinguished[1]
299           || dist_sm == &sm_distinguished[2]);
300 
301    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
302    if (new_sm == NULL)
303       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
304                                    sizeof(SecMap) );
305    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
306    update_SM_counts(dist_sm, new_sm);
307    return new_sm;
308 }
309 
310 /* --------------- Stats --------------- */
311 
312 static Int   n_issued_SMs      = 0;
313 static Int   n_deissued_SMs    = 0;
314 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
315 static Int   n_undefined_SMs   = 0;
316 static Int   n_defined_SMs     = 0;
317 static Int   n_non_DSM_SMs     = 0;
318 static Int   max_noaccess_SMs  = 0;
319 static Int   max_undefined_SMs = 0;
320 static Int   max_defined_SMs   = 0;
321 static Int   max_non_DSM_SMs   = 0;
322 
323 /* # searches initiated in auxmap_L1, and # base cmps required */
324 static ULong n_auxmap_L1_searches  = 0;
325 static ULong n_auxmap_L1_cmps      = 0;
326 /* # of searches that missed in auxmap_L1 and therefore had to
327    be handed to auxmap_L2. And the number of nodes inserted. */
328 static ULong n_auxmap_L2_searches  = 0;
329 static ULong n_auxmap_L2_nodes     = 0;
330 
331 static Int   n_sanity_cheap     = 0;
332 static Int   n_sanity_expensive = 0;
333 
334 static Int   n_secVBit_nodes   = 0;
335 static Int   max_secVBit_nodes = 0;
336 
337 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
338 {
339    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
340    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
341    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
342    else                                                  { n_non_DSM_SMs  --;
343                                                            n_deissued_SMs ++; }
344 
345    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
346    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
347    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
348    else                                                  { n_non_DSM_SMs  ++;
349                                                            n_issued_SMs   ++; }
350 
351    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
352    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
353    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
354    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
355 }
356 
357 /* --------------- Primary maps --------------- */
358 
359 /* The main primary map.  This covers some initial part of the address
360    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
361    handled using the auxiliary primary map.
362 */
363 static SecMap* primary_map[N_PRIMARY_MAP];
364 
365 
366 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
367    value, and sm points at the relevant secondary map.  As with the
368    main primary map, the secondary may be either a real secondary, or
369    one of the three distinguished secondaries.  DO NOT CHANGE THIS
370    LAYOUT: the first word has to be the key for OSet fast lookups.
371 */
372 typedef
373    struct {
374       Addr    base;
375       SecMap* sm;
376    }
377    AuxMapEnt;
378 
379 /* Tunable parameter: How big is the L1 queue? */
380 #define N_AUXMAP_L1 24
381 
382 /* Tunable parameter: How far along the L1 queue to insert
383    entries resulting from L2 lookups? */
384 #define AUXMAP_L1_INSERT_IX 12
385 
386 static struct {
387           Addr       base;
388           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
389        }
390        auxmap_L1[N_AUXMAP_L1];
391 
392 static OSet* auxmap_L2 = NULL;
393 
394 static void init_auxmap_L1_L2 ( void )
395 {
396    Int i;
397    for (i = 0; i < N_AUXMAP_L1; i++) {
398       auxmap_L1[i].base = 0;
399       auxmap_L1[i].ent  = NULL;
400    }
401 
402    tl_assert(0 == offsetof(AuxMapEnt,base));
403    tl_assert(sizeof(Addr) == sizeof(void*));
404    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
405                                     /*fastCmp*/ NULL,
406                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
407 }
408 
409 /* Check representation invariants; if OK return NULL; else a
410    descriptive bit of text.  Also return the number of
411    non-distinguished secondary maps referred to from the auxiliary
412    primary maps. */
413 
414 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
415 {
416    Word i, j;
417    /* On a 32-bit platform, the L2 and L1 tables should
418       both remain empty forever.
419 
420       On a 64-bit platform:
421       In the L2 table:
422        all .base & 0xFFFF == 0
423        all .base > MAX_PRIMARY_ADDRESS
424       In the L1 table:
425        all .base & 0xFFFF == 0
426        all (.base > MAX_PRIMARY_ADDRESS
427             .base & 0xFFFF == 0
428             and .ent points to an AuxMapEnt with the same .base)
429            or
430            (.base == 0 and .ent == NULL)
431    */
432    *n_secmaps_found = 0;
433    if (sizeof(void*) == 4) {
434       /* 32-bit platform */
435       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
436          return "32-bit: auxmap_L2 is non-empty";
437       for (i = 0; i < N_AUXMAP_L1; i++)
438         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
439       return "32-bit: auxmap_L1 is non-empty";
440    } else {
441       /* 64-bit platform */
442       UWord elems_seen = 0;
443       AuxMapEnt *elem, *res;
444       AuxMapEnt key;
445       /* L2 table */
446       VG_(OSetGen_ResetIter)(auxmap_L2);
447       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
448          elems_seen++;
449          if (0 != (elem->base & (Addr)0xFFFF))
450             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
451          if (elem->base <= MAX_PRIMARY_ADDRESS)
452             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
453          if (elem->sm == NULL)
454             return "64-bit: .sm in _L2 is NULL";
455          if (!is_distinguished_sm(elem->sm))
456             (*n_secmaps_found)++;
457       }
458       if (elems_seen != n_auxmap_L2_nodes)
459          return "64-bit: disagreement on number of elems in _L2";
460       /* Check L1-L2 correspondence */
461       for (i = 0; i < N_AUXMAP_L1; i++) {
462          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
463             continue;
464          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
465             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
466          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
467             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
468          if (auxmap_L1[i].ent == NULL)
469             return "64-bit: .ent is NULL in auxmap_L1";
470          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
471             return "64-bit: _L1 and _L2 bases are inconsistent";
472          /* Look it up in auxmap_L2. */
473          key.base = auxmap_L1[i].base;
474          key.sm   = 0;
475          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
476          if (res == NULL)
477             return "64-bit: _L1 .base not found in _L2";
478          if (res != auxmap_L1[i].ent)
479             return "64-bit: _L1 .ent disagrees with _L2 entry";
480       }
481       /* Check L1 contains no duplicates */
482       for (i = 0; i < N_AUXMAP_L1; i++) {
483          if (auxmap_L1[i].base == 0)
484             continue;
485 	 for (j = i+1; j < N_AUXMAP_L1; j++) {
486             if (auxmap_L1[j].base == 0)
487                continue;
488             if (auxmap_L1[j].base == auxmap_L1[i].base)
489                return "64-bit: duplicate _L1 .base entries";
490          }
491       }
492    }
493    return NULL; /* ok */
494 }
495 
496 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
497 {
498    Word i;
499    tl_assert(ent);
500    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
501    for (i = N_AUXMAP_L1-1; i > rank; i--)
502       auxmap_L1[i] = auxmap_L1[i-1];
503    auxmap_L1[rank].base = ent->base;
504    auxmap_L1[rank].ent  = ent;
505 }
506 
507 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
508 {
509    AuxMapEnt  key;
510    AuxMapEnt* res;
511    Word       i;
512 
513    tl_assert(a > MAX_PRIMARY_ADDRESS);
514    a &= ~(Addr)0xFFFF;
515 
516    /* First search the front-cache, which is a self-organising
517       list containing the most popular entries. */
518 
519    if (LIKELY(auxmap_L1[0].base == a))
520       return auxmap_L1[0].ent;
521    if (LIKELY(auxmap_L1[1].base == a)) {
522       Addr       t_base = auxmap_L1[0].base;
523       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
524       auxmap_L1[0].base = auxmap_L1[1].base;
525       auxmap_L1[0].ent  = auxmap_L1[1].ent;
526       auxmap_L1[1].base = t_base;
527       auxmap_L1[1].ent  = t_ent;
528       return auxmap_L1[0].ent;
529    }
530 
531    n_auxmap_L1_searches++;
532 
533    for (i = 0; i < N_AUXMAP_L1; i++) {
534       if (auxmap_L1[i].base == a) {
535          break;
536       }
537    }
538    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
539 
540    n_auxmap_L1_cmps += (ULong)(i+1);
541 
542    if (i < N_AUXMAP_L1) {
543       if (i > 0) {
544          Addr       t_base = auxmap_L1[i-1].base;
545          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
546          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
547          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
548          auxmap_L1[i-0].base = t_base;
549          auxmap_L1[i-0].ent  = t_ent;
550          i--;
551       }
552       return auxmap_L1[i].ent;
553    }
554 
555    n_auxmap_L2_searches++;
556 
557    /* First see if we already have it. */
558    key.base = a;
559    key.sm   = 0;
560 
561    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
562    if (res)
563       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
564    return res;
565 }
566 
567 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
568 {
569    AuxMapEnt *nyu, *res;
570 
571    /* First see if we already have it. */
572    res = maybe_find_in_auxmap( a );
573    if (LIKELY(res))
574       return res;
575 
576    /* Ok, there's no entry in the secondary map, so we'll have
577       to allocate one. */
578    a &= ~(Addr)0xFFFF;
579 
580    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
581    nyu->base = a;
582    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
583    VG_(OSetGen_Insert)( auxmap_L2, nyu );
584    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
585    n_auxmap_L2_nodes++;
586    return nyu;
587 }
588 
589 /* --------------- SecMap fundamentals --------------- */
590 
591 // In all these, 'low' means it's definitely in the main primary map,
592 // 'high' means it's definitely in the auxiliary table.
593 
594 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
595 {
596    UWord pm_off = a >> 16;
597 #  if VG_DEBUG_MEMORY >= 1
598    tl_assert(pm_off < N_PRIMARY_MAP);
599 #  endif
600    return &primary_map[ pm_off ];
601 }
602 
603 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
604 {
605    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
606    return &am->sm;
607 }
608 
609 static INLINE SecMap** get_secmap_ptr ( Addr a )
610 {
611    return ( a <= MAX_PRIMARY_ADDRESS
612           ? get_secmap_low_ptr(a)
613           : get_secmap_high_ptr(a));
614 }
615 
616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
617 {
618    return *get_secmap_low_ptr(a);
619 }
620 
621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
622 {
623    return *get_secmap_high_ptr(a);
624 }
625 
626 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
627 {
628    SecMap** p = get_secmap_low_ptr(a);
629    if (UNLIKELY(is_distinguished_sm(*p)))
630       *p = copy_for_writing(*p);
631    return *p;
632 }
633 
634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
635 {
636    SecMap** p = get_secmap_high_ptr(a);
637    if (UNLIKELY(is_distinguished_sm(*p)))
638       *p = copy_for_writing(*p);
639    return *p;
640 }
641 
642 /* Produce the secmap for 'a', either from the primary map or by
643    ensuring there is an entry for it in the aux primary map.  The
644    secmap may be a distinguished one as the caller will only want to
645    be able to read it.
646 */
647 static INLINE SecMap* get_secmap_for_reading ( Addr a )
648 {
649    return ( a <= MAX_PRIMARY_ADDRESS
650           ? get_secmap_for_reading_low (a)
651           : get_secmap_for_reading_high(a) );
652 }
653 
654 /* Produce the secmap for 'a', either from the primary map or by
655    ensuring there is an entry for it in the aux primary map.  The
656    secmap may not be a distinguished one, since the caller will want
657    to be able to write it.  If it is a distinguished secondary, make a
658    writable copy of it, install it, and return the copy instead.  (COW
659    semantics).
660 */
661 static INLINE SecMap* get_secmap_for_writing ( Addr a )
662 {
663    return ( a <= MAX_PRIMARY_ADDRESS
664           ? get_secmap_for_writing_low (a)
665           : get_secmap_for_writing_high(a) );
666 }
667 
668 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
669    allocate one if one doesn't already exist.  This is used by the
670    leak checker.
671 */
672 static SecMap* maybe_get_secmap_for ( Addr a )
673 {
674    if (a <= MAX_PRIMARY_ADDRESS) {
675       return get_secmap_for_reading_low(a);
676    } else {
677       AuxMapEnt* am = maybe_find_in_auxmap(a);
678       return am ? am->sm : NULL;
679    }
680 }
681 
682 /* --------------- Fundamental functions --------------- */
683 
684 static INLINE
685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
686 {
687    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
688    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
689    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
690 }
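
// Worked example (illustrative): for an address with (a & 3) == 2 the
// shift is 4, so inserting VA_BITS2_UNDEFINED (01b) into an existing
// vabits8 of VA_BITS8_DEFINED (10_10_10_10b) yields 10_01_10_10b == 0x9a;
// only bits [5..4] -- the field for this byte, offset 2 within its aligned
// group of four -- are changed.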
691 
692 static INLINE
693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
694 {
695    UInt shift;
696    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
697    shift     =  (a & 2)   << 1;        // shift by 0 or 4
698    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
699    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
700 }
701 
702 static INLINE
703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
704 {
705    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
706    vabits8 >>= shift;                  // shift the two bits to the bottom
707    return 0x3 & vabits8;               // mask out the rest
708 }
709 
710 static INLINE
711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
712 {
713    UInt shift;
714    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
715    shift = (a & 2) << 1;               // shift by 0 or 4
716    vabits8 >>= shift;                  // shift the four bits to the bottom
717    return 0xf & vabits8;               // mask out the rest
718 }
719 
720 // Note that these four are only used in slow cases.  The fast cases do
721 // clever things like combine the auxmap check (in
722 // get_secmap_{read,writ}able) with alignment checks.
723 
724 // *** WARNING! ***
725 // Any time this function is called, if it is possible that vabits2
726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
727 // sec-V-bits table must also be set!
728 static INLINE
729 void set_vabits2 ( Addr a, UChar vabits2 )
730 {
731    SecMap* sm       = get_secmap_for_writing(a);
732    UWord   sm_off   = SM_OFF(a);
733    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
734 }
735 
736 static INLINE
737 UChar get_vabits2 ( Addr a )
738 {
739    SecMap* sm       = get_secmap_for_reading(a);
740    UWord   sm_off   = SM_OFF(a);
741    UChar   vabits8  = sm->vabits8[sm_off];
742    return extract_vabits2_from_vabits8(a, vabits8);
743 }
744 
745 // *** WARNING! ***
746 // Any time this function is called, if it is possible that any of the
747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
748 // corresponding entry(s) in the sec-V-bits table must also be set!
749 static INLINE
750 UChar get_vabits8_for_aligned_word32 ( Addr a )
751 {
752    SecMap* sm       = get_secmap_for_reading(a);
753    UWord   sm_off   = SM_OFF(a);
754    UChar   vabits8  = sm->vabits8[sm_off];
755    return vabits8;
756 }
757 
758 static INLINE
759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
760 {
761    SecMap* sm       = get_secmap_for_writing(a);
762    UWord   sm_off   = SM_OFF(a);
763    sm->vabits8[sm_off] = vabits8;
764 }
765 
766 
767 // Forward declarations
768 static UWord get_sec_vbits8(Addr a);
769 static void  set_sec_vbits8(Addr a, UWord vbits8);
770 
771 // Returns False if there was an addressability error.
772 static INLINE
773 Bool set_vbits8 ( Addr a, UChar vbits8 )
774 {
775    Bool  ok      = True;
776    UChar vabits2 = get_vabits2(a);
777    if ( VA_BITS2_NOACCESS != vabits2 ) {
778       // Addressable.  Convert in-register format to in-memory format.
779       // Also remove any existing sec V bit entry for the byte if no
780       // longer necessary.
781       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
782       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
783       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
784                                                 set_sec_vbits8(a, vbits8);  }
785       set_vabits2(a, vabits2);
786 
787    } else {
788       // Unaddressable!  Do nothing -- when writing to unaddressable
789       // memory it acts as a black hole, and the V bits can never be seen
790       // again.  So we don't have to write them at all.
791       ok = False;
792    }
793    return ok;
794 }
795 
796 // Returns False if there was an addressability error.  In that case, we put
797 // all defined bits into vbits8.
798 static INLINE
799 Bool get_vbits8 ( Addr a, UChar* vbits8 )
800 {
801    Bool  ok      = True;
802    UChar vabits2 = get_vabits2(a);
803 
804    // Convert the in-memory format to in-register format.
805    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
806    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
807    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
808       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
809       ok = False;
810    } else {
811       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
812       *vbits8 = get_sec_vbits8(a);
813    }
814    return ok;
815 }
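
/* Illustrative sketch (not part of the tool) of how the two helpers above
   cooperate for a partially defined byte.  The function name and values
   are invented; it assumes the byte at 'a' is already addressable. */
#if 0
static void example_partially_defined_byte ( Addr a )
{
   UChar v;
   /* 0xF0 in the in-register format means bits 7..4 are undefined (V bit
      set) and bits 3..0 are defined. */
   set_vbits8(a, 0xF0);
   /* The main shadow map now records VA_BITS2_PARTDEFINED for 'a', and
      the full pattern 0xF0 is held in the sec-V-bits table below. */
   get_vbits8(a, &v);   /* sets v to 0xF0 and returns True */
}
#endif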
816 
817 
818 /* --------------- Secondary V bit table ------------ */
819 
820 // This table holds the full V bit pattern for partially-defined bytes
821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
822 // memory.
823 //
824 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
825 // then overwrite the same address with a fully defined byte, the sec-V-bit
826 // node will not necessarily be removed.  This is because checking for
827 // whether removal is necessary would slow down the fast paths.
828 //
829 // To avoid the stale nodes building up too much, we periodically (once the
830 // table reaches a certain size) garbage collect (GC) the table by
831 // traversing it and evicting any nodes that no longer contain any PDBs.
832 // If more than a certain proportion of nodes survived, we increase the
833 // table size so that GCs occur less often.
834 //
835 // This policy is designed to avoid bad table bloat in the worst case where
836 // a program creates huge numbers of stale PDBs -- we would get this bloat
837 // if we had no GC -- while handling well the case where a node becomes
838 // stale but shortly afterwards is rewritten with a PDB and so becomes
839 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
840 // remove all stale nodes as soon as possible, we just end up re-adding a
841 // lot of them later.  The "sufficiently stale" approach avoids
842 // this.  (If a program has many live PDBs, performance will just suck,
843 // there's no way around that.)
844 //
845 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
846 // holding on to stale entries for 2 GCs before discarding them can lead
847 // to massive space leaks.  So we're changing to an arrangement where
848 // lines are evicted as soon as they are observed to be stale during a
849 // GC.  This also has a side benefit of allowing the sufficiently_stale
850 // field to be removed from the SecVBitNode struct, reducing its size by
851 // 8 bytes, which is a substantial space saving considering that the
852 // struct was previously 32 or so bytes, on a 64 bit target.
853 //
854 // In order to try and mitigate the problem that the "sufficiently stale"
855 // heuristic was designed to avoid, the table size is allowed to drift
856 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
857 // means that nodes will exist in the table longer on average, and hopefully
858 // will be deleted and re-added less frequently.
859 //
860 // The previous scaling up mechanism (now called STEPUP) is retained:
861 // if residency exceeds 50%, the table is scaled up, although by a
862 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
863 // frequency of GCs when there are many PDBs and reduces the tendency of
864 // stale PDBs to reside for long periods in the table.
865 
866 static OSet* secVBitTable;
867 
868 // Stats
869 static ULong sec_vbits_new_nodes = 0;
870 static ULong sec_vbits_updates   = 0;
871 
872 // This must be a power of two;  this is checked in mc_pre_clo_init().
873 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
874 // a larger address range) they take more space but we can get multiple
875 // partially-defined bytes in one if they are close to each other, reducing
876 // the number of total nodes.  In practice sometimes they are clustered (eg.
877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
878 // row), but often not.  So we choose something intermediate.
879 #define BYTES_PER_SEC_VBIT_NODE     16
880 
881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
882 // more than this many nodes survive a GC.
883 #define STEPUP_SURVIVOR_PROPORTION  0.5
884 #define STEPUP_GROWTH_FACTOR        1.414213562
885 
886 // If the above heuristic doesn't apply, then we may make the table
887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
888 // this many nodes survive a GC, _and_ the total table size does
889 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
890 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
891 // effectively, although gradually, reduces residency and increases time
892 // between GCs for programs with small numbers of PDBs.  The 80000 limit
893 // effectively limits the table size to around 2MB for programs with
894 // small numbers of PDBs, whilst giving a reasonably long lifetime to
895 // entries, to try and reduce the costs resulting from deleting and
896 // re-adding of entries.
897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
898 #define DRIFTUP_GROWTH_FACTOR       1.015
899 #define DRIFTUP_MAX_SIZE            80000
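
// Worked example of the two growth rules above (numbers are illustrative):
// with the initial table limit of 1000 nodes (secVBitLimit below), a GC in
// which 600 nodes survive (60%, above the 50% STEPUP threshold) raises the
// limit to 1000 * 1.414 ~= 1414, whereas a GC in which only 200 nodes
// survive (20%, above the 15% DRIFTUP threshold) nudges it up to
// 1000 * 1.015 == 1015; drifting stops once the limit reaches 80000.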
900 
901 // We GC the table when it gets this many nodes in it, ie. it's effectively
902 // the table size.  It can change.
903 static Int  secVBitLimit = 1000;
904 
905 // The number of GCs done, used to age sec-V-bit nodes for eviction.
906 // Because it's unsigned, wrapping doesn't matter -- the right answer will
907 // come out anyway.
908 static UInt GCs_done = 0;
909 
910 typedef
911    struct {
912       Addr  a;
913       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
914    }
915    SecVBitNode;
916 
917 static OSet* createSecVBitTable(void)
918 {
919    OSet* newSecVBitTable;
920    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
921       ( offsetof(SecVBitNode, a),
922         NULL, // use fast comparisons
923         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
924         VG_(free),
925         1000,
926         sizeof(SecVBitNode));
927    return newSecVBitTable;
928 }
929 
930 static void gcSecVBitTable(void)
931 {
932    OSet*        secVBitTable2;
933    SecVBitNode* n;
934    Int          i, n_nodes = 0, n_survivors = 0;
935 
936    GCs_done++;
937 
938    // Create the new table.
939    secVBitTable2 = createSecVBitTable();
940 
941    // Traverse the table, moving fresh nodes into the new table.
942    VG_(OSetGen_ResetIter)(secVBitTable);
943    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
944       // Keep node if any of its bytes are non-stale.  Using
945       // get_vabits2() for the lookup is not very efficient, but I don't
946       // think it matters.
947       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
948          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
949             // Found a non-stale byte, so keep =>
950             // Insert a copy of the node into the new table.
951             SecVBitNode* n2 =
952                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
953             *n2 = *n;
954             VG_(OSetGen_Insert)(secVBitTable2, n2);
955             break;
956          }
957       }
958    }
959 
960    // Get the before and after sizes.
961    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
962    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
963 
964    // Destroy the old table, and put the new one in its place.
965    VG_(OSetGen_Destroy)(secVBitTable);
966    secVBitTable = secVBitTable2;
967 
968    if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
969       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
970                    n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
971    }
972 
973    // Increase table size if necessary.
974    if ((Double)n_survivors
975        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
976       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
977       if (VG_(clo_verbosity) > 1)
978          VG_(message)(Vg_DebugMsg,
979                       "memcheck GC: %d new table size (stepup)\n",
980                       secVBitLimit);
981    }
982    else
983    if (secVBitLimit < DRIFTUP_MAX_SIZE
984        && (Double)n_survivors
985           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
986       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
987       if (VG_(clo_verbosity) > 1)
988          VG_(message)(Vg_DebugMsg,
989                       "memcheck GC: %d new table size (driftup)\n",
990                       secVBitLimit);
991    }
992 }
993 
994 static UWord get_sec_vbits8(Addr a)
995 {
996    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
997    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
998    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
999    UChar        vbits8;
1000    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1001    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1002    // make it to the secondary V bits table.
1003    vbits8 = n->vbits8[amod];
1004    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1005    return vbits8;
1006 }
1007 
1008 static void set_sec_vbits8(Addr a, UWord vbits8)
1009 {
1010    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1011    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
1012    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1013    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1014    // make it to the secondary V bits table.
1015    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1016    if (n) {
1017       n->vbits8[amod] = vbits8;     // update
1018       sec_vbits_updates++;
1019    } else {
1020       // Do a table GC if necessary.  Nb: do this before creating and
1021       // inserting the new node, to avoid erroneously GC'ing the new node.
1022       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1023          gcSecVBitTable();
1024       }
1025 
1026       // New node:  assign the specific byte, make the rest invalid (they
1027       // should never be read as-is, but be cautious).
1028       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1029       n->a            = aAligned;
1030       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1031          n->vbits8[i] = V_BITS8_UNDEFINED;
1032       }
1033       n->vbits8[amod] = vbits8;
1034 
1035       // Insert the new node.
1036       VG_(OSetGen_Insert)(secVBitTable, n);
1037       sec_vbits_new_nodes++;
1038 
1039       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1040       if (n_secVBit_nodes > max_secVBit_nodes)
1041          max_secVBit_nodes = n_secVBit_nodes;
1042    }
1043 }
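
// Example (illustrative): with BYTES_PER_SEC_VBIT_NODE == 16, the byte at
// address 0x5007 lives in the node whose .a == 0x5000, in slot vbits8[7];
// the byte at 0x500f shares that node (slot 15), while 0x5010 starts a new
// node.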
1044 
1045 /* --------------- Endianness helpers --------------- */
1046 
1047 /* Returns the offset in memory of the byteno-th least significant byte
1048    in a wordszB-sized word, given the specified endianness. */
1049 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1050                                     UWord byteno ) {
1051    return bigendian ? (wordszB-1-byteno) : byteno;
1052 }
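
/* Example: in a 4-byte word, the least significant byte (byteno 0) sits at
   memory offset 3 on a big-endian target and offset 0 on a little-endian
   one; byteno 3 (the most significant byte) is the mirror image. */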
1053 
1054 
1055 /* --------------- Ignored address ranges --------------- */
1056 
1057 /* Denotes the address-error-reportability status for address ranges:
1058    IAR_NotIgnored:  the usual case -- report errors in this range
1059    IAR_CommandLine: don't report errors -- from command line setting
1060    IAR_ClientReq:   don't report errors -- from client request
1061 */
1062 typedef
1063    enum { IAR_INVALID=99,
1064           IAR_NotIgnored,
1065           IAR_CommandLine,
1066           IAR_ClientReq }
1067    IARKind;
1068 
1069 static const HChar* showIARKind ( IARKind iark )
1070 {
1071    switch (iark) {
1072       case IAR_INVALID:     return "INVALID";
1073       case IAR_NotIgnored:  return "NotIgnored";
1074       case IAR_CommandLine: return "CommandLine";
1075       case IAR_ClientReq:   return "ClientReq";
1076       default:              return "???";
1077    }
1078 }
1079 
1080 // RangeMap<IARKind>
1081 static RangeMap* gIgnoredAddressRanges = NULL;
1082 
1083 static void init_gIgnoredAddressRanges ( void )
1084 {
1085    if (LIKELY(gIgnoredAddressRanges != NULL))
1086       return;
1087    gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1088                                              VG_(free), IAR_NotIgnored );
1089 }
1090 
1091 Bool MC_(in_ignored_range) ( Addr a )
1092 {
1093    if (LIKELY(gIgnoredAddressRanges == NULL))
1094       return False;
1095    UWord how     = IAR_INVALID;
1096    UWord key_min = ~(UWord)0;
1097    UWord key_max =  (UWord)0;
1098    VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1099    tl_assert(key_min <= a && a <= key_max);
1100    switch (how) {
1101       case IAR_NotIgnored:  return False;
1102       case IAR_CommandLine: return True;
1103       case IAR_ClientReq:   return True;
1104       default: break; /* invalid */
1105    }
1106    VG_(tool_panic)("MC_(in_ignore_range)");
1107    /*NOTREACHED*/
1108 }
1109 
1110 /* Parse two Addr separated by a dash, or fail. */
1111 
1112 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
1113 {
1114    Bool ok = VG_(parse_Addr) (ppc, result1);
1115    if (!ok)
1116       return False;
1117    if (**ppc != '-')
1118       return False;
1119    (*ppc)++;
1120    ok = VG_(parse_Addr) (ppc, result2);
1121    if (!ok)
1122       return False;
1123    return True;
1124 }
1125 
1126 /* Parse a set of ranges separated by commas, or fail.  If they are
1127    valid, add them to the global set of ignored
1128    ranges. */
1129 static Bool parse_ignore_ranges ( const HChar* str0 )
1130 {
1131    init_gIgnoredAddressRanges();
1132    const HChar*  str = str0;
1133    const HChar** ppc = &str;
1134    while (1) {
1135       Addr start = ~(Addr)0;
1136       Addr end   = (Addr)0;
1137       Bool ok    = parse_range(ppc, &start, &end);
1138       if (!ok)
1139          return False;
1140       if (start > end)
1141          return False;
1142       VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1143       if (**ppc == 0)
1144          return True;
1145       if (**ppc != ',')
1146          return False;
1147       (*ppc)++;
1148    }
1149    /*NOTREACHED*/
1150    return False;
1151 }
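
/* Example (illustrative): the string "0x10000-0x1ffff,0x5000000-0x5ffffff"
   parses into two ranges, both bound to IAR_CommandLine in
   gIgnoredAddressRanges.  A missing dash, a trailing comma, or a range
   whose start exceeds its end causes the whole string to be rejected. */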
1152 
1153 /* Add or remove [start, +len) from the set of ignored ranges. */
1154 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1155 {
1156    init_gIgnoredAddressRanges();
1157    const Bool verbose = (VG_(clo_verbosity) > 1);
1158    if (len == 0) {
1159       return False;
1160    }
1161    if (addRange) {
1162       VG_(bindRangeMap)(gIgnoredAddressRanges,
1163                         start, start+len-1, IAR_ClientReq);
1164       if (verbose)
1165          VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1166                    (void*)start, (void*)(start+len-1));
1167    } else {
1168       VG_(bindRangeMap)(gIgnoredAddressRanges,
1169                         start, start+len-1, IAR_NotIgnored);
1170       if (verbose)
1171          VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1172                    (void*)start, (void*)(start+len-1));
1173    }
1174    if (verbose) {
1175       VG_(dmsg)("memcheck:   now have %ld ranges:\n",
1176                 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1177       Word i;
1178       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1179          UWord val     = IAR_INVALID;
1180          UWord key_min = ~(UWord)0;
1181          UWord key_max = (UWord)0;
1182          VG_(indexRangeMap)( &key_min, &key_max, &val,
1183                              gIgnoredAddressRanges, i );
1184          VG_(dmsg)("memcheck:      [%ld]  %016llx-%016llx  %s\n",
1185                    i, (ULong)key_min, (ULong)key_max, showIARKind(val));
1186       }
1187    }
1188    return True;
1189 }
1190 
1191 
1192 /* --------------- Load/store slow cases. --------------- */
1193 
1194 static
1195 __attribute__((noinline))
1196 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1197                                 Addr a, SizeT nBits, Bool bigendian )
1198 {
1199    ULong  pessim[4];     /* only used when p-l-ok=yes */
1200    SSizeT szB            = nBits / 8;
1201    SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
1202    SSizeT i, j;          /* Must be signed. */
1203    SizeT  n_addrs_bad = 0;
1204    Addr   ai;
1205    UChar  vbits8;
1206    Bool   ok;
1207 
1208    /* Code below assumes load size is a power of two and at least 64
1209       bits. */
1210    tl_assert((szB & (szB-1)) == 0 && szL > 0);
1211 
1212    /* If this triggers, you probably just need to increase the size of
1213       the pessim array. */
1214    tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1215 
1216    for (j = 0; j < szL; j++) {
1217       pessim[j] = V_BITS64_DEFINED;
1218       res[j] = V_BITS64_UNDEFINED;
1219    }
1220 
1221    /* Make up a result V word, which contains the loaded data for
1222       valid addresses and Defined for invalid addresses.  Iterate over
1223       the bytes in the word, from the most significant down to the
1224       least.  The vbits to return are calculated into res[].  Also
1225       compute the pessimising value to be used when
1226       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1227       info can be gleaned from the pessim array) but is used as a
1228       cross-check. */
1229    for (j = szL-1; j >= 0; j--) {
1230       ULong vbits64    = V_BITS64_UNDEFINED;
1231       ULong pessim64   = V_BITS64_DEFINED;
1232       UWord long_index = byte_offset_w(szL, bigendian, j);
1233       for (i = 8-1; i >= 0; i--) {
1234          PROF_EVENT(29, "mc_LOADV_128_or_256_slow(loop)");
1235          ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1236          ok = get_vbits8(ai, &vbits8);
1237          vbits64 <<= 8;
1238          vbits64 |= vbits8;
1239          if (!ok) n_addrs_bad++;
1240          pessim64 <<= 8;
1241          pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1242       }
1243       res[long_index] = vbits64;
1244       pessim[long_index] = pessim64;
1245    }
1246 
1247    /* In the common case, all the addresses involved are valid, so we
1248       just return the computed V bits and have done. */
1249    if (LIKELY(n_addrs_bad == 0))
1250       return;
1251 
1252    /* If there's no possibility of getting a partial-loads-ok
1253       exemption, report the error and quit. */
1254    if (!MC_(clo_partial_loads_ok)) {
1255       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1256       return;
1257    }
1258 
1259    /* The partial-loads-ok exemption might apply.  Find out if it
1260       does.  If so, don't report an addressing error, but do return
1261       Undefined for the bytes that are out of range, so as to avoid
1262       false negatives.  If it doesn't apply, just report an addressing
1263       error in the usual way. */
1264 
1265    /* Some code steps along byte strings in aligned chunks
1266       even when there is only a partially defined word at the end (eg,
1267       optimised strlen).  This is allowed by the memory model of
1268       modern machines, since an aligned load cannot span two pages and
1269       thus cannot "partially fault".
1270 
1271       Therefore, a load from a partially-addressable place is allowed
1272       if all of the following hold:
1273       - the command-line flag is set [by default, it isn't]
1274       - it's an aligned load
1275       - at least one of the addresses in the word *is* valid
1276 
1277       Since this suppresses the addressing error, we avoid false
1278       negatives by marking bytes undefined when they come from an
1279       invalid address.
1280    */
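
   /* Worked example (illustrative), assuming --partial-loads-ok=yes: a
      16-byte, 16-aligned load whose last four bytes fall just past the
      end of a mapping gives n_addrs_bad == 4, which is less than szB, so
      the exemption below applies: no addressing error is reported, the
      twelve good bytes keep their loaded V bits, and the four bad bytes
      are forced to Undefined by OR-ing in the pessimising value. */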
1281 
1282    /* "at least one of the addresses is invalid" */
1283    ok = False;
1284    for (j = 0; j < szL; j++)
1285       ok |= pessim[j] != V_BITS64_DEFINED;
1286    tl_assert(ok);
1287 
1288    if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1289       /* Exemption applies.  Use the previously computed pessimising
1290          value and return the combined result, but don't flag an
1291          addressing error.  The pessimising value is Defined for valid
1292          addresses and Undefined for invalid addresses. */
1293       /* for assumption that doing bitwise or implements UifU */
1294       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1295       /* (really need "UifU" here...)
1296          vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
1297       for (j = szL-1; j >= 0; j--)
1298          res[j] |= pessim[j];
1299       return;
1300    }
1301 
1302    /* Exemption doesn't apply.  Flag an addressing error in the normal
1303       way. */
1304    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1305 }
1306 
1307 
1308 static
1309 __attribute__((noinline))
1310 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1311 {
1312    PROF_EVENT(30, "mc_LOADVn_slow");
1313 
1314    /* ------------ BEGIN semi-fast cases ------------ */
1315    /* These deal quickly-ish with the common auxiliary primary map
1316       cases on 64-bit platforms.  Are merely a speedup hack; can be
1317       omitted without loss of correctness/functionality.  Note that in
1318       both cases the "sizeof(void*) == 8" causes these cases to be
1319       folded out by compilers on 32-bit platforms.  These are derived
1320       from LOADV64 and LOADV32.
1321    */
1322    if (LIKELY(sizeof(void*) == 8
1323                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1324       SecMap* sm       = get_secmap_for_reading(a);
1325       UWord   sm_off16 = SM_OFF_16(a);
1326       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1327       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1328          return V_BITS64_DEFINED;
1329       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1330          return V_BITS64_UNDEFINED;
1331       /* else fall into the slow case */
1332    }
1333    if (LIKELY(sizeof(void*) == 8
1334                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1335       SecMap* sm = get_secmap_for_reading(a);
1336       UWord sm_off = SM_OFF(a);
1337       UWord vabits8 = sm->vabits8[sm_off];
1338       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1339          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1340       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1341          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1342       /* else fall into slow case */
1343    }
1344    /* ------------ END semi-fast cases ------------ */
1345 
1346    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
1347    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
1348    SSizeT szB         = nBits / 8;
1349    SSizeT i;          /* Must be signed. */
1350    SizeT  n_addrs_bad = 0;
1351    Addr   ai;
1352    UChar  vbits8;
1353    Bool   ok;
1354 
1355    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1356 
1357    /* Make up a 64-bit result V word, which contains the loaded data
1358       for valid addresses and Defined for invalid addresses.  Iterate
1359       over the bytes in the word, from the most significant down to
1360       the least.  The vbits to return are calculated into vbits64.
1361       Also compute the pessimising value to be used when
1362       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1363       info can be gleaned from pessim64) but is used as a
1364       cross-check. */
1365    for (i = szB-1; i >= 0; i--) {
1366       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1367       ai = a + byte_offset_w(szB, bigendian, i);
1368       ok = get_vbits8(ai, &vbits8);
1369       vbits64 <<= 8;
1370       vbits64 |= vbits8;
1371       if (!ok) n_addrs_bad++;
1372       pessim64 <<= 8;
1373       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1374    }
1375 
1376    /* In the common case, all the addresses involved are valid, so we
1377       just return the computed V bits and have done. */
1378    if (LIKELY(n_addrs_bad == 0))
1379       return vbits64;
1380 
1381    /* If there's no possibility of getting a partial-loads-ok
1382       exemption, report the error and quit. */
1383    if (!MC_(clo_partial_loads_ok)) {
1384       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1385       return vbits64;
1386    }
1387 
1388    /* The partial-loads-ok exemption might apply.  Find out if it
1389       does.  If so, don't report an addressing error, but do return
1390       Undefined for the bytes that are out of range, so as to avoid
1391       false negatives.  If it doesn't apply, just report an addressing
1392       error in the usual way. */
1393 
1394    /* Some code steps along byte strings in aligned word-sized chunks
1395       even when there is only a partially defined word at the end (eg,
1396       optimised strlen).  This is allowed by the memory model of
1397       modern machines, since an aligned load cannot span two pages and
1398       thus cannot "partially fault", even though such behaviour is
1399       declared undefined by ANSI C/C++.
1400 
1401       Therefore, a load from a partially-addressible place is allowed
1402       if all of the following hold:
1403       - the command-line flag is set [by default, it isn't]
1404       - it's a word-sized, word-aligned load
1405       - at least one of the addresses in the word *is* valid
1406 
1407       Since this suppresses the addressing error, we avoid false
1408       negatives by marking bytes undefined when they come from an
1409       invalid address.
1410    */
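
   /* Worked illustration (a sketch only; it assumes V_BITS8_DEFINED ==
      0x00 and V_BITS8_UNDEFINED == 0xFF, consistent with the asserts
      below).  Consider a 4-byte little-endian load where a+0 .. a+2
      are addressable but a+3 is not.  After the loop above:
         vbits64  (low 32 bits) == 0x00dddddd  -- dd = V bits actually read
         pessim64 (low 32 bits) == 0xFF000000  -- Undefined only at a+3
      so "vbits64 |= pessim64" in the exemption cases below keeps the
      V bits of the in-range bytes, forces the out-of-range byte to
      Undefined, and no addressing error is reported. */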
1411 
1412    /* "at least one of the addresses is invalid" */
1413    tl_assert(pessim64 != V_BITS64_DEFINED);
1414 
1415    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1416        && n_addrs_bad < VG_WORDSIZE) {
1417       /* Exemption applies.  Use the previously computed pessimising
1418          value for vbits64 and return the combined result, but don't
1419          flag an addressing error.  The pessimising value is Defined
1420          for valid addresses and Undefined for invalid addresses. */
1421       /* for assumption that doing bitwise or implements UifU */
1422       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1423       /* (really need "UifU" here...)
1424          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1425       vbits64 |= pessim64;
1426       return vbits64;
1427    }
1428 
1429    /* Also, it appears that gcc generates string-stepping code in
1430       32-bit chunks on 64-bit platforms.  So, also grant an exemption
1431       for this case.  Note that the first clause of the conditional
1432       (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1433       will get folded out in 32 bit builds. */
1434    if (VG_WORDSIZE == 8
1435        && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1436       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1437       /* (really need "UifU" here...)
1438          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1439       vbits64 |= pessim64;
1440       /* Mark the upper 32 bits as undefined, just to be on the safe
1441          side. */
1442       vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1443       return vbits64;
1444    }
1445 
1446    /* Exemption doesn't apply.  Flag an addressing error in the normal
1447       way. */
1448    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1449 
1450    return vbits64;
1451 }
1452 
1453 
1454 static
1455 __attribute__((noinline))
1456 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1457 {
1458    SizeT szB = nBits / 8;
1459    SizeT i, n_addrs_bad = 0;
1460    UChar vbits8;
1461    Addr  ai;
1462    Bool  ok;
1463 
1464    PROF_EVENT(35, "mc_STOREVn_slow");
1465 
1466    /* ------------ BEGIN semi-fast cases ------------ */
1467    /* These deal quickly-ish with the common auxiliary primary map
1468       cases on 64-bit platforms.  They are merely a speedup hack and can
1469       be omitted without loss of correctness/functionality.  Note that in
1470       both cases the "sizeof(void*) == 8" test causes these cases to be
1471       folded out by compilers on 32-bit platforms.  The logic below
1472       is somewhat similar to some cases extensively commented in
1473       MC_(helperc_STOREV8).
1474    */
1475    if (LIKELY(sizeof(void*) == 8
1476                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1477       SecMap* sm       = get_secmap_for_reading(a);
1478       UWord   sm_off16 = SM_OFF_16(a);
1479       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1480       if (LIKELY( !is_distinguished_sm(sm) &&
1481                           (VA_BITS16_DEFINED   == vabits16 ||
1482                            VA_BITS16_UNDEFINED == vabits16) )) {
1483          /* Handle common case quickly: a is suitably aligned, */
1484          /* is mapped, and is addressable. */
1485          // Convert full V-bits in register to compact 2-bit form.
1486          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1487             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1488             return;
1489          } else if (V_BITS64_UNDEFINED == vbytes) {
1490             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1491             return;
1492          }
1493          /* else fall into the slow case */
1494       }
1495       /* else fall into the slow case */
1496    }
1497    if (LIKELY(sizeof(void*) == 8
1498                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1499       SecMap* sm      = get_secmap_for_reading(a);
1500       UWord   sm_off  = SM_OFF(a);
1501       UWord   vabits8 = sm->vabits8[sm_off];
1502       if (LIKELY( !is_distinguished_sm(sm) &&
1503                           (VA_BITS8_DEFINED   == vabits8 ||
1504                            VA_BITS8_UNDEFINED == vabits8) )) {
1505          /* Handle common case quickly: a is suitably aligned, */
1506          /* is mapped, and is addressable. */
1507          // Convert full V-bits in register to compact 2-bit form.
1508          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1509             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1510             return;
1511          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1512             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1513             return;
1514          }
1515          /* else fall into the slow case */
1516       }
1517       /* else fall into the slow case */
1518    }
1519    /* ------------ END semi-fast cases ------------ */
1520 
1521    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1522 
1523    /* Dump vbytes in memory, iterating from least to most significant
1524       byte.  At the same time, establish the addressability of the location. */
1525    for (i = 0; i < szB; i++) {
1526       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1527       ai     = a + byte_offset_w(szB, bigendian, i);
1528       vbits8 = vbytes & 0xff;
1529       ok     = set_vbits8(ai, vbits8);
1530       if (!ok) n_addrs_bad++;
1531       vbytes >>= 8;
1532    }
1533 
1534    /* If an address error has happened, report it. */
1535    if (n_addrs_bad > 0)
1536       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1537 }
1538 
1539 
1540 /*------------------------------------------------------------*/
1541 /*--- Setting permissions over address ranges.             ---*/
1542 /*------------------------------------------------------------*/
1543 
1544 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1545                                       UWord dsm_num )
1546 {
1547    UWord    sm_off, sm_off16;
1548    UWord    vabits2 = vabits16 & 0x3;
1549    SizeT    lenA, lenB, len_to_next_secmap;
1550    Addr     aNext;
1551    SecMap*  sm;
1552    SecMap** sm_ptr;
1553    SecMap*  example_dsm;
1554 
1555    PROF_EVENT(150, "set_address_range_perms");
1556 
1557    /* Check the V+A bits make sense. */
1558    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
1559              VA_BITS16_UNDEFINED == vabits16 ||
1560              VA_BITS16_DEFINED   == vabits16);
1561 
1562    // This code should never write PDBs;  ensure this.  (See comment above
1563    // set_vabits2().)
1564    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1565 
1566    if (lenT == 0)
1567       return;
1568 
1569    if (lenT > 256 * 1024 * 1024) {
1570       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1571          const HChar* s = "unknown???";
1572          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1573          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1574          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
1575          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1576                                   "large range [0x%lx, 0x%lx) (%s)\n",
1577                                   a, a + lenT, s);
1578       }
1579    }
1580 
1581 #ifndef PERF_FAST_SARP
1582    /*------------------ debug-only case ------------------ */
1583    {
1584       // Endianness doesn't matter here because all bytes are being set to
1585       // the same value.
1586       // Nb: We don't have to worry about updating the sec-V-bits table
1587       // after these set_vabits2() calls because this code never writes
1588       // VA_BITS2_PARTDEFINED values.
1589       SizeT i;
1590       for (i = 0; i < lenT; i++) {
1591          set_vabits2(a + i, vabits2);
1592       }
1593       return;
1594    }
1595 #endif
1596 
1597    /*------------------ standard handling ------------------ */
1598 
1599    /* Get the distinguished secondary that we might want
1600       to use (part of the space-compression scheme). */
1601    example_dsm = &sm_distinguished[dsm_num];
1602 
1603    // We have to handle ranges covering various combinations of partial and
1604    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
1605    // Cases marked with a '*' are common.
1606    //
1607    //   TYPE                                             PARTS USED
1608    //   ----                                             ----------
1609    // * one partial sec-map                  (p)         1
1610    // - one whole sec-map                    (P)         2
1611    //
1612    // * two partial sec-maps                 (pp)        1,3
1613    // - one partial, one whole sec-map       (pP)        1,2
1614    // - one whole, one partial sec-map       (Pp)        2,3
1615    // - two whole sec-maps                   (PP)        2,2
1616    //
1617    // * one partial, one whole, one partial  (pPp)       1,2,3
1618    // - one partial, two whole               (pPP)       1,2,2
1619    // - two whole, one partial               (PPp)       2,2,3
1620    // - three whole                          (PPP)       2,2,2
1621    //
1622    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
1623    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
1624    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
1625    // - N whole                              (PP...PP)   2,2...2,2
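   // For example (illustrative figures only): a 100-byte range starting
   // 40 bytes before the end of a sec-map is a "pp" case -- Part 1 below
   // handles the 40 bytes in the first sec-map and Part 3 handles the
   // remaining 60 bytes at the start of the next one.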
1626 
1627    // Break up total length (lenT) into two parts:  length in the first
1628    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
1629    aNext = start_of_this_sm(a) + SM_SIZE;
1630    len_to_next_secmap = aNext - a;
1631    if ( lenT <= len_to_next_secmap ) {
1632       // Range entirely within one sec-map.  Covers almost all cases.
1633       PROF_EVENT(151, "set_address_range_perms-single-secmap");
1634       lenA = lenT;
1635       lenB = 0;
1636    } else if (is_start_of_sm(a)) {
1637       // Range spans at least one whole sec-map, and starts at the beginning
1638       // of a sec-map; skip to Part 2.
1639       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1640       lenA = 0;
1641       lenB = lenT;
1642       goto part2;
1643    } else {
1644       // Range spans two or more sec-maps, first one is partial.
1645       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1646       lenA = len_to_next_secmap;
1647       lenB = lenT - lenA;
1648    }
1649 
1650    //------------------------------------------------------------------------
1651    // Part 1: Deal with the first sec_map.  Most of the time the range will be
1652    // entirely within a sec_map and this part alone will suffice.  Also,
1653    // doing it this way lets us avoid repeatedly testing for the crossing of
1654    // a sec-map boundary within these loops.
1655    //------------------------------------------------------------------------
1656 
1657    // If it's distinguished, make it undistinguished if necessary.
1658    sm_ptr = get_secmap_ptr(a);
1659    if (is_distinguished_sm(*sm_ptr)) {
1660       if (*sm_ptr == example_dsm) {
1661          // Sec-map already has the V+A bits that we want, so skip.
1662          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1663          a    = aNext;
1664          lenA = 0;
1665       } else {
1666          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1667          *sm_ptr = copy_for_writing(*sm_ptr);
1668       }
1669    }
1670    sm = *sm_ptr;
1671 
1672    // 1 byte steps
1673    while (True) {
1674       if (VG_IS_8_ALIGNED(a)) break;
1675       if (lenA < 1)           break;
1676       PROF_EVENT(156, "set_address_range_perms-loop1a");
1677       sm_off = SM_OFF(a);
1678       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1679       a    += 1;
1680       lenA -= 1;
1681    }
1682    // 8-aligned, 8 byte steps
1683    while (True) {
1684       if (lenA < 8) break;
1685       PROF_EVENT(157, "set_address_range_perms-loop8a");
1686       sm_off16 = SM_OFF_16(a);
1687       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1688       a    += 8;
1689       lenA -= 8;
1690    }
1691    // 1 byte steps
1692    while (True) {
1693       if (lenA < 1) break;
1694       PROF_EVENT(158, "set_address_range_perms-loop1b");
1695       sm_off = SM_OFF(a);
1696       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1697       a    += 1;
1698       lenA -= 1;
1699    }
1700 
1701    // We've finished the first sec-map.  Is that it?
1702    if (lenB == 0)
1703       return;
1704 
1705    //------------------------------------------------------------------------
1706    // Part 2: Fast-set entire sec-maps at a time.
1707    //------------------------------------------------------------------------
1708   part2:
1709    // 64KB-aligned, 64KB steps.
1710    // Nb: we can reach here with lenB < SM_SIZE
1711    tl_assert(0 == lenA);
1712    while (True) {
1713       if (lenB < SM_SIZE) break;
1714       tl_assert(is_start_of_sm(a));
1715       PROF_EVENT(159, "set_address_range_perms-loop64K");
1716       sm_ptr = get_secmap_ptr(a);
1717       if (!is_distinguished_sm(*sm_ptr)) {
1718          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1719          // Free the non-distinguished sec-map that we're replacing.  This
1720          // case happens moderately often, enough to be worthwhile.
1721          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1722          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1723       }
1724       update_SM_counts(*sm_ptr, example_dsm);
1725       // Make the sec-map entry point to the example DSM
1726       *sm_ptr = example_dsm;
1727       lenB -= SM_SIZE;
1728       a    += SM_SIZE;
1729    }
1730 
1731    // We've finished the whole sec-maps.  Is that it?
1732    if (lenB == 0)
1733       return;
1734 
1735    //------------------------------------------------------------------------
1736    // Part 3: Finish off the final partial sec-map, if necessary.
1737    //------------------------------------------------------------------------
1738 
1739    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1740 
1741    // If it's distinguished, make it undistinguished if necessary.
1742    sm_ptr = get_secmap_ptr(a);
1743    if (is_distinguished_sm(*sm_ptr)) {
1744       if (*sm_ptr == example_dsm) {
1745          // Sec-map already has the V+A bits that we want, so stop.
1746          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1747          return;
1748       } else {
1749          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1750          *sm_ptr = copy_for_writing(*sm_ptr);
1751       }
1752    }
1753    sm = *sm_ptr;
1754 
1755    // 8-aligned, 8 byte steps
1756    while (True) {
1757       if (lenB < 8) break;
1758       PROF_EVENT(163, "set_address_range_perms-loop8b");
1759       sm_off16 = SM_OFF_16(a);
1760       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1761       a    += 8;
1762       lenB -= 8;
1763    }
1764    // 1 byte steps
1765    while (True) {
1766       if (lenB < 1) return;
1767       PROF_EVENT(164, "set_address_range_perms-loop1c");
1768       sm_off = SM_OFF(a);
1769       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1770       a    += 1;
1771       lenB -= 1;
1772    }
1773 }
1774 
1775 
1776 /* --- Set permissions for arbitrary address ranges --- */
1777 
1778 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1779 {
1780    PROF_EVENT(40, "MC_(make_mem_noaccess)");
1781    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1782    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1783    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1784       ocache_sarp_Clear_Origins ( a, len );
1785 }
1786 
1787 static void make_mem_undefined ( Addr a, SizeT len )
1788 {
1789    PROF_EVENT(41, "make_mem_undefined");
1790    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1791    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1792 }
1793 
1794 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1795 {
1796    PROF_EVENT(43, "MC_(make_mem_undefined)");
1797    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1798    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1799    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1800       ocache_sarp_Set_Origins ( a, len, otag );
1801 }
1802 
1803 static
1804 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1805                                           ThreadId tid, UInt okind )
1806 {
1807    UInt        ecu;
1808    ExeContext* here;
1809    /* VG_(record_ExeContext) checks for validity of tid, and asserts
1810       if it is invalid.  So no need to do it here. */
1811    tl_assert(okind <= 3);
1812    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1813    tl_assert(here);
1814    ecu = VG_(get_ECU_from_ExeContext)(here);
1815    tl_assert(VG_(is_plausible_ECU)(ecu));
1816    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1817 }
1818 
1819 static
1820 void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
1821 {
1822    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1823 }
1824 
1825 static
1826 void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
1827 {
1828    MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1829 }
1830 
1831 void MC_(make_mem_defined) ( Addr a, SizeT len )
1832 {
1833    PROF_EVENT(42, "MC_(make_mem_defined)");
1834    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1835    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1836    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1837       ocache_sarp_Clear_Origins ( a, len );
1838 }
1839 
1840 /* For each byte in [a,a+len), if the byte is addressable, make it be
1841    defined, but if it isn't addressable, leave it alone.  In other
1842    words, a version of MC_(make_mem_defined) that doesn't mess with
1843    addressability.  Low-performance implementation. */
1844 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1845 {
1846    SizeT i;
1847    UChar vabits2;
1848    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1849    for (i = 0; i < len; i++) {
1850       vabits2 = get_vabits2( a+i );
1851       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1852          set_vabits2(a+i, VA_BITS2_DEFINED);
1853          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1854             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1855          }
1856       }
1857    }
1858 }
1859 
1860 /* Similarly (needed for mprotect handling ..) */
1861 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1862 {
1863    SizeT i;
1864    UChar vabits2;
1865    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1866    for (i = 0; i < len; i++) {
1867       vabits2 = get_vabits2( a+i );
1868       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1869          set_vabits2(a+i, VA_BITS2_DEFINED);
1870          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1871             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1872          }
1873       }
1874    }
1875 }
1876 
1877 /* --- Block-copy permissions (needed for implementing realloc() and
1878        sys_mremap). --- */
1879 
1880 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1881 {
1882    SizeT i, j;
1883    UChar vabits2, vabits8;
1884    Bool  aligned, nooverlap;
1885 
1886    DEBUG("MC_(copy_address_range_state)\n");
1887    PROF_EVENT(50, "MC_(copy_address_range_state)");
1888 
1889    if (len == 0 || src == dst)
1890       return;
1891 
1892    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1893    nooverlap = src+len <= dst || dst+len <= src;
1894 
1895    if (nooverlap && aligned) {
1896 
1897       /* Vectorised fast case, when no overlap and suitably aligned */
1898       /* vector loop */
1899       i = 0;
1900       while (len >= 4) {
1901          vabits8 = get_vabits8_for_aligned_word32( src+i );
1902          set_vabits8_for_aligned_word32( dst+i, vabits8 );
1903          if (LIKELY(VA_BITS8_DEFINED == vabits8
1904                             || VA_BITS8_UNDEFINED == vabits8
1905                             || VA_BITS8_NOACCESS == vabits8)) {
1906             /* do nothing */
1907          } else {
1908             /* have to copy secondary map info */
1909             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1910                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1911             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1912                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1913             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1914                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1915             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1916                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1917          }
1918          i += 4;
1919          len -= 4;
1920       }
1921       /* fixup loop */
1922       while (len >= 1) {
1923          vabits2 = get_vabits2( src+i );
1924          set_vabits2( dst+i, vabits2 );
1925          if (VA_BITS2_PARTDEFINED == vabits2) {
1926             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1927          }
1928          i++;
1929          len--;
1930       }
1931 
1932    } else {
1933 
1934       /* We have to do things the slow way */
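      /* Overlap is possible here, so copy in whichever direction avoids
         clobbering source bytes that haven't been copied yet: backwards
         (high to low) when the destination lies above the source, and
         forwards (low to high) when it lies below. */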
1935       if (src < dst) {
1936          for (i = 0, j = len-1; i < len; i++, j--) {
1937             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1938             vabits2 = get_vabits2( src+j );
1939             set_vabits2( dst+j, vabits2 );
1940             if (VA_BITS2_PARTDEFINED == vabits2) {
1941                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1942             }
1943          }
1944       }
1945 
1946       if (src > dst) {
1947          for (i = 0; i < len; i++) {
1948             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1949             vabits2 = get_vabits2( src+i );
1950             set_vabits2( dst+i, vabits2 );
1951             if (VA_BITS2_PARTDEFINED == vabits2) {
1952                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1953             }
1954          }
1955       }
1956    }
1957 
1958 }
1959 
1960 
1961 /*------------------------------------------------------------*/
1962 /*--- Origin tracking stuff - cache basics                 ---*/
1963 /*------------------------------------------------------------*/
1964 
1965 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1966    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1967 
1968    Note that this implementation draws inspiration from the "origin
1969    tracking by value piggybacking" scheme described in "Tracking Bad
1970    Apples: Reporting the Origin of Null and Undefined Value Errors"
1971    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1972    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1973    implemented completely differently.
1974 
1975    Origin tags and ECUs -- about the shadow values
1976    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1977 
1978    This implementation tracks the defining point of all uninitialised
1979    values using so called "origin tags", which are 32-bit integers,
1980    rather than using the values themselves to encode the origins.  The
1981    latter, so-called "value piggybacking", is what the OOPSLA07 paper
1982    describes.
1983 
1984    Origin tags, as tracked by the machinery below, are 32-bit unsigned
1985    ints (UInts), regardless of the machine's word size.  Each tag
1986    comprises an upper 30-bit ECU field and a lower 2-bit
1987    'kind' field.  The ECU field is a number given out by m_execontext
1988    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
1989    directly as an origin tag (otag), but in fact we want to put
1990    additional information in the 'kind' field to indicate roughly where the
1991    tag came from.  This helps print more understandable error messages
1992    for the user -- it has no other purpose.  In summary:
1993 
1994    * Both ECUs and origin tags are represented as 32-bit words
1995 
1996    * m_execontext and the core-tool interface deal purely in ECUs.
1997      They have no knowledge of origin tags - that is a purely
1998      Memcheck-internal matter.
1999 
2000    * all valid ECUs have the lowest 2 bits zero and at least
2001      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2002 
2003    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2004      constants defined in mc_include.h.
2005 
2006    * to convert an otag back to an ECU, AND it with ~3
2007 
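   As a small sketch of those two conversions (the ECU value 0x1004 is
   purely illustrative; MC_OKIND_STACK is one of the kind constants in
   mc_include.h):

      UInt ecu  = 0x1004;               // from m_execontext; low 2 bits zero
      UInt otag = ecu | MC_OKIND_STACK; // ECU -> otag, kind attached
      UInt back = otag & ~3;            // otag -> ECU again (== 0x1004)
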
2008    One important fact is that no valid otag is zero.  A zero otag is
2009    used by the implementation to indicate "no origin", which could
2010    mean that either the value is defined, or it is undefined but the
2011    implementation somehow managed to lose the origin.
2012 
2013    The ECU used for memory created by malloc etc is derived from the
2014    stack trace at the time the malloc etc happens.  This means the
2015    mechanism can show the exact allocation point for heap-created
2016    uninitialised values.
2017 
2018    In contrast, it is simply too expensive to create a complete
2019    backtrace for each stack allocation.  Therefore we merely use a
2020    depth-1 backtrace for stack allocations, which can be done once at
2021    translation time, rather than N times at run time.  The result of
2022    this is that, for stack created uninitialised values, Memcheck can
2023    only show the allocating function, and not what called it.
2024    Furthermore, compilers tend to move the stack pointer just once at
2025    the start of the function, to allocate all locals, and so in fact
2026    the stack origin almost always simply points to the opening brace
2027    of the function.  Net result is, for stack origins, the mechanism
2028    can tell you in which function the undefined value was created, but
2029    that's all.  Users will need to carefully check all locals in the
2030    specified function.
2031 
2032    Shadowing registers and memory
2033    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2034 
2035    Memory is shadowed using a two level cache structure (ocacheL1 and
2036    ocacheL2).  Memory references are first directed to ocacheL1.  This
2037    is a traditional 2-way set associative cache with 32-byte lines and
2038    approximate LRU replacement within each set.
2039 
2040    A naive implementation would require storing one 32 bit otag for
2041    each byte of memory covered, a 4:1 space overhead.  Instead, there
2042    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2043    that shows which of the 4 bytes have that shadow value and which
2044    have a shadow value of zero (indicating no origin).  Hence a lot of
2045    space is saved, but the cost is that only one different origin per
2046    4 bytes of address space can be represented.  This is a source of
2047    imprecision, but how much of a problem it really is remains to be
2048    seen.
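
   As a concrete sketch of that encoding (using the OCacheLine fields
   declared further below):

      line->w32[i]   = otag;  // one tag for the i'th aligned 32-bit word
      line->descr[i] = 0xF;   // all four bytes of that word carry the tag
      // descr[i] == 0 would mean none of the four bytes has an origin

   Intermediate descr values mark only some of the four bytes as
   carrying the tag.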
2049 
2050    A cache line that contains all zeroes ("no origins") contains no
2051    useful information, and can be ejected from the L1 cache "for
2052    free", in the sense that a read miss on the L1 causes a line of
2053    zeroes to be installed.  However, ejecting a line containing
2054    nonzeroes risks losing origin information permanently.  In order to
2055    prevent such lossage, ejected nonzero lines are placed in a
2056    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2057    lines.  This can grow arbitrarily large, and so should ensure that
2058    Memcheck runs out of memory in preference to losing useful origin
2059    info due to cache size limitations.
2060 
2061    Shadowing registers is a bit tricky, because the shadow values are
2062    32 bits, regardless of the size of the register.  That gives a
2063    problem for registers smaller than 32 bits.  The solution is to
2064    find spaces in the guest state that are unused, and use those to
2065    shadow guest state fragments smaller than 32 bits.  For example, on
2066    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
2067    shadow are allocated for the register's otag, then there are still
2068    12 bytes left over which could be used to shadow 3 other values.
2069 
2070    This implies there is some non-obvious mapping from guest state
2071    (start,length) pairs to the relevant shadow offset (for the origin
2072    tags).  And it is unfortunately guest-architecture specific.  The
2073    mapping is contained in mc_machine.c, which is quite lengthy but
2074    straightforward.
2075 
2076    Instrumenting the IR
2077    ~~~~~~~~~~~~~~~~~~~~
2078 
2079    Instrumentation is largely straightforward, and done by the
2080    functions schemeE and schemeS in mc_translate.c.  These generate
2081    code for handling the origin tags of expressions (E) and statements
2082    (S) respectively.  The rather strange names are a reference to the
2083    "compilation schemes" shown in Simon Peyton Jones' book "The
2084    Implementation of Functional Programming Languages" (Prentice Hall,
2085    1987, see
2086    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2087 
2088    schemeS merely arranges to move shadow values around the guest
2089    state to track the incoming IR.  schemeE is largely trivial too.
2090    The only significant point is how to compute the otag corresponding
2091    to binary (or ternary, quaternary, etc) operator applications.  The
2092    rule is simple: just take whichever value is larger (32-bit
2093    unsigned max).  Constants get the special value zero.  Hence this
2094    rule always propagates a nonzero (known) otag in preference to a
2095    zero (unknown, or more likely, value-is-defined) tag, as we want.
2096    If two different undefined values are inputs to a binary operator
2097    application, then which is propagated is arbitrary, but that
2098    doesn't matter, since the program is erroneous in using either of
2099    the values, and so there's no point in attempting to propagate
2100    both.
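
   As a sketch: if schemeE has computed otags tx and ty for the two
   operands of a binary application, the otag attached to the result is
   simply

      Max32U(tx, ty)

   so a nonzero (known-origin) tag always wins over zero, and when both
   are nonzero one of them is picked arbitrarily, which as noted above
   is harmless.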
2101 
2102    Since constants are abstracted to (otag) zero, much of the
2103    instrumentation code can be folded out without difficulty by the
2104    generic post-instrumentation IR cleanup pass, using these rules:
2105    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
2106    are constants is evaluated at JIT time; the resulting dead code is
2107    then removed.  In practice this causes surprisingly few Max32Us to
2108    survive through to backend code generation.
2109 
2110    Integration with the V-bits machinery
2111    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2112 
2113    This is again largely straightforward.  Mostly the otag and V bits
2114    stuff are independent.  The only point of interaction is when the V
2115    bits instrumenter creates a call to a helper function to report an
2116    uninitialised value error -- in that case it must first use schemeE
2117    to get hold of the origin tag expression for the value, and pass
2118    that to the helper too.
2119 
2120    There is the usual stuff to do with setting address range
2121    permissions.  When memory is painted undefined, we must also know
2122    the origin tag to paint with, which involves some tedious plumbing,
2123    particularly to do with the fast case stack handlers.  When memory
2124    is painted defined or noaccess then the origin tags must be forced
2125    to zero.
2126 
2127    One of the goals of the implementation was to ensure that the
2128    non-origin tracking mode isn't slowed down at all.  To do this,
2129    various functions to do with memory permissions setting (again,
2130    mostly pertaining to the stack) are duplicated for the with- and
2131    without-otag case.
2132 
2133    Dealing with stack redzones, and the NIA cache
2134    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2135 
2136    This is one of the few non-obvious parts of the implementation.
2137 
2138    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2139    reserved area below the stack pointer, that can be used as scratch
2140    space by compiler generated code for functions.  In the Memcheck
2141    sources this is referred to as the "stack redzone".  The important
2142    thing here is that such redzones are considered volatile across
2143    function calls and returns.  So Memcheck takes care to mark them as
2144    undefined for each call and return, on the afflicted platforms.
2145    Past experience shows this is essential in order to get reliable
2146    messages about uninitialised values that come from the stack.
2147 
2148    So the question is, when we paint a redzone undefined, what origin
2149    tag should we use for it?  Consider a function f() calling g().  If
2150    we paint the redzone using an otag derived from the ExeContext of
2151    the CALL/BL instruction in f, then any errors in g causing it to
2152    use uninitialised values that happen to lie in the redzone, will be
2153    reported as having their origin in f.  Which is highly confusing.
2154 
2155    The same applies for returns: if, on a return, we paint the redzone
2156    using an origin tag derived from the ExeContext of the RET/BLR
2157    instruction in g, then any later errors in f causing it to use
2158    uninitialised values in the redzone, will be reported as having
2159    their origin in g.  Which is just as confusing.
2160 
2161    To do it right, in both cases we need to use an origin tag which
2162    pertains to the instruction which dynamically follows the CALL/BL
2163    or RET/BLR.  In short, one derived from the NIA - the "next
2164    instruction address".
2165 
2166    To make this work, Memcheck's redzone-painting helper,
2167    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2168    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
2169    ExeContext's ECU as the basis for the otag used to paint the
2170    redzone.  The expensive part of this is converting an NIA into an
2171    ECU, since this happens once for every call and every return.  So
2172    we use a simple 511-line, 2-way set associative cache
2173    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2174    the cost out.
2175 
2176    Further background comments
2177    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2178 
2179    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
2180    > it really just the address of the relevant ExeContext?
2181 
2182    Well, it's not the address, but a value which has a 1-1 mapping
2183    with ExeContexts, and is guaranteed not to be zero, since zero
2184    denotes (to memcheck) "unknown origin or defined value".  So these
2185    UInts are just numbers starting at 4 and incrementing by 4; each
2186    ExeContext is given a number when it is created.  (*** NOTE this
2187    confuses otags and ECUs; see comments above ***).
2188 
2189    Making these otags 32-bit regardless of the machine's word size
2190    makes the 64-bit implementation easier (next para).  And it doesn't
2191    really limit us in any way, since for the tags to overflow would
2192    require that the program somehow caused 2^30-1 different
2193    ExeContexts to be created, in which case it is probably in deep
2194    trouble.  Not to mention V will have soaked up many tens of
2195    gigabytes of memory merely to store them all.
2196 
2197    So having 64-bit origins doesn't really buy you anything, and has
2198    the following downsides:
2199 
2200    Suppose that instead, an otag is a UWord.  This would mean that, on
2201    a 64-bit target,
2202 
2203    1. It becomes hard to shadow any element of guest state which is
2204       smaller than 8 bytes.  To do so means you'd need to find some
2205       8-byte-sized hole in the guest state which you don't want to
2206       shadow, and use that instead to hold the otag.  On ppc64, the
2207       condition code register(s) are split into 20 UChar sized pieces,
2208       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2209       and so that would entail finding 160 bytes somewhere else in the
2210       guest state.
2211 
2212       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2213       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2214       same) and so I had to look for 4 untracked otag-sized areas in
2215       the guest state to make that possible.
2216 
2217       The same problem exists of course when origin tags are only 32
2218       bits, but it's less extreme.
2219 
2220    2. (More compelling) it doubles the size of the origin shadow
2221       memory.  Given that the shadow memory is organised as a fixed
2222       size cache, and that accuracy of tracking is limited by origins
2223       falling out the cache due to space conflicts, this isn't good.
2224 
2225    > Another question: is the origin tracking perfect, or are there
2226    > cases where it fails to determine an origin?
2227 
2228    It is imperfect for at least the following reasons, and
2229    probably more:
2230 
2231    * Insufficient capacity in the origin cache.  When a line is
2232      evicted from the cache it is gone forever, and so subsequent
2233      queries for the line produce zero, indicating no origin
2234      information.  Interestingly, a line containing all zeroes can be
2235      evicted "free" from the cache, since it contains no useful
2236      information, so there is scope perhaps for some cleverer cache
2237      management schemes.  (*** NOTE, with the introduction of the
2238      second level origin tag cache, ocacheL2, this is no longer a
2239      problem. ***)
2240 
2241    * The origin cache only stores one otag per 32-bits of address
2242      space, plus 4 bits indicating which of the 4 bytes has that tag
2243      and which are considered defined.  The result is that if two
2244      undefined bytes in the same word are stored in memory, the first
2245      stored byte's origin will be lost and replaced by the origin for
2246      the second byte.
2247 
2248    * Nonzero origin tags for defined values.  Consider a binary
2249      operator application op(x,y).  Suppose y is undefined (and so has
2250      a valid nonzero origin tag), and x is defined, but erroneously
2251      has a nonzero origin tag (defined values should have tag zero).
2252      If the erroneous tag has a numeric value greater than y's tag,
2253      then the rule for propagating origin tags though binary
2254      operations, which is simply to take the unsigned max of the two
2255      tags, will erroneously propagate x's tag rather than y's.
2256 
2257    * Some obscure uses of x86/amd64 byte registers can cause lossage
2258      or confusion of origins.  %AH .. %DH are treated as different
2259      from, and unrelated to, their parent registers, %EAX .. %EDX.
2260      So some weird sequences like
2261 
2262         movb undefined-value, %AH
2263         movb defined-value, %AL
2264         .. use %AX or %EAX ..
2265 
2266      will cause the origin attributed to %AH to be ignored, since %AL,
2267      %AX, %EAX are treated as the same register, and %AH as a
2268      completely separate one.
2269 
2270    But having said all that, it actually seems to work fairly well in
2271    practice.
2272 */
2273 
2274 static UWord stats_ocacheL1_find           = 0;
2275 static UWord stats_ocacheL1_found_at_1     = 0;
2276 static UWord stats_ocacheL1_found_at_N     = 0;
2277 static UWord stats_ocacheL1_misses         = 0;
2278 static UWord stats_ocacheL1_lossage        = 0;
2279 static UWord stats_ocacheL1_movefwds       = 0;
2280 
2281 static UWord stats__ocacheL2_refs          = 0;
2282 static UWord stats__ocacheL2_misses        = 0;
2283 static UWord stats__ocacheL2_n_nodes_max   = 0;
2284 
2285 /* Cache of 32-bit values, one every 32 bits of address space */
2286 
2287 #define OC_BITS_PER_LINE 5
2288 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
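
/* With OC_BITS_PER_LINE == 5, each line covers 2^5 == 32 bytes of
   address space, i.e. OC_W32S_PER_LINE == 8 aligned 32-bit words. */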
2289 
2290 static INLINE UWord oc_line_offset ( Addr a ) {
2291    return (a >> 2) & (OC_W32S_PER_LINE - 1);
2292 }
2293 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2294    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2295 }
2296 
2297 #define OC_LINES_PER_SET 2
2298 
2299 #define OC_N_SET_BITS    20
2300 #define OC_N_SETS        (1 << OC_N_SET_BITS)
2301 
2302 /* These settings give:
2303    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2304    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2305 */
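/* (Derivation, assuming no padding beyond natural alignment: an
   OCacheLine is 8 + 8*4 + 8 == 48 bytes on a 64-bit host and
   4 + 8*4 + 8 == 44 bytes on a 32-bit host, so the cache occupies
   2^20 sets * 2 lines * 48 (or 44) bytes, of which 2^20 * 2 * 32
   bytes are line payload -- the "useful" figure above.) */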
2306 
2307 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2308 
2309 
2310 typedef
2311    struct {
2312       Addr  tag;
2313       UInt  w32[OC_W32S_PER_LINE];
2314       UChar descr[OC_W32S_PER_LINE];
2315    }
2316    OCacheLine;
2317 
2318 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2319    in use, 'n' (nonzero) if it contains at least one valid origin tag,
2320    and 'z' if all the represented tags are zero. */
2321 static UChar classify_OCacheLine ( OCacheLine* line )
2322 {
2323    UWord i;
2324    if (line->tag == 1/*invalid*/)
2325       return 'e'; /* EMPTY */
2326    tl_assert(is_valid_oc_tag(line->tag));
2327    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2328       tl_assert(0 == ((~0xF) & line->descr[i]));
2329       if (line->w32[i] > 0 && line->descr[i] > 0)
2330          return 'n'; /* NONZERO - contains useful info */
2331    }
2332    return 'z'; /* ZERO - no useful info */
2333 }
2334 
2335 typedef
2336    struct {
2337       OCacheLine line[OC_LINES_PER_SET];
2338    }
2339    OCacheSet;
2340 
2341 typedef
2342    struct {
2343       OCacheSet set[OC_N_SETS];
2344    }
2345    OCache;
2346 
2347 static OCache* ocacheL1 = NULL;
2348 static UWord   ocacheL1_event_ctr = 0;
2349 
2350 static void init_ocacheL2 ( void ); /* fwds */
2351 static void init_OCache ( void )
2352 {
2353    UWord line, set;
2354    tl_assert(MC_(clo_mc_level) >= 3);
2355    tl_assert(ocacheL1 == NULL);
2356    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2357    if (ocacheL1 == NULL) {
2358       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2359                                    sizeof(OCache) );
2360    }
2361    tl_assert(ocacheL1 != NULL);
2362    for (set = 0; set < OC_N_SETS; set++) {
2363       for (line = 0; line < OC_LINES_PER_SET; line++) {
2364          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2365       }
2366    }
2367    init_ocacheL2();
2368 }
2369 
2370 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2371 {
2372    OCacheLine tmp;
2373    stats_ocacheL1_movefwds++;
2374    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2375    tmp = set->line[lineno-1];
2376    set->line[lineno-1] = set->line[lineno];
2377    set->line[lineno] = tmp;
2378 }
2379 
2380 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2381    UWord i;
2382    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2383       line->w32[i] = 0; /* NO ORIGIN */
2384       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2385    }
2386    line->tag = tag;
2387 }
2388 
2389 //////////////////////////////////////////////////////////////
2390 //// OCache backing store
2391 
2392 static OSet* ocacheL2 = NULL;
2393 
2394 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2395    return VG_(malloc)(cc, szB);
2396 }
2397 static void ocacheL2_free ( void* v ) {
2398    VG_(free)( v );
2399 }
2400 
2401 /* Stats: # nodes currently in tree */
2402 static UWord stats__ocacheL2_n_nodes = 0;
2403 
2404 static void init_ocacheL2 ( void )
2405 {
2406    tl_assert(!ocacheL2);
2407    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2408    tl_assert(0 == offsetof(OCacheLine,tag));
2409    ocacheL2
2410       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2411                              NULL, /* fast cmp */
2412                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2413    stats__ocacheL2_n_nodes = 0;
2414 }
2415 
2416 /* Find line with the given tag in the tree, or NULL if not found. */
2417 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2418 {
2419    OCacheLine* line;
2420    tl_assert(is_valid_oc_tag(tag));
2421    stats__ocacheL2_refs++;
2422    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2423    return line;
2424 }
2425 
2426 /* Delete the line with the given tag from the tree, if it is present, and
2427    free up the associated memory. */
2428 static void ocacheL2_del_tag ( Addr tag )
2429 {
2430    OCacheLine* line;
2431    tl_assert(is_valid_oc_tag(tag));
2432    stats__ocacheL2_refs++;
2433    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2434    if (line) {
2435       VG_(OSetGen_FreeNode)(ocacheL2, line);
2436       tl_assert(stats__ocacheL2_n_nodes > 0);
2437       stats__ocacheL2_n_nodes--;
2438    }
2439 }
2440 
2441 /* Add a copy of the given line to the tree.  It must not already be
2442    present. */
2443 static void ocacheL2_add_line ( OCacheLine* line )
2444 {
2445    OCacheLine* copy;
2446    tl_assert(is_valid_oc_tag(line->tag));
2447    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2448    *copy = *line;
2449    stats__ocacheL2_refs++;
2450    VG_(OSetGen_Insert)( ocacheL2, copy );
2451    stats__ocacheL2_n_nodes++;
2452    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2453       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2454 }
2455 
2456 ////
2457 //////////////////////////////////////////////////////////////
2458 
2459 __attribute__((noinline))
2460 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2461 {
2462    OCacheLine *victim, *inL2;
2463    UChar c;
2464    UWord line;
2465    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2466    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2467    UWord tag     = a & tagmask;
2468    tl_assert(setno >= 0 && setno < OC_N_SETS);
2469 
2470    /* we already tried line == 0; skip therefore. */
2471    for (line = 1; line < OC_LINES_PER_SET; line++) {
2472       if (ocacheL1->set[setno].line[line].tag == tag) {
2473          if (line == 1) {
2474             stats_ocacheL1_found_at_1++;
2475          } else {
2476             stats_ocacheL1_found_at_N++;
2477          }
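         /* Approximate-LRU maintenance: every
            2^OC_MOVE_FORWARDS_EVERY_BITS hits on a non-front line,
            promote the hit line one slot towards the front of its set. */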
2478          if (UNLIKELY(0 == (ocacheL1_event_ctr++
2479                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2480             moveLineForwards( &ocacheL1->set[setno], line );
2481             line--;
2482          }
2483          return &ocacheL1->set[setno].line[line];
2484       }
2485    }
2486 
2487    /* A miss.  Use the last slot.  Implicitly this means we're
2488       ejecting the line in the last slot. */
2489    stats_ocacheL1_misses++;
2490    tl_assert(line == OC_LINES_PER_SET);
2491    line--;
2492    tl_assert(line > 0);
2493 
2494    /* First, move the to-be-ejected line to the L2 cache. */
2495    victim = &ocacheL1->set[setno].line[line];
2496    c = classify_OCacheLine(victim);
2497    switch (c) {
2498       case 'e':
2499          /* the line is empty (has invalid tag); ignore it. */
2500          break;
2501       case 'z':
2502          /* line contains zeroes.  We must ensure the backing store is
2503             updated accordingly, either by copying the line there
2504             verbatim, or by ensuring it isn't present there.  We
2505             choose the latter on the basis that it reduces the size of
2506             the backing store. */
2507          ocacheL2_del_tag( victim->tag );
2508          break;
2509       case 'n':
2510          /* line contains at least one real, useful origin.  Copy it
2511             to the backing store. */
2512          stats_ocacheL1_lossage++;
2513          inL2 = ocacheL2_find_tag( victim->tag );
2514          if (inL2) {
2515             *inL2 = *victim;
2516          } else {
2517             ocacheL2_add_line( victim );
2518          }
2519          break;
2520       default:
2521          tl_assert(0);
2522    }
2523 
2524    /* Now we must reload the L1 cache from the backing tree, if
2525       possible. */
2526    tl_assert(tag != victim->tag); /* stay sane */
2527    inL2 = ocacheL2_find_tag( tag );
2528    if (inL2) {
2529       /* We're in luck.  It's in the L2. */
2530       ocacheL1->set[setno].line[line] = *inL2;
2531    } else {
2532       /* Missed at both levels of the cache hierarchy.  We have to
2533          declare it as full of zeroes (unknown origins). */
2534       stats__ocacheL2_misses++;
2535       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2536    }
2537 
2538    /* Move it one forwards */
2539    moveLineForwards( &ocacheL1->set[setno], line );
2540    line--;
2541 
2542    return &ocacheL1->set[setno].line[line];
2543 }
2544 
2545 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2546 {
2547    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2548    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2549    UWord tag     = a & tagmask;
2550 
2551    stats_ocacheL1_find++;
2552 
2553    if (OC_ENABLE_ASSERTIONS) {
2554       tl_assert(setno >= 0 && setno < OC_N_SETS);
2555       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2556    }
2557 
2558    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2559       return &ocacheL1->set[setno].line[0];
2560    }
2561 
2562    return find_OCacheLine_SLOW( a );
2563 }
2564 
2565 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2566 {
2567    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2568    //// Set the origins for a+0 .. a+7
2569    { OCacheLine* line;
2570      UWord lineoff = oc_line_offset(a);
2571      if (OC_ENABLE_ASSERTIONS) {
2572         tl_assert(lineoff >= 0
2573                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2574      }
2575      line = find_OCacheLine( a );
2576      line->descr[lineoff+0] = 0xF;
2577      line->descr[lineoff+1] = 0xF;
2578      line->w32[lineoff+0]   = otag;
2579      line->w32[lineoff+1]   = otag;
2580    }
2581    //// END inlined, specialised version of MC_(helperc_b_store8)
2582 }
2583 
2584 
2585 /*------------------------------------------------------------*/
2586 /*--- Aligned fast case permission setters,                ---*/
2587 /*--- for dealing with stacks                              ---*/
2588 /*------------------------------------------------------------*/
2589 
2590 /*--------------------- 32-bit ---------------------*/
2591 
2592 /* Nb: by "aligned" here we mean 4-byte aligned */
2593 
2594 static INLINE void make_aligned_word32_undefined ( Addr a )
2595 {
2596    PROF_EVENT(300, "make_aligned_word32_undefined");
2597 
2598 #ifndef PERF_FAST_STACK2
2599    make_mem_undefined(a, 4);
2600 #else
2601    {
2602       UWord   sm_off;
2603       SecMap* sm;
2604 
2605       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2606          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2607          make_mem_undefined(a, 4);
2608          return;
2609       }
2610 
2611       sm                  = get_secmap_for_writing_low(a);
2612       sm_off              = SM_OFF(a);
2613       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2614    }
2615 #endif
2616 }
2617 
2618 static INLINE
2619 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2620 {
2621    make_aligned_word32_undefined(a);
2622    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2623    //// Set the origins for a+0 .. a+3
2624    { OCacheLine* line;
2625      UWord lineoff = oc_line_offset(a);
2626      if (OC_ENABLE_ASSERTIONS) {
2627         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2628      }
2629      line = find_OCacheLine( a );
2630      line->descr[lineoff] = 0xF;
2631      line->w32[lineoff]   = otag;
2632    }
2633    //// END inlined, specialised version of MC_(helperc_b_store4)
2634 }
2635 
2636 static INLINE
2637 void make_aligned_word32_noaccess ( Addr a )
2638 {
2639    PROF_EVENT(310, "make_aligned_word32_noaccess");
2640 
2641 #ifndef PERF_FAST_STACK2
2642    MC_(make_mem_noaccess)(a, 4);
2643 #else
2644    {
2645       UWord   sm_off;
2646       SecMap* sm;
2647 
2648       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2649          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2650          MC_(make_mem_noaccess)(a, 4);
2651          return;
2652       }
2653 
2654       sm                  = get_secmap_for_writing_low(a);
2655       sm_off              = SM_OFF(a);
2656       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2657 
2658       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2659       //// Set the origins for a+0 .. a+3.
2660       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2661          OCacheLine* line;
2662          UWord lineoff = oc_line_offset(a);
2663          if (OC_ENABLE_ASSERTIONS) {
2664             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2665          }
2666          line = find_OCacheLine( a );
2667          line->descr[lineoff] = 0;
2668       }
2669       //// END inlined, specialised version of MC_(helperc_b_store4)
2670    }
2671 #endif
2672 }
2673 
2674 /*--------------------- 64-bit ---------------------*/
2675 
2676 /* Nb: by "aligned" here we mean 8-byte aligned */
2677 
2678 static INLINE void make_aligned_word64_undefined ( Addr a )
2679 {
2680    PROF_EVENT(320, "make_aligned_word64_undefined");
2681 
2682 #ifndef PERF_FAST_STACK2
2683    make_mem_undefined(a, 8);
2684 #else
2685    {
2686       UWord   sm_off16;
2687       SecMap* sm;
2688 
2689       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2690          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2691          make_mem_undefined(a, 8);
2692          return;
2693       }
2694 
2695       sm       = get_secmap_for_writing_low(a);
2696       sm_off16 = SM_OFF_16(a);
2697       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2698    }
2699 #endif
2700 }
2701 
2702 static INLINE
2703 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2704 {
2705    make_aligned_word64_undefined(a);
2706    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2707    //// Set the origins for a+0 .. a+7
2708    { OCacheLine* line;
2709      UWord lineoff = oc_line_offset(a);
2710      if (OC_ENABLE_ASSERTIONS)
2711         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2712      line = find_OCacheLine( a );
2713      line->descr[lineoff+0] = 0xF;
2714      line->descr[lineoff+1] = 0xF;
2715      line->w32[lineoff+0]   = otag;
2716      line->w32[lineoff+1]   = otag;
2717    }
2718    //// END inlined, specialised version of MC_(helperc_b_store8)
2719 }
2720 
2721 static INLINE
2722 void make_aligned_word64_noaccess ( Addr a )
2723 {
2724    PROF_EVENT(330, "make_aligned_word64_noaccess");
2725 
2726 #ifndef PERF_FAST_STACK2
2727    MC_(make_mem_noaccess)(a, 8);
2728 #else
2729    {
2730       UWord   sm_off16;
2731       SecMap* sm;
2732 
2733       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2734          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2735          MC_(make_mem_noaccess)(a, 8);
2736          return;
2737       }
2738 
2739       sm       = get_secmap_for_writing_low(a);
2740       sm_off16 = SM_OFF_16(a);
2741       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2742 
2743       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2744       //// Clear the origins for a+0 .. a+7.
2745       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2746          OCacheLine* line;
2747          UWord lineoff = oc_line_offset(a);
2748          if (OC_ENABLE_ASSERTIONS)
2749             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2750          line = find_OCacheLine( a );
2751          line->descr[lineoff+0] = 0;
2752          line->descr[lineoff+1] = 0;
2753       }
2754       //// END inlined, specialised version of MC_(helperc_b_store8)
2755    }
2756 #endif
2757 }
2758 
2759 
2760 /*------------------------------------------------------------*/
2761 /*--- Stack pointer adjustment                             ---*/
2762 /*------------------------------------------------------------*/
2763 
2764 #ifdef PERF_FAST_STACK
2765 #  define MAYBE_USED
2766 #else
2767 #  define MAYBE_USED __attribute__((unused))
2768 #endif
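
/* All the handlers below shift the affected range down by
   VG_STACK_REDZONE_SZB, so that the redzone just below the stack
   pointer is tracked along with the stack proper.  A worked example
   (a sketch only, assuming the AMD64 value VG_STACK_REDZONE_SZB == 128):
   a push that moves SP from 0x1000 down to 0xFF8 arrives here as
   mc_new_mem_stack_8(0xFF8), which marks [0xF78, 0xF80) -- the eight
   bytes that have just entered the tracked region below SP -- as
   undefined; the matching pop arrives as mc_die_mem_stack_8(0x1000),
   which marks the same eight bytes no-access again. */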
2769 
2770 /*--------------- adjustment by 4 bytes ---------------*/
2771 
2772 MAYBE_USED
2773 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2774 {
2775    UInt otag = ecu | MC_OKIND_STACK;
2776    PROF_EVENT(110, "new_mem_stack_4");
2777    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2778       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2779    } else {
2780       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2781    }
2782 }
2783 
2784 MAYBE_USED
2785 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2786 {
2787    PROF_EVENT(110, "new_mem_stack_4");
2788    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2789       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2790    } else {
2791       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2792    }
2793 }
2794 
2795 MAYBE_USED
2796 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2797 {
2798    PROF_EVENT(120, "die_mem_stack_4");
2799    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2800       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2801    } else {
2802       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2803    }
2804 }
2805 
2806 /*--------------- adjustment by 8 bytes ---------------*/
2807 
2808 MAYBE_USED
2809 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2810 {
2811    UInt otag = ecu | MC_OKIND_STACK;
2812    PROF_EVENT(111, "new_mem_stack_8");
2813    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2814       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2815    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2816       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2817       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2818    } else {
2819       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2820    }
2821 }
2822 
2823 MAYBE_USED
2824 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2825 {
2826    PROF_EVENT(111, "new_mem_stack_8");
2827    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2828       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2829    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2830       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2831       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2832    } else {
2833       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2834    }
2835 }
2836 
2837 MAYBE_USED
2838 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2839 {
2840    PROF_EVENT(121, "die_mem_stack_8");
2841    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2842       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2843    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2844       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2845       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2846    } else {
2847       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2848    }
2849 }
2850 
2851 /*--------------- adjustment by 12 bytes ---------------*/
2852 
2853 MAYBE_USED
2854 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2855 {
2856    UInt otag = ecu | MC_OKIND_STACK;
2857    PROF_EVENT(112, "new_mem_stack_12");
2858    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2859       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2860       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2861    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2862      /* From the previous test we know we don't have 8-alignment at
2863         offset +0, hence we must have 8-alignment at offsets +4/-4.
2864         Hence it is safe to do 4 at +0 and then 8 at +4. */
2865       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2866       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2867    } else {
2868       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2869    }
2870 }
2871 
2872 MAYBE_USED
2873 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2874 {
2875    PROF_EVENT(112, "new_mem_stack_12");
2876    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2877       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2878       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2879    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2880      /* From the previous test we know we don't have 8-alignment at
2881         offset +0, hence we must have 8-alignment at offsets +4/-4.
2882         Hence it is safe to do 4 at +0 and then 8 at +4. */
2883       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2884       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2885    } else {
2886       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2887    }
2888 }
2889 
2890 MAYBE_USED
2891 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2892 {
2893    PROF_EVENT(122, "die_mem_stack_12");
2894    /* Note the -12 in the test */
2895    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2896       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2897          -4. */
2898       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2899       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2900    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2901       /* We have 4-alignment at +0, but we don't have 8-alignment at
2902          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2903          and then 8 at -8. */
2904       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2905       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2906    } else {
2907       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2908    }
2909 }
2910 
2911 /*--------------- adjustment by 16 bytes ---------------*/
2912 
2913 MAYBE_USED
2914 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2915 {
2916    UInt otag = ecu | MC_OKIND_STACK;
2917    PROF_EVENT(113, "new_mem_stack_16");
2918    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2919       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2920       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2921       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2922    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2923       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2924          Hence do 4 at +0, 8 at +4, 4 at +12. */
2925       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2926       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2927       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2928    } else {
2929       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2930    }
2931 }
2932 
2933 MAYBE_USED
2934 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2935 {
2936    PROF_EVENT(113, "new_mem_stack_16");
2937    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2938       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2939       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2940       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2941    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2942       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2943          Hence do 4 at +0, 8 at +4, 4 at +12. */
2944       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2945       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
2946       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2947    } else {
2948       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2949    }
2950 }
2951 
2952 MAYBE_USED
2953 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2954 {
2955    PROF_EVENT(123, "die_mem_stack_16");
2956    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2957       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2958       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2959       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2960    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2961       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
2962       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2963       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2964       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2965    } else {
2966       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2967    }
2968 }
2969 
2970 /*--------------- adjustment by 32 bytes ---------------*/
2971 
2972 MAYBE_USED
2973 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2974 {
2975    UInt otag = ecu | MC_OKIND_STACK;
2976    PROF_EVENT(114, "new_mem_stack_32");
2977    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2978       /* Straightforward */
2979       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2980       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2981       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2982       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2983    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2984       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2985          +0,+28. */
2986       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2987       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2988       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2989       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2990       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2991    } else {
2992       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2993    }
2994 }
2995 
2996 MAYBE_USED
2997 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2998 {
2999    PROF_EVENT(114, "new_mem_stack_32");
3000    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3001       /* Straightforward */
3002       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3003       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3004       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3005       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3006    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
3008          +0,+28. */
3009       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3010       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3011       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3012       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3013       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3014    } else {
3015       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3016    }
3017 }
3018 
3019 MAYBE_USED
3020 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3021 {
3022    PROF_EVENT(124, "die_mem_stack_32");
3023    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3024       /* Straightforward */
3025       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3026       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3027       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3028       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3029    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3030       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
3031          4 at -32,-4. */
3032       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3033       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3034       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3035       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3036       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
3037    } else {
3038       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3039    }
3040 }
3041 
3042 /*--------------- adjustment by 112 bytes ---------------*/
3043 
3044 MAYBE_USED
3045 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3046 {
3047    UInt otag = ecu | MC_OKIND_STACK;
3048    PROF_EVENT(115, "new_mem_stack_112");
3049    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3050       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3051       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3052       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3053       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3054       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3055       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3056       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3057       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3058       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3059       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3060       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3061       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3062       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3063       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3064    } else {
3065       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3066    }
3067 }
3068 
3069 MAYBE_USED
3070 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3071 {
3072    PROF_EVENT(115, "new_mem_stack_112");
3073    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3074       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3075       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3076       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3077       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3078       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3079       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3080       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3081       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3082       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3083       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3084       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3085       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3086       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3087       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3088    } else {
3089       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3090    }
3091 }
3092 
3093 MAYBE_USED
3094 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3095 {
3096    PROF_EVENT(125, "die_mem_stack_112");
3097    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3098       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3099       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3100       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3101       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3102       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3103       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3104       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3105       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3106       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3107       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3108       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3109       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3110       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3111       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3112    } else {
3113       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3114    }
3115 }
3116 
3117 /*--------------- adjustment by 128 bytes ---------------*/
3118 
3119 MAYBE_USED
3120 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3121 {
3122    UInt otag = ecu | MC_OKIND_STACK;
3123    PROF_EVENT(116, "new_mem_stack_128");
3124    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3125       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3126       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3127       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3128       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3129       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3130       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3131       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3132       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3133       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3134       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3135       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3136       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3137       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3138       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3139       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3140       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3141    } else {
3142       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3143    }
3144 }
3145 
3146 MAYBE_USED
3147 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3148 {
3149    PROF_EVENT(116, "new_mem_stack_128");
3150    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3151       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3152       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3153       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3154       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3155       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3156       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3157       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3158       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3159       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3160       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3161       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3162       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3163       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3164       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3165       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3166       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3167    } else {
3168       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3169    }
3170 }
3171 
3172 MAYBE_USED
3173 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3174 {
3175    PROF_EVENT(126, "die_mem_stack_128");
3176    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3177       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3178       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3179       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3180       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3181       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3182       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3183       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3184       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3185       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3186       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3187       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3188       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3189       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3190       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3191       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3192       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3193    } else {
3194       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3195    }
3196 }
3197 
3198 /*--------------- adjustment by 144 bytes ---------------*/
3199 
3200 MAYBE_USED
3201 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3202 {
3203    UInt otag = ecu | MC_OKIND_STACK;
3204    PROF_EVENT(117, "new_mem_stack_144");
3205    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3206       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3207       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3208       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3209       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3210       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3211       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3212       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3213       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3214       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3215       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3216       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3217       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3218       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3219       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3220       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3221       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3222       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3223       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3224    } else {
3225       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3226    }
3227 }
3228 
3229 MAYBE_USED
3230 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3231 {
3232    PROF_EVENT(117, "new_mem_stack_144");
3233    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3234       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3235       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3236       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3237       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3238       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3239       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3240       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3241       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3242       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3243       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3244       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3245       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3246       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3247       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3248       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3249       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3250       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3251       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3252    } else {
3253       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3254    }
3255 }
3256 
3257 MAYBE_USED
3258 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3259 {
3260    PROF_EVENT(127, "die_mem_stack_144");
3261    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3262       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3263       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3264       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3265       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3266       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3267       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3268       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3269       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3270       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3271       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3272       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3273       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3274       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3275       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3276       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3277       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3278       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3279       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3280    } else {
3281       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3282    }
3283 }
3284 
3285 /*--------------- adjustment by 160 bytes ---------------*/
3286 
3287 MAYBE_USED
3288 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3289 {
3290    UInt otag = ecu | MC_OKIND_STACK;
3291    PROF_EVENT(118, "new_mem_stack_160");
3292    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3293       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3294       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3295       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3296       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3297       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3298       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3299       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3300       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3301       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3302       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3303       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3304       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3305       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3306       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3307       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3308       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3309       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3310       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3311       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3312       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3313    } else {
3314       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3315    }
3316 }
3317 
3318 MAYBE_USED
3319 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3320 {
3321    PROF_EVENT(118, "new_mem_stack_160");
3322    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3323       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3324       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3325       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3326       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3327       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3328       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3329       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3330       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3331       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3332       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3333       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3334       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3335       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3336       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3337       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3338       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3339       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3340       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3341       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3342       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3343    } else {
3344       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3345    }
3346 }
3347 
3348 MAYBE_USED
3349 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3350 {
3351    PROF_EVENT(128, "die_mem_stack_160");
3352    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3353       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3354       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3355       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3356       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3357       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3358       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3359       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3360       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3361       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3362       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3363       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3364       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3365       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3366       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3367       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3368       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3369       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3370       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3371       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3372       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3373    } else {
3374       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3375    }
3376 }
3377 
3378 /*--------------- adjustment by N bytes ---------------*/
3379 
3380 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3381 {
3382    UInt otag = ecu | MC_OKIND_STACK;
3383    PROF_EVENT(115, "new_mem_stack_w_otag");
3384    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3385 }
3386 
3387 static void mc_new_mem_stack ( Addr a, SizeT len )
3388 {
3389    PROF_EVENT(115, "new_mem_stack");
3390    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3391 }
3392 
3393 static void mc_die_mem_stack ( Addr a, SizeT len )
3394 {
3395    PROF_EVENT(125, "die_mem_stack");
3396    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3397 }
3398 
3399 
3400 /* The AMD64 ABI says:
3401 
3402    "The 128-byte area beyond the location pointed to by %rsp is considered
3403     to be reserved and shall not be modified by signal or interrupt
3404     handlers.  Therefore, functions may use this area for temporary data
3405     that is not needed across function calls.  In particular, leaf functions
3406     may use this area for their entire stack frame, rather than adjusting
3407     the stack pointer in the prologue and epilogue.  This area is known as
3408     red zone [sic]."
3409 
3410    So after any call or return we need to mark this redzone as containing
3411    undefined values.
3412 
3413    Consider this:  we're in function f.  f calls g.  g moves rsp down
3414    modestly (say 16 bytes) and writes stuff all over the red zone, making it
3415    defined.  g returns.  f is buggy and reads from parts of the red zone
3416    defined.  g returns.  f is buggy and reads from parts of the red zone
3417    that it didn't write.  But because g filled that area in, f picks up
3418    defined V bits, and so any errors from reading parts of the red zone
3419    it didn't write will be missed.  The only solution I could
3420 
3421    This is in accordance with the ABI, which makes it clear the redzone
3422    is volatile across function calls.
3423 
3424    The problem occurs the other way round too: f could fill the RZ up
3425    with defined values and g could mistakenly read them.  So the RZ
3426    also needs to be nuked on function calls.
3427 */
3428 
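/* Operationally (a sketch of the plumbing; the instrumentation side is
   not in this file): at calls and returns the core emits an IR AbiHint
   statement describing the redzone, and Memcheck's instrumenter turns
   each AbiHint into a call to MC_(helperc_MAKE_STACK_UNINIT) below,
   which is what actually re-marks the area as undefined. */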
3429 
3430 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
3431    improved so as to have a lower miss rate. */
3432 
3433 static UWord stats__nia_cache_queries = 0;
3434 static UWord stats__nia_cache_misses  = 0;
3435 
3436 typedef
3437    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
3438             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3439    WCacheEnt;
3440 
3441 #define N_NIA_TO_ECU_CACHE 511
3442 
3443 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3444 
3445 static void init_nia_to_ecu_cache ( void )
3446 {
3447    UWord       i;
3448    Addr        zero_addr = 0;
3449    ExeContext* zero_ec;
3450    UInt        zero_ecu;
3451    /* Fill all the slots with an entry for address zero, and the
3452       relevant otags accordingly.  Hence the cache is initially filled
3453       with valid data. */
3454    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3455    tl_assert(zero_ec);
3456    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3457    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3458    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3459       nia_to_ecu_cache[i].nia0 = zero_addr;
3460       nia_to_ecu_cache[i].ecu0 = zero_ecu;
3461       nia_to_ecu_cache[i].nia1 = zero_addr;
3462       nia_to_ecu_cache[i].ecu1 = zero_ecu;
3463    }
3464 }
3465 
3466 static inline UInt convert_nia_to_ecu ( Addr nia )
3467 {
3468    UWord i;
3469    UInt        ecu;
3470    ExeContext* ec;
3471 
3472    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3473 
3474    stats__nia_cache_queries++;
3475    i = nia % N_NIA_TO_ECU_CACHE;
3476    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3477 
3478    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3479       return nia_to_ecu_cache[i].ecu0;
3480 
3481    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3482 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3483       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3484       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3485 #     undef SWAP
3486       return nia_to_ecu_cache[i].ecu0;
3487    }
3488 
3489    stats__nia_cache_misses++;
3490    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3491    tl_assert(ec);
3492    ecu = VG_(get_ECU_from_ExeContext)(ec);
3493    tl_assert(VG_(is_plausible_ECU)(ecu));
3494 
3495    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3496    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3497 
3498    nia_to_ecu_cache[i].nia0 = nia;
3499    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3500    return ecu;
3501 }
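
/* Illustrative trace (hypothetical addresses, not from the tool): each
   of the 511 sets above behaves as a 2-entry cache with move-to-front.
   If X and Y collide, i.e. X % N_NIA_TO_ECU_CACHE == Y % N_NIA_TO_ECU_CACHE:

      convert_nia_to_ecu(X);   // miss: X installed in slot 0
      convert_nia_to_ecu(Y);   // miss: X demoted to slot 1, Y takes slot 0
      convert_nia_to_ecu(X);   // hit in slot 1: X and Y swap slots
      convert_nia_to_ecu(Y);   // hit in slot 1 again: they swap back

   so two hot return addresses that collide can both stay resident. */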
3502 
3503 
3504 /* Note that this serves both the origin-tracking and
3505    no-origin-tracking modes.  We assume that calls to it are
3506    sufficiently infrequent that it isn't worth specialising for the
3507    with/without origin-tracking cases. */
3508 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3509 {
3510    UInt otag;
3511    tl_assert(sizeof(UWord) == sizeof(SizeT));
3512    if (0)
3513       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3514                   base, len, nia );
3515 
3516    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3517       UInt ecu = convert_nia_to_ecu ( nia );
3518       tl_assert(VG_(is_plausible_ECU)(ecu));
3519       otag = ecu | MC_OKIND_STACK;
3520    } else {
3521       tl_assert(nia == 0);
3522       otag = 0;
3523    }
3524 
3525 #  if 0
3526    /* Really slow version */
3527    MC_(make_mem_undefined_w_otag)(base, len, otag);
3528 #  endif
3529 
3530 #  if 0
3531    /* Slow(ish) version, which is fairly easily seen to be correct.
3532    */
3533    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3534       make_aligned_word64_undefined_w_otag(base +   0, otag);
3535       make_aligned_word64_undefined_w_otag(base +   8, otag);
3536       make_aligned_word64_undefined_w_otag(base +  16, otag);
3537       make_aligned_word64_undefined_w_otag(base +  24, otag);
3538 
3539       make_aligned_word64_undefined_w_otag(base +  32, otag);
3540       make_aligned_word64_undefined_w_otag(base +  40, otag);
3541       make_aligned_word64_undefined_w_otag(base +  48, otag);
3542       make_aligned_word64_undefined_w_otag(base +  56, otag);
3543 
3544       make_aligned_word64_undefined_w_otag(base +  64, otag);
3545       make_aligned_word64_undefined_w_otag(base +  72, otag);
3546       make_aligned_word64_undefined_w_otag(base +  80, otag);
3547       make_aligned_word64_undefined_w_otag(base +  88, otag);
3548 
3549       make_aligned_word64_undefined_w_otag(base +  96, otag);
3550       make_aligned_word64_undefined_w_otag(base + 104, otag);
3551       make_aligned_word64_undefined_w_otag(base + 112, otag);
3552       make_aligned_word64_undefined_w_otag(base + 120, otag);
3553    } else {
3554       MC_(make_mem_undefined_w_otag)(base, len, otag);
3555    }
3556 #  endif
3557 
3558    /* Idea is: go fast when
3559          * 8-aligned and length is 128
3560          * the sm is available in the main primary map
3561          * the address range falls entirely within a single secondary map
3562       If all those conditions hold, just update the V+A bits by writing
3563       directly into the vabits array.  (If the sm was distinguished, this
3564       will make a copy and then write to it.)
3565    */
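
   /* Arithmetic behind the stores below: at 2 V+A bits per byte, 128
      bytes of client memory correspond to 32 bytes of vabits8[], written
      here as 16 aligned UShort stores of VA_BITS16_UNDEFINED (each UShort
      covers 8 client bytes).  The 288-byte ppc64 case further down does
      the same with 36 stores. */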
3566 
3567    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3568       /* Now we know the address range is suitably sized and aligned. */
3569       UWord a_lo = (UWord)(base);
3570       UWord a_hi = (UWord)(base + 128 - 1);
3571       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3572       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3573          // Now we know the entire range is within the main primary map.
3574          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3575          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3576          /* Now we know that the entire address range falls within a
3577             single secondary map, and that that secondary 'lives' in
3578             the main primary map. */
3579          if (LIKELY(sm == sm_hi)) {
3580             // Finally, we know that the range is entirely within one secmap.
3581             UWord   v_off = SM_OFF(a_lo);
3582             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3583             p[ 0] = VA_BITS16_UNDEFINED;
3584             p[ 1] = VA_BITS16_UNDEFINED;
3585             p[ 2] = VA_BITS16_UNDEFINED;
3586             p[ 3] = VA_BITS16_UNDEFINED;
3587             p[ 4] = VA_BITS16_UNDEFINED;
3588             p[ 5] = VA_BITS16_UNDEFINED;
3589             p[ 6] = VA_BITS16_UNDEFINED;
3590             p[ 7] = VA_BITS16_UNDEFINED;
3591             p[ 8] = VA_BITS16_UNDEFINED;
3592             p[ 9] = VA_BITS16_UNDEFINED;
3593             p[10] = VA_BITS16_UNDEFINED;
3594             p[11] = VA_BITS16_UNDEFINED;
3595             p[12] = VA_BITS16_UNDEFINED;
3596             p[13] = VA_BITS16_UNDEFINED;
3597             p[14] = VA_BITS16_UNDEFINED;
3598             p[15] = VA_BITS16_UNDEFINED;
3599             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3600                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3601                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3602                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3603                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3604                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3605                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3606                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3607                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3608                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3609                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3610                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3611                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3612                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3613                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3614                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3615                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3616             }
3617             return;
3618          }
3619       }
3620    }
3621 
3622    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3623    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3624       /* Now we know the address range is suitably sized and aligned. */
3625       UWord a_lo = (UWord)(base);
3626       UWord a_hi = (UWord)(base + 288 - 1);
3627       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3628       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3629          // Now we know the entire range is within the main primary map.
3630          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3631          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3632          /* Now we know that the entire address range falls within a
3633             single secondary map, and that that secondary 'lives' in
3634             the main primary map. */
3635          if (LIKELY(sm == sm_hi)) {
3636             // Finally, we know that the range is entirely within one secmap.
3637             UWord   v_off = SM_OFF(a_lo);
3638             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3639             p[ 0] = VA_BITS16_UNDEFINED;
3640             p[ 1] = VA_BITS16_UNDEFINED;
3641             p[ 2] = VA_BITS16_UNDEFINED;
3642             p[ 3] = VA_BITS16_UNDEFINED;
3643             p[ 4] = VA_BITS16_UNDEFINED;
3644             p[ 5] = VA_BITS16_UNDEFINED;
3645             p[ 6] = VA_BITS16_UNDEFINED;
3646             p[ 7] = VA_BITS16_UNDEFINED;
3647             p[ 8] = VA_BITS16_UNDEFINED;
3648             p[ 9] = VA_BITS16_UNDEFINED;
3649             p[10] = VA_BITS16_UNDEFINED;
3650             p[11] = VA_BITS16_UNDEFINED;
3651             p[12] = VA_BITS16_UNDEFINED;
3652             p[13] = VA_BITS16_UNDEFINED;
3653             p[14] = VA_BITS16_UNDEFINED;
3654             p[15] = VA_BITS16_UNDEFINED;
3655             p[16] = VA_BITS16_UNDEFINED;
3656             p[17] = VA_BITS16_UNDEFINED;
3657             p[18] = VA_BITS16_UNDEFINED;
3658             p[19] = VA_BITS16_UNDEFINED;
3659             p[20] = VA_BITS16_UNDEFINED;
3660             p[21] = VA_BITS16_UNDEFINED;
3661             p[22] = VA_BITS16_UNDEFINED;
3662             p[23] = VA_BITS16_UNDEFINED;
3663             p[24] = VA_BITS16_UNDEFINED;
3664             p[25] = VA_BITS16_UNDEFINED;
3665             p[26] = VA_BITS16_UNDEFINED;
3666             p[27] = VA_BITS16_UNDEFINED;
3667             p[28] = VA_BITS16_UNDEFINED;
3668             p[29] = VA_BITS16_UNDEFINED;
3669             p[30] = VA_BITS16_UNDEFINED;
3670             p[31] = VA_BITS16_UNDEFINED;
3671             p[32] = VA_BITS16_UNDEFINED;
3672             p[33] = VA_BITS16_UNDEFINED;
3673             p[34] = VA_BITS16_UNDEFINED;
3674             p[35] = VA_BITS16_UNDEFINED;
3675             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3676                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3677                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3678                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3679                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3680                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3681                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3682                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3683                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3684                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3685                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3686                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3687                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3688                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3689                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3690                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3691                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3692                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3693                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3694                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3695                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3696                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3697                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3698                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3699                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3700                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3701                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3702                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3703                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3704                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3705                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3706                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3707                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3708                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3709                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3710                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3711                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3712             }
3713             return;
3714          }
3715       }
3716    }
3717 
3718    /* else fall into slow case */
3719    MC_(make_mem_undefined_w_otag)(base, len, otag);
3720 }
3721 
3722 
3723 /*------------------------------------------------------------*/
3724 /*--- Checking memory                                      ---*/
3725 /*------------------------------------------------------------*/
3726 
3727 typedef
3728    enum {
3729       MC_Ok = 5,
3730       MC_AddrErr = 6,
3731       MC_ValueErr = 7
3732    }
3733    MC_ReadResult;
3734 
3735 
3736 /* Check permissions for address range.  If inadequate permissions
3737    exist, *bad_addr is set to the offending address, so the caller can
3738    know what it is. */
3739 
3740 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
3741    returns False, and if bad_addr is non-NULL, sets *bad_addr to
3742    indicate the lowest failing address.  Functions below are
3743    similar. */
3744 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3745 {
3746    SizeT i;
3747    UWord vabits2;
3748 
3749    PROF_EVENT(60, "check_mem_is_noaccess");
3750    for (i = 0; i < len; i++) {
3751       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3752       vabits2 = get_vabits2(a);
3753       if (VA_BITS2_NOACCESS != vabits2) {
3754          if (bad_addr != NULL) *bad_addr = a;
3755          return False;
3756       }
3757       a++;
3758    }
3759    return True;
3760 }
3761 
3762 static Bool is_mem_addressable ( Addr a, SizeT len,
3763                                  /*OUT*/Addr* bad_addr )
3764 {
3765    SizeT i;
3766    UWord vabits2;
3767 
3768    PROF_EVENT(62, "is_mem_addressable");
3769    for (i = 0; i < len; i++) {
3770       PROF_EVENT(63, "is_mem_addressable(loop)");
3771       vabits2 = get_vabits2(a);
3772       if (VA_BITS2_NOACCESS == vabits2) {
3773          if (bad_addr != NULL) *bad_addr = a;
3774          return False;
3775       }
3776       a++;
3777    }
3778    return True;
3779 }
3780 
3781 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3782                                       /*OUT*/Addr* bad_addr,
3783                                       /*OUT*/UInt* otag )
3784 {
3785    SizeT i;
3786    UWord vabits2;
3787 
3788    PROF_EVENT(64, "is_mem_defined");
3789    DEBUG("is_mem_defined\n");
3790 
3791    if (otag)     *otag = 0;
3792    if (bad_addr) *bad_addr = 0;
3793    for (i = 0; i < len; i++) {
3794       PROF_EVENT(65, "is_mem_defined(loop)");
3795       vabits2 = get_vabits2(a);
3796       if (VA_BITS2_DEFINED != vabits2) {
3797          // Error!  Nb: Report addressability errors in preference to
3798          // definedness errors.  And don't report definedness errors unless
3799          // --undef-value-errors=yes.
3800          if (bad_addr) {
3801             *bad_addr = a;
3802          }
3803          if (VA_BITS2_NOACCESS == vabits2) {
3804             return MC_AddrErr;
3805          }
3806          if (MC_(clo_mc_level) >= 2) {
3807             if (otag && MC_(clo_mc_level) == 3) {
3808                *otag = MC_(helperc_b_load1)( a );
3809             }
3810             return MC_ValueErr;
3811          }
3812       }
3813       a++;
3814    }
3815    return MC_Ok;
3816 }
3817 
3818 
3819 /* Like is_mem_defined but doesn't give up at the first uninitialised
3820    byte -- the entire range is always checked.  This is important for
3821    detecting errors in the case where a checked range strays into
3822    invalid memory, but that fact is not detected by the ordinary
3823    is_mem_defined(), because of an undefined section that precedes the
3824    out of range section, possibly as a result of an alignment hole in
3825    the checked data.  This version always checks the entire range and
3826    can report both a definedness and an accessibility error, if
3827    necessary. */
3828 static void is_mem_defined_comprehensive (
3829                Addr a, SizeT len,
3830                /*OUT*/Bool* errorV,    /* is there a definedness err? */
3831                /*OUT*/Addr* bad_addrV, /* if so where? */
3832                /*OUT*/UInt* otagV,     /* and what's its otag? */
3833                /*OUT*/Bool* errorA,    /* is there an addressability err? */
3834                /*OUT*/Addr* bad_addrA  /* if so where? */
3835             )
3836 {
3837    SizeT i;
3838    UWord vabits2;
3839    Bool  already_saw_errV = False;
3840 
3841    PROF_EVENT(64, "is_mem_defined"); // fixme
3842    DEBUG("is_mem_defined_comprehensive\n");
3843 
3844    tl_assert(!(*errorV || *errorA));
3845 
3846    for (i = 0; i < len; i++) {
3847       PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3848       vabits2 = get_vabits2(a);
3849       switch (vabits2) {
3850          case VA_BITS2_DEFINED:
3851             a++;
3852             break;
3853          case VA_BITS2_UNDEFINED:
3854          case VA_BITS2_PARTDEFINED:
3855             if (!already_saw_errV) {
3856                *errorV    = True;
3857                *bad_addrV = a;
3858                if (MC_(clo_mc_level) == 3) {
3859                   *otagV = MC_(helperc_b_load1)( a );
3860                } else {
3861                   *otagV = 0;
3862                }
3863                already_saw_errV = True;
3864             }
3865             a++; /* keep going */
3866             break;
3867          case VA_BITS2_NOACCESS:
3868             *errorA    = True;
3869             *bad_addrA = a;
3870             return; /* give up now. */
3871          default:
3872             tl_assert(0);
3873       }
3874    }
3875 }
3876 
3877 
3878 /* Check a zero-terminated ascii string.  Tricky -- don't want to
3879    examine the actual bytes, to find the end, until we're sure it is
3880    safe to do so. */
3881 
3882 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3883 {
3884    UWord vabits2;
3885 
3886    PROF_EVENT(66, "mc_is_defined_asciiz");
3887    DEBUG("mc_is_defined_asciiz\n");
3888 
3889    if (otag)     *otag = 0;
3890    if (bad_addr) *bad_addr = 0;
3891    while (True) {
3892       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3893       vabits2 = get_vabits2(a);
3894       if (VA_BITS2_DEFINED != vabits2) {
3895          // Error!  Nb: Report addressability errors in preference to
3896          // definedness errors.  And don't report definedness errors unless
3897          // --undef-value-errors=yes.
3898          if (bad_addr) {
3899             *bad_addr = a;
3900          }
3901          if (VA_BITS2_NOACCESS == vabits2) {
3902             return MC_AddrErr;
3903          }
3904          if (MC_(clo_mc_level) >= 2) {
3905             if (otag && MC_(clo_mc_level) == 3) {
3906                *otag = MC_(helperc_b_load1)( a );
3907             }
3908             return MC_ValueErr;
3909          }
3910       }
3911       /* Ok, a is safe to read. */
3912       if (* ((UChar*)a) == 0) {
3913          return MC_Ok;
3914       }
3915       a++;
3916    }
3917 }
3918 
3919 
3920 /*------------------------------------------------------------*/
3921 /*--- Memory event handlers                                ---*/
3922 /*------------------------------------------------------------*/
3923 
3924 static
3925 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
3926                                 Addr base, SizeT size )
3927 {
3928    Addr bad_addr;
3929    Bool ok = is_mem_addressable ( base, size, &bad_addr );
3930 
3931    if (!ok) {
3932       switch (part) {
3933       case Vg_CoreSysCall:
3934          MC_(record_memparam_error) ( tid, bad_addr,
3935                                       /*isAddrErr*/True, s, 0/*otag*/ );
3936          break;
3937 
3938       case Vg_CoreSignal:
3939          MC_(record_core_mem_error)( tid, s );
3940          break;
3941 
3942       default:
3943          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3944       }
3945    }
3946 }
3947 
3948 static
3949 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
3950                             Addr base, SizeT size )
3951 {
3952    UInt otag = 0;
3953    Addr bad_addr;
3954    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3955 
3956    if (MC_Ok != res) {
3957       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3958 
3959       switch (part) {
3960       case Vg_CoreSysCall:
3961          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3962                                       isAddrErr ? 0 : otag );
3963          break;
3964 
3965       case Vg_CoreSysCallArgInMem:
3966          MC_(record_regparam_error) ( tid, s, otag );
3967          break;
3968 
3969       /* If we're being asked to jump to a silly address, record an error
3970          message before potentially crashing the entire system. */
3971       case Vg_CoreTranslate:
3972          MC_(record_jump_error)( tid, bad_addr );
3973          break;
3974 
3975       default:
3976          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3977       }
3978    }
3979 }
3980 
3981 static
3982 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3983                                    const HChar* s, Addr str )
3984 {
3985    MC_ReadResult res;
3986    Addr bad_addr = 0;   // shut GCC up
3987    UInt otag = 0;
3988 
3989    tl_assert(part == Vg_CoreSysCall);
3990    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3991    if (MC_Ok != res) {
3992       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3993       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3994                                    isAddrErr ? 0 : otag );
3995    }
3996 }
3997 
3998 /* Handling of mmap and mprotect is not as simple as it seems.
3999 
4000    The underlying semantics are that memory obtained from mmap is
4001    always initialised, but may be inaccessible.  And changes to the
4002    protection of memory do not change its contents and hence not its
4003    definedness state.  Problem is we can't model
4004    inaccessible-but-with-some-definedness state; once we mark memory
4005    as inaccessible we lose all info about definedness, and so can't
4006    restore that if it is later made accessible again.
4007 
4008    One obvious thing to do is this:
4009 
4010       mmap/mprotect NONE  -> noaccess
4011       mmap/mprotect other -> defined
4012 
4013    The problem case here is: taking accessible memory, writing
4014    uninitialised data to it, mprotecting it NONE and later mprotecting
4015    it back to some accessible state causes the undefinedness to be
4016    lost.
4017 
4018    A better proposal is:
4019 
4020      (1) mmap NONE       ->  make noaccess
4021      (2) mmap other      ->  make defined
4022 
4023      (3) mprotect NONE   ->  # no change
4024      (4) mprotect other  ->  change any "noaccess" to "defined"
4025 
4026    (2) is OK because memory newly obtained from mmap really is defined
4027        (zeroed out by the kernel -- doing anything else would
4028        constitute a massive security hole.)
4029 
4030    (1) is OK because the only way to make the memory usable is via
4031        (4), in which case we also wind up correctly marking it all as
4032        defined.
4033 
4034    (3) is the weak case.  We choose not to change memory state.
4035        (presumably the range is in some mixture of "defined" and
4036        "undefined", viz, accessible but with arbitrary V bits).  Doing
4037        nothing means we retain the V bits, so that if the memory is
4038        later mprotected "other", the V bits remain unchanged, so there
4039        can be no false negatives.  The bad effect is that if there's
4040        an access in the area, then MC cannot warn; but at least we'll
4041        get a SEGV to show, so it's better than nothing.
4042 
4043    Consider the sequence (3) followed by (4).  Any memory that was
4044    "defined" or "undefined" previously retains its state (as
4045    required).  Any memory that was "noaccess" before can only have
4046    been made that way by (1), and so it's OK to change it to
4047    "defined".
4048 
4049    See https://bugs.kde.org/show_bug.cgi?id=205541
4050    and https://bugs.kde.org/show_bug.cgi?id=210268
4051 */
4052 static
4053 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4054                        ULong di_handle )
4055 {
4056    if (rr || ww || xx) {
4057       /* (2) mmap/mprotect other -> defined */
4058       MC_(make_mem_defined)(a, len);
4059    } else {
4060       /* (1) mmap/mprotect NONE  -> noaccess */
4061       MC_(make_mem_noaccess)(a, len);
4062    }
4063 }
4064 
4065 static
4066 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4067 {
4068    if (rr || ww || xx) {
4069       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
4070       make_mem_defined_if_noaccess(a, len);
4071    } else {
4072       /* (3) mprotect NONE   ->  # no change */
4073       /* do nothing */
4074    }
4075 }
4076 
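/* Editor's illustration -- a hedged sketch, not part of the original
   sources: a client-side sequence showing why rule (3) above matters.
   Writing uninitialised data, mprotect'ing the page to PROT_NONE and back
   leaves the V bits untouched, so a later read of p[0] is still reported
   as a use of uninitialised data.  Only standard POSIX mmap/mprotect are
   used; the function and variable names are illustrative and error
   checking is omitted. */
#if 0
#include <string.h>
#include <sys/mman.h>
static void mprotect_roundtrip_example ( void )
{
   size_t pgsz = 4096;
   char*  p = mmap(NULL, pgsz, PROT_READ|PROT_WRITE,
                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); /* rule (2): defined   */
   char   uninit[16];                                 /* no initialiser      */
   memcpy(p, uninit, sizeof uninit);                  /* p[0..15] undefined  */
   mprotect(p, pgsz, PROT_NONE);                      /* rule (3): no change */
   mprotect(p, pgsz, PROT_READ|PROT_WRITE);           /* rule (4): noaccess  */
                                                      /*   -> defined only   */
   if (p[0] == 0) { }  /* Memcheck still flags this as uninitialised */
   munmap(p, pgsz);
}
#endif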
4077 
4078 static
4079 void mc_new_mem_startup( Addr a, SizeT len,
4080                          Bool rr, Bool ww, Bool xx, ULong di_handle )
4081 {
4082    // Because code is defined, initialised variables get put in the data
4083    // segment and are defined, and uninitialised variables get put in the
4084    // bss segment and are auto-zeroed (and so defined).
4085    //
4086    // It's possible that there will be padding between global variables.
4087    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
4088    // a program uses it, Memcheck will not complain.  This is arguably a
4089    // false negative, but it's a grey area -- the behaviour is defined (the
4090    // padding is zeroed) but it's probably not what the user intended.  And
4091    // we can't avoid it.
4092    //
4093    // Note: we generally ignore RWX permissions, because we can't track them
4094    // without requiring more than one A bit which would slow things down a
4095    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
4096    // So we mark any such pages as "unaddressable".
4097    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4098          a, (ULong)len, rr, ww, xx);
4099    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4100 }
4101 
4102 static
4103 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4104 {
4105    MC_(make_mem_defined)(a, len);
4106 }
4107 
4108 
4109 /*------------------------------------------------------------*/
4110 /*--- Register event handlers                              ---*/
4111 /*------------------------------------------------------------*/
4112 
4113 /* Try and get a nonzero origin for the guest state section of thread
4114    tid characterised by (offset,size).  Return 0 if nothing to show
4115    for it. */
4116 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4117                                              Int offset, SizeT size )
4118 {
4119    Int   sh2off;
4120    UInt  area[3];
4121    UInt  otag;
4122    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4123    if (sh2off == -1)
4124       return 0;  /* This piece of guest state is not tracked */
4125    tl_assert(sh2off >= 0);
4126    tl_assert(0 == (sh2off % 4));
4127    area[0] = 0x31313131;
4128    area[2] = 0x27272727;
4129    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4130    tl_assert(area[0] == 0x31313131);
4131    tl_assert(area[2] == 0x27272727);
4132    otag = area[1];
4133    return otag;
4134 }
4135 
4136 
4137 /* When some chunk of guest state is written, mark the corresponding
4138    shadow area as valid.  This is used to initialise arbitrarily large
4139    chunks of guest state, hence the _SIZE value, which has to be as
4140    big as the biggest guest state.
4141 */
4142 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4143                                 PtrdiffT offset, SizeT size)
4144 {
4145 #  define MAX_REG_WRITE_SIZE 1712
4146    UChar area[MAX_REG_WRITE_SIZE];
4147    tl_assert(size <= MAX_REG_WRITE_SIZE);
4148    VG_(memset)(area, V_BITS8_DEFINED, size);
4149    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4150 #  undef MAX_REG_WRITE_SIZE
4151 }
4152 
4153 static
4154 void mc_post_reg_write_clientcall ( ThreadId tid,
4155                                     PtrdiffT offset, SizeT size, Addr f)
4156 {
4157    mc_post_reg_write(/*dummy*/0, tid, offset, size);
4158 }
4159 
4160 /* Look at the definedness of the guest's shadow state for
4161    [offset, offset+len).  If any part of that is undefined, record
4162    a parameter error.
4163 */
4164 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4165                               PtrdiffT offset, SizeT size)
4166 {
4167    Int   i;
4168    Bool  bad;
4169    UInt  otag;
4170 
4171    UChar area[16];
4172    tl_assert(size <= 16);
4173 
4174    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4175 
4176    bad = False;
4177    for (i = 0; i < size; i++) {
4178       if (area[i] != V_BITS8_DEFINED) {
4179          bad = True;
4180          break;
4181       }
4182    }
4183 
4184    if (!bad)
4185       return;
4186 
4187    /* We've found some undefinedness.  See if we can also find an
4188       origin for it. */
4189    otag = mb_get_origin_for_guest_offset( tid, offset, size );
4190    MC_(record_regparam_error) ( tid, s, otag );
4191 }
4192 
4193 
4194 /*------------------------------------------------------------*/
4195 /*--- Functions called directly from generated code:       ---*/
4196 /*--- Load/store handlers.                                 ---*/
4197 /*------------------------------------------------------------*/
4198 
4199 /* Types:  LOADV32, LOADV16, LOADV8 are:
4200                UWord fn ( Addr a )
4201    so they return 32-bits on 32-bit machines and 64-bits on
4202    64-bit machines.  Addr has the same size as a host word.
4203 
4204    LOADV64 is always  ULong fn ( Addr a )
4205 
4206    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4207    are a UWord, and for STOREV64 they are a ULong.
4208 */
4209 
4210 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4211    naturally '_sz/8'-aligned, or it exceeds the range covered by the
4212    primary map.  This is all very tricky (and important!), so let's
4213    work through the maths by hand (below), *and* assert for these
4214    values at startup. */
4215 #define MASK(_szInBytes) \
4216    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4217 
4218 /* MASK only exists so as to define this macro. */
4219 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4220    ((_a) & MASK((_szInBits>>3)))
4221 
4222 /* On a 32-bit machine:
4223 
4224    N_PRIMARY_BITS          == 16, so
4225    N_PRIMARY_MAP           == 0x10000, so
4226    N_PRIMARY_MAP-1         == 0xFFFF, so
4227    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4228 
4229    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4230            = ~ ( 0xFFFF | 0xFFFF0000 )
4231            = ~ 0xFFFF'FFFF
4232            = 0
4233 
4234    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4235            = ~ ( 0xFFFE | 0xFFFF0000 )
4236            = ~ 0xFFFF'FFFE
4237            = 1
4238 
4239    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4240            = ~ ( 0xFFFC | 0xFFFF0000 )
4241            = ~ 0xFFFF'FFFC
4242            = 3
4243 
4244    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4245            = ~ ( 0xFFF8 | 0xFFFF0000 )
4246            = ~ 0xFFFF'FFF8
4247            = 7
4248 
4249    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4250    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
4251    the 1-byte alignment case, it is always a zero value, since MASK(1)
4252    is zero.  All as expected.
4253 
4254    On a 64-bit machine, it's more complex, since we're testing
4255    simultaneously for misalignment and for the address being at or
4256    above 64G:
4257 
4258    N_PRIMARY_BITS          == 20, so
4259    N_PRIMARY_MAP           == 0x100000, so
4260    N_PRIMARY_MAP-1         == 0xFFFFF, so
4261    (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4262 
4263    MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4264            = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4265            = ~ 0xF'FFFF'FFFF
4266            = 0xFFFF'FFF0'0000'0000
4267 
4268    MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4269            = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4270            = ~ 0xF'FFFF'FFFE
4271            = 0xFFFF'FFF0'0000'0001
4272 
4273    MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4274            = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4275            = ~ 0xF'FFFF'FFFC
4276            = 0xFFFF'FFF0'0000'0003
4277 
4278    MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4279            = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4280            = ~ 0xF'FFFF'FFF8
4281            = 0xFFFF'FFF0'0000'0007
4282 */
4283 
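/* Editor's illustration -- a hedged sketch, not part of the original
   sources: the 64-bit arithmetic above, re-derived with plain unsigned
   long arithmetic.  It assumes a 64-bit build (unsigned long is 64 bits)
   with N_PRIMARY_MAP == 0x100000, as described in the comment; the
   function name is illustrative only. */
#if 0
#include <assert.h>
static void mask_worked_example ( void )
{
   unsigned long n_primary_map = 0x100000UL;  /* the 64-bit case above      */
   unsigned long mask4 = ~((0x10000UL - 4) | ((n_primary_map - 1) << 16));
   assert(mask4 == 0xFFFFFFF000000003UL);     /* matches MASK(4) above      */
   assert((0x1000UL       & mask4) == 0);     /* 4-aligned, below 64G: fast */
   assert((0x1003UL       & mask4) != 0);     /* misaligned: slow path      */
   assert((0x1000000000UL & mask4) != 0);     /* at/above 64G: slow path    */
}
#endif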
4284 
4285 /* ------------------------ Size = 16 ------------------------ */
4286 
4287 static INLINE
4288 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4289                            Addr a, SizeT nBits, Bool isBigEndian )
4290 {
4291    PROF_EVENT(200, "mc_LOADV_128_or_256");
4292 
4293 #ifndef PERF_FAST_LOADV
4294    mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4295    return;
4296 #else
4297    {
4298       UWord   sm_off16, vabits16, j;
4299       UWord   nBytes  = nBits / 8;
4300       UWord   nULongs = nBytes / 8;
4301       SecMap* sm;
4302 
4303       if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4304          PROF_EVENT(201, "mc_LOADV_128_or_256-slow1");
4305          mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4306          return;
4307       }
4308 
4309       /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4310          suitably aligned, is mapped, and addressable. */
4311       for (j = 0; j < nULongs; j++) {
4312          sm       = get_secmap_for_reading_low(a + 8*j);
4313          sm_off16 = SM_OFF_16(a + 8*j);
4314          vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4315 
4316          // Convert V bits from compact memory form to expanded
4317          // register form.
4318          if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4319             res[j] = V_BITS64_DEFINED;
4320          } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4321             res[j] = V_BITS64_UNDEFINED;
4322          } else {
4323             /* Slow case: some block of 8 bytes are not all-defined or
4324                all-undefined. */
4325             PROF_EVENT(202, "mc_LOADV_128_or_256-slow2");
4326             mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4327             return;
4328          }
4329       }
4330       return;
4331    }
4332 #endif
4333 }
4334 
4335 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4336 {
4337    mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4338 }
4339 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4340 {
4341    mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4342 }
4343 
4344 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4345 {
4346    mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4347 }
4348 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4349 {
4350    mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4351 }
4352 
4353 /* ------------------------ Size = 8 ------------------------ */
4354 
4355 static INLINE
4356 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4357 {
4358    PROF_EVENT(200, "mc_LOADV64");
4359 
4360 #ifndef PERF_FAST_LOADV
4361    return mc_LOADVn_slow( a, 64, isBigEndian );
4362 #else
4363    {
4364       UWord   sm_off16, vabits16;
4365       SecMap* sm;
4366 
4367       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4368          PROF_EVENT(201, "mc_LOADV64-slow1");
4369          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4370       }
4371 
4372       sm       = get_secmap_for_reading_low(a);
4373       sm_off16 = SM_OFF_16(a);
4374       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4375 
4376       // Handle common case quickly: a is suitably aligned, is mapped, and
4377       // addressable.
4378       // Convert V bits from compact memory form to expanded register form.
4379       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4380          return V_BITS64_DEFINED;
4381       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4382          return V_BITS64_UNDEFINED;
4383       } else {
4384          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4385          PROF_EVENT(202, "mc_LOADV64-slow2");
4386          return mc_LOADVn_slow( a, 64, isBigEndian );
4387       }
4388    }
4389 #endif
4390 }
4391 
4392 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4393 {
4394    return mc_LOADV64(a, True);
4395 }
4396 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4397 {
4398    return mc_LOADV64(a, False);
4399 }
4400 
4401 
4402 static INLINE
4403 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4404 {
4405    PROF_EVENT(210, "mc_STOREV64");
4406 
4407 #ifndef PERF_FAST_STOREV
4408    // XXX: this slow case seems to be marginally faster than the fast case!
4409    // Investigate further.
4410    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4411 #else
4412    {
4413       UWord   sm_off16, vabits16;
4414       SecMap* sm;
4415 
4416       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4417          PROF_EVENT(211, "mc_STOREV64-slow1");
4418          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4419          return;
4420       }
4421 
4422       sm       = get_secmap_for_reading_low(a);
4423       sm_off16 = SM_OFF_16(a);
4424       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4425 
4426       // To understand the below cleverness, see the extensive comments
4427       // in MC_(helperc_STOREV8).
4428       if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4429          if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4430             return;
4431          }
4432          if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4433             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4434             return;
4435          }
4436          PROF_EVENT(232, "mc_STOREV64-slow2");
4437          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4438          return;
4439       }
4440       if (V_BITS64_UNDEFINED == vbits64) {
4441          if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4442             return;
4443          }
4444          if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4445             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4446             return;
4447          }
4448          PROF_EVENT(232, "mc_STOREV64-slow3");
4449          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4450          return;
4451       }
4452 
4453       PROF_EVENT(212, "mc_STOREV64-slow4");
4454       mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4455    }
4456 #endif
4457 }
4458 
4459 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4460 {
4461    mc_STOREV64(a, vbits64, True);
4462 }
4463 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4464 {
4465    mc_STOREV64(a, vbits64, False);
4466 }
4467 
4468 
4469 /* ------------------------ Size = 4 ------------------------ */
4470 
4471 static INLINE
4472 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4473 {
4474    PROF_EVENT(220, "mc_LOADV32");
4475 
4476 #ifndef PERF_FAST_LOADV
4477    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4478 #else
4479    {
4480       UWord   sm_off, vabits8;
4481       SecMap* sm;
4482 
4483       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4484          PROF_EVENT(221, "mc_LOADV32-slow1");
4485          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4486       }
4487 
4488       sm      = get_secmap_for_reading_low(a);
4489       sm_off  = SM_OFF(a);
4490       vabits8 = sm->vabits8[sm_off];
4491 
4492       // Handle common case quickly: a is suitably aligned, is mapped, and the
4493       // entire word32 it lives in is addressable.
4494       // Convert V bits from compact memory form to expanded register form.
4495       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4496       // Almost certainly not necessary, but be paranoid.
4497       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4498          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4499       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4500          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4501       } else {
4502          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4503          PROF_EVENT(222, "mc_LOADV32-slow2");
4504          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4505       }
4506    }
4507 #endif
4508 }
4509 
4510 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4511 {
4512    return mc_LOADV32(a, True);
4513 }
4514 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4515 {
4516    return mc_LOADV32(a, False);
4517 }
4518 
4519 
4520 static INLINE
4521 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4522 {
4523    PROF_EVENT(230, "mc_STOREV32");
4524 
4525 #ifndef PERF_FAST_STOREV
4526    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4527 #else
4528    {
4529       UWord   sm_off, vabits8;
4530       SecMap* sm;
4531 
4532       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4533          PROF_EVENT(231, "mc_STOREV32-slow1");
4534          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4535          return;
4536       }
4537 
4538       sm      = get_secmap_for_reading_low(a);
4539       sm_off  = SM_OFF(a);
4540       vabits8 = sm->vabits8[sm_off];
4541 
4542       // To understand the below cleverness, see the extensive comments
4543       // in MC_(helperc_STOREV8).
4544       if (LIKELY(V_BITS32_DEFINED == vbits32)) {
4545          if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
4546             return;
4547          }
4548          if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
4549             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4550             return;
4551          }
4552          PROF_EVENT(232, "mc_STOREV32-slow2");
4553          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4554          return;
4555       }
4556       if (V_BITS32_UNDEFINED == vbits32) {
4557          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4558             return;
4559          }
4560          if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4561             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4562             return;
4563          }
4564          PROF_EVENT(233, "mc_STOREV32-slow3");
4565          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4566          return;
4567       }
4568 
4569       PROF_EVENT(234, "mc_STOREV32-slow4");
4570       mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4571    }
4572 #endif
4573 }
4574 
4575 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4576 {
4577    mc_STOREV32(a, vbits32, True);
4578 }
4579 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4580 {
4581    mc_STOREV32(a, vbits32, False);
4582 }
4583 
4584 
4585 /* ------------------------ Size = 2 ------------------------ */
4586 
4587 static INLINE
4588 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4589 {
4590    PROF_EVENT(240, "mc_LOADV16");
4591 
4592 #ifndef PERF_FAST_LOADV
4593    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4594 #else
4595    {
4596       UWord   sm_off, vabits8;
4597       SecMap* sm;
4598 
4599       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4600          PROF_EVENT(241, "mc_LOADV16-slow1");
4601          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4602       }
4603 
4604       sm      = get_secmap_for_reading_low(a);
4605       sm_off  = SM_OFF(a);
4606       vabits8 = sm->vabits8[sm_off];
4607       // Handle common case quickly: a is suitably aligned, is mapped, and is
4608       // addressable.
4609       // Convert V bits from compact memory form to expanded register form
4610       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
4611       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4612       else {
4613          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4614          // the two sub-bytes.
4615          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4616          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
4617          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4618          else {
4619             /* Slow case: the two bytes are not all-defined or all-undefined. */
4620             PROF_EVENT(242, "mc_LOADV16-slow2");
4621             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4622          }
4623       }
4624    }
4625 #endif
4626 }
4627 
4628 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4629 {
4630    return mc_LOADV16(a, True);
4631 }
4632 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4633 {
4634    return mc_LOADV16(a, False);
4635 }
4636 
4637 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
4638 static INLINE
4639 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
4640 {
4641    UInt shift;
4642    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
4643    shift = (a & 2) << 1;               // shift by 0 or 4
4644    vabits8 >>= shift;                  // shift the four bits to the bottom
4645     // check 2 x vabits2 != VA_BITS2_NOACCESS
4646    return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
4647       &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
4648 }
4649 
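/* Editor's illustration -- a hedged sketch, not part of the original
   sources: the nibble selection used above, spelled out with plain
   arithmetic.  For a 2-aligned address, (a & 2) << 1 is 0 when a % 4 == 0
   (the pair's vabits4 sit in the low nibble of vabits8) and 4 when
   a % 4 == 2 (the high nibble).  The function name is illustrative only. */
#if 0
#include <assert.h>
static void vabits4_shift_example ( void )
{
   unsigned long a0 = 0x1000UL, a2 = 0x1002UL;
   assert(((a0 & 2) << 1) == 0);   /* bytes 0..1: low nibble  */
   assert(((a2 & 2) << 1) == 4);   /* bytes 2..3: high nibble */
}
#endif
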
4650 static INLINE
4651 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4652 {
4653    PROF_EVENT(250, "mc_STOREV16");
4654 
4655 #ifndef PERF_FAST_STOREV
4656    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4657 #else
4658    {
4659       UWord   sm_off, vabits8;
4660       SecMap* sm;
4661 
4662       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4663          PROF_EVENT(251, "mc_STOREV16-slow1");
4664          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4665          return;
4666       }
4667 
4668       sm      = get_secmap_for_reading_low(a);
4669       sm_off  = SM_OFF(a);
4670       vabits8 = sm->vabits8[sm_off];
4671 
4672       // To understand the below cleverness, see the extensive comments
4673       // in MC_(helperc_STOREV8).
4674       if (LIKELY(V_BITS16_DEFINED == vbits16)) {
4675          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4676             return;
4677          }
4678          if (!is_distinguished_sm(sm)
4679              && accessible_vabits4_in_vabits8(a, vabits8)) {
4680             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
4681                                          &(sm->vabits8[sm_off]) );
4682             return;
4683          }
4684          PROF_EVENT(232, "mc_STOREV16-slow2");
4685          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
              return;
4686       }
4687       if (V_BITS16_UNDEFINED == vbits16) {
4688          if (vabits8 == VA_BITS8_UNDEFINED) {
4689             return;
4690          }
4691          if (!is_distinguished_sm(sm)
4692              && accessible_vabits4_in_vabits8(a, vabits8)) {
4693             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4694                                          &(sm->vabits8[sm_off]) );
4695             return;
4696          }
4697          PROF_EVENT(233, "mc_STOREV16-slow3");
4698          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4699          return;
4700       }
4701 
4702       PROF_EVENT(234, "mc_STOREV16-slow4");
4703       mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4704    }
4705 #endif
4706 }
4707 
4708 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4709 {
4710    mc_STOREV16(a, vbits16, True);
4711 }
4712 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4713 {
4714    mc_STOREV16(a, vbits16, False);
4715 }
4716 
4717 
4718 /* ------------------------ Size = 1 ------------------------ */
4719 /* Note: endianness is irrelevant for size == 1 */
4720 
4721 VG_REGPARM(1)
4722 UWord MC_(helperc_LOADV8) ( Addr a )
4723 {
4724    PROF_EVENT(260, "mc_LOADV8");
4725 
4726 #ifndef PERF_FAST_LOADV
4727    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4728 #else
4729    {
4730       UWord   sm_off, vabits8;
4731       SecMap* sm;
4732 
4733       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4734          PROF_EVENT(261, "mc_LOADV8-slow1");
4735          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4736       }
4737 
4738       sm      = get_secmap_for_reading_low(a);
4739       sm_off  = SM_OFF(a);
4740       vabits8 = sm->vabits8[sm_off];
4741       // Convert V bits from compact memory form to expanded register form
4742       // Handle common case quickly: a is mapped, and the entire
4743       // word32 it lives in is addressable.
4744       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
4745       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
4746       else {
4747          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4748          // the single byte.
4749          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4750          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
4751          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4752          else {
4753             /* Slow case: the byte is not all-defined or all-undefined. */
4754             PROF_EVENT(262, "mc_LOADV8-slow2");
4755             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4756          }
4757       }
4758    }
4759 #endif
4760 }
4761 
4762 
4763 VG_REGPARM(2)
4764 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4765 {
4766    PROF_EVENT(270, "mc_STOREV8");
4767 
4768 #ifndef PERF_FAST_STOREV
4769    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4770 #else
4771    {
4772       UWord   sm_off, vabits8;
4773       SecMap* sm;
4774 
4775       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4776          PROF_EVENT(271, "mc_STOREV8-slow1");
4777          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4778          return;
4779       }
4780 
4781       sm      = get_secmap_for_reading_low(a);
4782       sm_off  = SM_OFF(a);
4783       vabits8 = sm->vabits8[sm_off];
4784 
4785       // Clevernesses to speed up storing V bits.
4786       // The 64/32/16 bit cases also have similar clevernesses, but it
4787       // works a little differently to the code below.
4788       //
4789       // Cleverness 1:  sometimes we don't have to write the shadow memory at
4790       // all, if we can tell that what we want to write is the same as what is
4791       // already there. These cases are marked below as "defined on defined" and
4792       // "undefined on undefined".
4793       //
4794       // Cleverness 2:
4795       // We also avoid calling mc_STOREVn_slow if the V bits can be written
4796       // directly into the secondary map. V bits can be directly written
4797       // if 4 conditions are respected:
4798       //   * The address for which V bits are written is naturally aligned
4799       //        on 1 byte  for STOREV8 (this is always true)
4800       //        on 2 bytes for STOREV16
4801       //        on 4 bytes for STOREV32
4802       //        on 8 bytes for STOREV64.
4803       //   * V bits being written are either fully defined or fully undefined.
4804       //     (for partially defined V bits, V bits cannot be directly written,
4805       //      as the secondary vbits table must be maintained).
4806       //   * the secmap is not distinguished (distinguished maps cannot be
4807       //     modified).
4808       //   * the memory corresponding to the V bits being written is
4809       //     accessible (if one or more bytes are not accessible,
4810       //     we must call mc_STOREVn_slow in order to report accessibility
4811       //     errors).
4812       //     Note that for STOREV32 and STOREV64, it is too expensive
4813       //     to verify the accessibility of each byte for the benefit it
4814       //     brings. Instead, a quicker check is done by comparing to
4815       //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
4816       //     but misses some opportunities for direct modification.
4817       //     Checking each byte's accessibility was measured for
4818       //     STOREV32 on the perf tests and slowed them all down.
4819       // The cases corresponding to cleverness 2 are marked below as
4820       // "direct mod".
4821       if (LIKELY(V_BITS8_DEFINED == vbits8)) {
4822          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4823             return; // defined on defined
4824          }
4825          if (!is_distinguished_sm(sm)
4826              && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
4827             // direct mod
4828             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4829                                          &(sm->vabits8[sm_off]) );
4830             return;
4831          }
4832          PROF_EVENT(232, "mc_STOREV8-slow2");
4833          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4834          return;
4835       }
4836       if (V_BITS8_UNDEFINED == vbits8) {
4837          if (vabits8 == VA_BITS8_UNDEFINED) {
4838             return; // undefined on undefined
4839          }
4840          if (!is_distinguished_sm(sm)
4841              && (VA_BITS2_NOACCESS
4842                  != extract_vabits2_from_vabits8(a, vabits8))) {
4843             // direct mod
4844             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4845                                          &(sm->vabits8[sm_off]) );
4846             return;
4847          }
4848          PROF_EVENT(233, "mc_STOREV8-slow3");
4849          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4850          return;
4851       }
4852 
4853       // Partially defined word
4854       PROF_EVENT(234, "mc_STOREV8-slow4");
4855       mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4856    }
4857 #endif
4858 }
4859 
4860 
4861 /*------------------------------------------------------------*/
4862 /*--- Functions called directly from generated code:       ---*/
4863 /*--- Value-check failure handlers.                        ---*/
4864 /*------------------------------------------------------------*/
4865 
4866 /* Call these ones when an origin is available ... */
4867 VG_REGPARM(1)
4868 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4869    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4870 }
4871 
4872 VG_REGPARM(1)
4873 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4874    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4875 }
4876 
4877 VG_REGPARM(1)
4878 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4879    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4880 }
4881 
4882 VG_REGPARM(1)
4883 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4884    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4885 }
4886 
4887 VG_REGPARM(2)
4888 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4889    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4890 }
4891 
4892 /* ... and these when an origin isn't available. */
4893 
4894 VG_REGPARM(0)
4895 void MC_(helperc_value_check0_fail_no_o) ( void ) {
4896    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4897 }
4898 
4899 VG_REGPARM(0)
4900 void MC_(helperc_value_check1_fail_no_o) ( void ) {
4901    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4902 }
4903 
4904 VG_REGPARM(0)
4905 void MC_(helperc_value_check4_fail_no_o) ( void ) {
4906    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4907 }
4908 
4909 VG_REGPARM(0)
4910 void MC_(helperc_value_check8_fail_no_o) ( void ) {
4911    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4912 }
4913 
4914 VG_REGPARM(1)
4915 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4916    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4917 }
4918 
4919 
4920 /*------------------------------------------------------------*/
4921 /*--- Metadata get/set functions, for client requests.     ---*/
4922 /*------------------------------------------------------------*/
4923 
4924 // Nb: this expands the V+A bits out into register-form V bits, even though
4925 // they're in memory.  This is for backward compatibility, and because it's
4926 // probably what the user wants.
4927 
4928 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4929    error [no longer used], 3 == addressing error. */
4930 /* Nb: We used to issue various definedness/addressability errors from here,
4931    but we took them out because they ranged from not-very-helpful to
4932    downright annoying, and they complicated the error data structures. */
4933 static Int mc_get_or_set_vbits_for_client (
4934    Addr a,
4935    Addr vbits,
4936    SizeT szB,
4937    Bool setting, /* True <=> set vbits,  False <=> get vbits */
4938    Bool is_client_request /* True <=> real user request
4939                              False <=> internal call from gdbserver */
4940 )
4941 {
4942    SizeT i;
4943    Bool  ok;
4944    UChar vbits8;
4945 
4946    /* Check that arrays are addressable before doing any getting/setting.
4947       vbits to be checked only for real user request. */
4948    for (i = 0; i < szB; i++) {
4949       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4950           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
4951          return 3;
4952       }
4953    }
4954 
4955    /* Do the copy */
4956    if (setting) {
4957       /* setting */
4958       for (i = 0; i < szB; i++) {
4959          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4960          tl_assert(ok);
4961       }
4962    } else {
4963       /* getting */
4964       for (i = 0; i < szB; i++) {
4965          ok = get_vbits8(a + i, &vbits8);
4966          tl_assert(ok);
4967          ((UChar*)vbits)[i] = vbits8;
4968       }
4969       if (is_client_request)
4970         // The bytes in vbits[] have now been set, so mark them as such.
4971         MC_(make_mem_defined)(vbits, szB);
4972    }
4973 
4974    return 1;
4975 }
4976 
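/* Editor's illustration -- a hedged sketch, not part of the original
   sources: how a client typically reaches the routine above, via the
   VALGRIND_GET_VBITS and VALGRIND_SET_VBITS requests from memcheck.h.
   The return codes follow the comment above (1 == OK, 3 == addressing
   error); 0 means the program is not running under Valgrind.  The
   function and variable names are illustrative only. */
#if 0
#include "memcheck.h"
static void vbits_roundtrip_example ( void )
{
   int  data;                       /* deliberately left uninitialised   */
   char vbits[sizeof data];         /* one V-bit byte per byte of 'data' */
   int  r = VALGRIND_GET_VBITS(&data, vbits, sizeof data);
   if (r == 1) {
      /* ... overwrite 'data', then restore its definedness state ... */
      (void) VALGRIND_SET_VBITS(&data, vbits, sizeof data);
   }
}
#endif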
4977 
4978 /*------------------------------------------------------------*/
4979 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
4980 /*------------------------------------------------------------*/
4981 
4982 /* For the memory leak detector, say whether an entire 64k chunk of
4983    address space is possibly in use, or not.  If in doubt return
4984    True.
4985 */
4986 Bool MC_(is_within_valid_secondary) ( Addr a )
4987 {
4988    SecMap* sm = maybe_get_secmap_for ( a );
4989    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
4990       /* Definitely not in use. */
4991       return False;
4992    } else {
4993       return True;
4994    }
4995 }
4996 
4997 
4998 /* For the memory leak detector, say whether or not a given word
4999    address is to be regarded as valid. */
5000 Bool MC_(is_valid_aligned_word) ( Addr a )
5001 {
5002    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5003    tl_assert(VG_IS_WORD_ALIGNED(a));
5004    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5005       return False;
5006    if (sizeof(UWord) == 8) {
5007       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5008          return False;
5009    }
5010    if (UNLIKELY(MC_(in_ignored_range)(a)))
5011       return False;
5012    else
5013       return True;
5014 }
5015 
5016 
5017 /*------------------------------------------------------------*/
5018 /*--- Initialisation                                       ---*/
5019 /*------------------------------------------------------------*/
5020 
5021 static void init_shadow_memory ( void )
5022 {
5023    Int     i;
5024    SecMap* sm;
5025 
5026    tl_assert(V_BIT_UNDEFINED   == 1);
5027    tl_assert(V_BIT_DEFINED     == 0);
5028    tl_assert(V_BITS8_UNDEFINED == 0xFF);
5029    tl_assert(V_BITS8_DEFINED   == 0);
5030 
5031    /* Build the 3 distinguished secondaries */
5032    sm = &sm_distinguished[SM_DIST_NOACCESS];
5033    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5034 
5035    sm = &sm_distinguished[SM_DIST_UNDEFINED];
5036    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5037 
5038    sm = &sm_distinguished[SM_DIST_DEFINED];
5039    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5040 
5041    /* Set up the primary map. */
5042    /* These entries gradually get overwritten as the used address
5043       space expands. */
5044    for (i = 0; i < N_PRIMARY_MAP; i++)
5045       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5046 
5047    /* Auxiliary primary maps */
5048    init_auxmap_L1_L2();
5049 
5050    /* auxmap_size = auxmap_used = 0;
5051       no ... these are statically initialised */
5052 
5053    /* Secondary V bit table */
5054    secVBitTable = createSecVBitTable();
5055 }
5056 
5057 
5058 /*------------------------------------------------------------*/
5059 /*--- Sanity check machinery (permanently engaged)         ---*/
5060 /*------------------------------------------------------------*/
5061 
5062 static Bool mc_cheap_sanity_check ( void )
5063 {
5064    n_sanity_cheap++;
5065    PROF_EVENT(490, "cheap_sanity_check");
5066    /* Check for sane operating level */
5067    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5068       return False;
5069    /* nothing else useful we can rapidly check */
5070    return True;
5071 }
5072 
5073 static Bool mc_expensive_sanity_check ( void )
5074 {
5075    Int     i;
5076    Word    n_secmaps_found;
5077    SecMap* sm;
5078    const HChar*  errmsg;
5079    Bool    bad = False;
5080 
5081    if (0) VG_(printf)("expensive sanity check\n");
5082    if (0) return True;
5083 
5084    n_sanity_expensive++;
5085    PROF_EVENT(491, "expensive_sanity_check");
5086 
5087    /* Check for sane operating level */
5088    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5089       return False;
5090 
5091    /* Check that the 3 distinguished SMs are still as they should be. */
5092 
5093    /* Check noaccess DSM. */
5094    sm = &sm_distinguished[SM_DIST_NOACCESS];
5095    for (i = 0; i < SM_CHUNKS; i++)
5096       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5097          bad = True;
5098 
5099    /* Check undefined DSM. */
5100    sm = &sm_distinguished[SM_DIST_UNDEFINED];
5101    for (i = 0; i < SM_CHUNKS; i++)
5102       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5103          bad = True;
5104 
5105    /* Check defined DSM. */
5106    sm = &sm_distinguished[SM_DIST_DEFINED];
5107    for (i = 0; i < SM_CHUNKS; i++)
5108       if (sm->vabits8[i] != VA_BITS8_DEFINED)
5109          bad = True;
5110 
5111    if (bad) {
5112       VG_(printf)("memcheck expensive sanity: "
5113                   "distinguished_secondaries have changed\n");
5114       return False;
5115    }
5116 
5117    /* If we're not checking for undefined value errors, the secondary V bit
5118     * table should be empty. */
5119    if (MC_(clo_mc_level) == 1) {
5120       if (0 != VG_(OSetGen_Size)(secVBitTable))
5121          return False;
5122    }
5123 
5124    /* check the auxiliary maps, very thoroughly */
5125    n_secmaps_found = 0;
5126    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5127    if (errmsg) {
5128       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5129       return False;
5130    }
5131 
5132    /* n_secmaps_found is now the number referred to by the auxiliary
5133       primary map.  Now add on the ones referred to by the main
5134       primary map. */
5135    for (i = 0; i < N_PRIMARY_MAP; i++) {
5136       if (primary_map[i] == NULL) {
5137          bad = True;
5138       } else {
5139          if (!is_distinguished_sm(primary_map[i]))
5140             n_secmaps_found++;
5141       }
5142    }
5143 
5144    /* check that the number of secmaps issued matches the number that
5145       are reachable (iow, no secmap leaks) */
5146    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5147       bad = True;
5148 
5149    if (bad) {
5150       VG_(printf)("memcheck expensive sanity: "
5151                   "apparent secmap leakage\n");
5152       return False;
5153    }
5154 
5155    if (bad) {
5156       VG_(printf)("memcheck expensive sanity: "
5157                   "auxmap covers wrong address space\n");
5158       return False;
5159    }
5160 
5161    /* there is only one pointer to each secmap (expensive) */
5162 
5163    return True;
5164 }
5165 
5166 /*------------------------------------------------------------*/
5167 /*--- Command line args                                    ---*/
5168 /*------------------------------------------------------------*/
5169 
5170 /* --partial-loads-ok: enable by default on MacOS.  The MacOS system
5171    graphics libraries are heavily vectorised, and not enabling this by
5172    default causes lots of false errors. */
5173 #if defined(VGO_darwin)
5174 Bool          MC_(clo_partial_loads_ok)       = True;
5175 #else
5176 Bool          MC_(clo_partial_loads_ok)       = False;
5177 #endif
5178 
5179 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
5180 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
5181 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
5182 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
5183 UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
5184 UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
5185 UInt          MC_(clo_leak_check_heuristics)  = 0;
5186 Bool          MC_(clo_workaround_gcc296_bugs) = False;
5187 Int           MC_(clo_malloc_fill)            = -1;
5188 Int           MC_(clo_free_fill)              = -1;
5189 KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_then_free;
5190 Int           MC_(clo_mc_level)               = 2;
5191 Bool          MC_(clo_show_mismatched_frees)  = True;
5192 
5193 static const HChar * MC_(parse_leak_heuristics_tokens) =
5194    "-,stdstring,length64,newarray,multipleinheritance";
5195 /* The first heuristic value (LchNone) has no keyword, as this is
5196    a fake heuristic used to collect the blocks found without any
5197    heuristic. */
5198 
5199 static Bool mc_process_cmd_line_options(const HChar* arg)
5200 {
5201    const HChar* tmp_str;
5202    Int   tmp_show;
5203 
5204    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5205 
5206    /* Set MC_(clo_mc_level):
5207          1 = A bit tracking only
5208          2 = A and V bit tracking, but no V bit origins
5209          3 = A and V bit tracking, and V bit origins
5210 
5211       Do this by inspecting --undef-value-errors= and
5212       --track-origins=.  Reject the case --undef-value-errors=no
5213       --track-origins=yes as meaningless.
5214    */
5215    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
5216       if (MC_(clo_mc_level) == 3) {
5217          goto bad_level;
5218       } else {
5219          MC_(clo_mc_level) = 1;
5220          return True;
5221       }
5222    }
5223    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
5224       if (MC_(clo_mc_level) == 1)
5225          MC_(clo_mc_level) = 2;
5226       return True;
5227    }
5228    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
5229       if (MC_(clo_mc_level) == 3)
5230          MC_(clo_mc_level) = 2;
5231       return True;
5232    }
5233    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
5234       if (MC_(clo_mc_level) == 1) {
5235          goto bad_level;
5236       } else {
5237          MC_(clo_mc_level) = 3;
5238          return True;
5239       }
5240    }
5241 
5242         if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
5243    else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
5244                        MC_(parse_leak_kinds_tokens),
5245                        MC_(clo_error_for_leak_kinds)) {}
5246    else if VG_USET_CLO(arg, "--show-leak-kinds",
5247                        MC_(parse_leak_kinds_tokens),
5248                        MC_(clo_show_leak_kinds)) {}
5249    else if VG_USET_CLO(arg, "--leak-check-heuristics",
5250                        MC_(parse_leak_heuristics_tokens),
5251                        MC_(clo_leak_check_heuristics)) {}
5252    else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
5253       if (tmp_show) {
5254          MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
5255       } else {
5256          MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
5257       }
5258    }
5259    else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
5260       if (tmp_show) {
5261          MC_(clo_show_leak_kinds) |= R2S(Possible);
5262       } else {
5263          MC_(clo_show_leak_kinds) &= ~R2S(Possible);
5264       }
5265    }
5266    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
5267                                             MC_(clo_workaround_gcc296_bugs)) {}
5268 
5269    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
5270                                                0, 10*1000*1000*1000LL) {}
5271 
5272    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
5273                        MC_(clo_freelist_big_blocks),
5274                        0, 10*1000*1000*1000LL) {}
5275 
5276    else if VG_XACT_CLO(arg, "--leak-check=no",
5277                             MC_(clo_leak_check), LC_Off) {}
5278    else if VG_XACT_CLO(arg, "--leak-check=summary",
5279                             MC_(clo_leak_check), LC_Summary) {}
5280    else if VG_XACT_CLO(arg, "--leak-check=yes",
5281                             MC_(clo_leak_check), LC_Full) {}
5282    else if VG_XACT_CLO(arg, "--leak-check=full",
5283                             MC_(clo_leak_check), LC_Full) {}
5284 
5285    else if VG_XACT_CLO(arg, "--leak-resolution=low",
5286                             MC_(clo_leak_resolution), Vg_LowRes) {}
5287    else if VG_XACT_CLO(arg, "--leak-resolution=med",
5288                             MC_(clo_leak_resolution), Vg_MedRes) {}
5289    else if VG_XACT_CLO(arg, "--leak-resolution=high",
5290                             MC_(clo_leak_resolution), Vg_HighRes) {}
5291 
5292    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
5293       Bool ok = parse_ignore_ranges(tmp_str);
5294       if (!ok) {
5295          VG_(message)(Vg_DebugMsg,
5296             "ERROR: --ignore-ranges: "
5297             "invalid syntax, or end <= start in range\n");
5298          return False;
5299       }
5300       if (gIgnoredAddressRanges) {
5301          Word i;
5302          for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
5303             UWord val     = IAR_INVALID;
5304             UWord key_min = ~(UWord)0;
5305             UWord key_max = (UWord)0;
5306             VG_(indexRangeMap)( &key_min, &key_max, &val,
5307                                 gIgnoredAddressRanges, i );
5308             tl_assert(key_min <= key_max);
5309             UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
5310             if (key_max - key_min > limit) {
5311                VG_(message)(Vg_DebugMsg,
5312                   "ERROR: --ignore-ranges: suspiciously large range:\n");
5313                VG_(message)(Vg_DebugMsg,
5314                    "       0x%lx-0x%lx (size %ld)\n", key_min, key_max,
5315                    key_max - key_min + 1);
5316                return False;
5317             }
5318          }
5319       }
5320    }
5321 
5322    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
5323    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
5324 
5325    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
5326                        MC_(clo_keep_stacktraces), KS_alloc) {}
5327    else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
5328                        MC_(clo_keep_stacktraces), KS_free) {}
5329    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
5330                        MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
5331    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
5332                        MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
5333    else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
5334                        MC_(clo_keep_stacktraces), KS_none) {}
5335 
5336    else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
5337                        MC_(clo_show_mismatched_frees)) {}
5338 
5339    else
5340       return VG_(replacement_malloc_process_cmd_line_option)(arg);
5341 
5342    return True;
5343 
5344 
5345   bad_level:
5346    VG_(fmsg_bad_option)(arg,
5347       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
5348 }
5349 
5350 static void mc_print_usage(void)
5351 {
5352    const HChar* plo_default = "no";
5353 #  if defined(VGO_darwin)
5354    plo_default = "yes";
5355 #  endif
5356 
5357    VG_(printf)(
5358 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
5359 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
5360 "    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
5361 "                                            [definite,possible]\n"
5362 "    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
5363 "                                            [definite,possible]\n"
5364 "        where kind is one of:\n"
5365 "          definite indirect possible reachable all none\n"
5366 "    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
5367 "        reducing leak search false positives [none]\n"
5368 "        where heur is one of:\n"
5369 "          stdstring length64 newarray multipleinheritance all none\n"
5370 "    --show-reachable=yes             same as --show-leak-kinds=all\n"
5371 "    --show-reachable=no --show-possibly-lost=yes\n"
5372 "                                     same as --show-leak-kinds=definite,possible\n"
5373 "    --show-reachable=no --show-possibly-lost=no\n"
5374 "                                     same as --show-leak-kinds=definite\n"
5375 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
5376 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
5377 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [%s]\n"
5378 "    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
5379 "    --freelist-big-blocks=<number>   releases first blocks with size >= [1000000]\n"
5380 "    --workaround-gcc296-bugs=no|yes  self-explanatory [no]\n"
5381 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
5382 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
5383 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
5384 "    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
5385 "        stack trace(s) to keep for malloc'd/free'd areas       [alloc-then-free]\n"
5386 "    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
5387 , plo_default
5388    );
5389 }
5390 
5391 static void mc_print_debug_usage(void)
5392 {
5393    VG_(printf)(
5394 "    (none)\n"
5395    );
5396 }
5397 
5398 
5399 /*------------------------------------------------------------*/
5400 /*--- Client blocks                                        ---*/
5401 /*------------------------------------------------------------*/
5402 
5403 /* Client block management:
5404 
5405    This is managed as an expanding array of client block descriptors.
5406    Indices of live descriptors are issued to the client, so it can ask
5407    to free them later.  Therefore we cannot slide live entries down
5408    over dead ones.  Instead we must use free/inuse flags and scan for
5409    an empty slot at allocation time.  This in turn means allocation is
5410    relatively expensive, so we hope this does not happen too often.
5411 
5412    An unused block has start == size == 0
5413 */
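/* Illustrative client-side usage (a sketch only; the exact request
   macros are defined in memcheck.h, not here):

      int id = VALGRIND_CREATE_BLOCK(addr, len, "my buffer");
      ...
      VALGRIND_DISCARD(id);

   CREATE_BLOCK obtains a descriptor slot via alloc_client_block() below
   and hands its index back to the client; DISCARD marks that slot as
   unused again (start == size == 0) so it can be reused. */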
5414 
5415 /* type CGenBlock is defined in mc_include.h */
5416 
5417 /* This subsystem is self-initialising. */
5418 static UWord      cgb_size = 0;
5419 static UWord      cgb_used = 0;
5420 static CGenBlock* cgbs     = NULL;
5421 
5422 /* Stats for this subsystem. */
5423 static ULong cgb_used_MAX = 0;   /* Max in use. */
5424 static ULong cgb_allocs   = 0;   /* Number of allocs. */
5425 static ULong cgb_discards = 0;   /* Number of discards. */
5426 static ULong cgb_search   = 0;   /* Number of searches. */
5427 
5428 
5429 /* Get access to the client block array. */
5430 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5431                                  /*OUT*/UWord* nBlocks )
5432 {
5433    *blocks  = cgbs;
5434    *nBlocks = cgb_used;
5435 }
5436 
5437 
5438 static
5439 Int alloc_client_block ( void )
5440 {
5441    UWord      i, sz_new;
5442    CGenBlock* cgbs_new;
5443 
5444    cgb_allocs++;
5445 
5446    for (i = 0; i < cgb_used; i++) {
5447       cgb_search++;
5448       if (cgbs[i].start == 0 && cgbs[i].size == 0)
5449          return i;
5450    }
5451 
5452    /* Not found.  Try to allocate one at the end. */
5453    if (cgb_used < cgb_size) {
5454       cgb_used++;
5455       return cgb_used-1;
5456    }
5457 
5458    /* Ok, we have to allocate a new one. */
5459    tl_assert(cgb_used == cgb_size);
5460    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5461 
5462    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5463    for (i = 0; i < cgb_used; i++)
5464       cgbs_new[i] = cgbs[i];
5465 
5466    if (cgbs != NULL)
5467       VG_(free)( cgbs );
5468    cgbs = cgbs_new;
5469 
5470    cgb_size = sz_new;
5471    cgb_used++;
5472    if (cgb_used > cgb_used_MAX)
5473       cgb_used_MAX = cgb_used;
5474    return cgb_used-1;
5475 }
5476 
5477 
5478 static void show_client_block_stats ( void )
5479 {
5480    VG_(message)(Vg_DebugMsg,
5481       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5482       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5483    );
5484 }
5485 static void print_monitor_help ( void )
5486 {
5487    VG_(gdb_printf)
5488       (
5489 "\n"
5490 "memcheck monitor commands:\n"
5491 "  get_vbits <addr> [<len>]\n"
5492 "        returns validity bits for <len> (or 1) bytes at <addr>\n"
5493 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5494 "        Example: get_vbits 0x8049c78 10\n"
5495 "  make_memory [noaccess|undefined\n"
5496 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
5497 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5498 "  check_memory [addressable|defined] <addr> [<len>]\n"
5499 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5500 "            and outputs a description of <addr>\n"
5501 "  leak_check [full*|summary]\n"
5502 "                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
5503 "                [heuristics heur1,heur2,...]\n"
5504 "                [increased*|changed|any]\n"
5505 "                [unlimited*|limited <max_loss_records_output>]\n"
5506 "            * = defaults\n"
5507 "       where kind is one of:\n"
5508 "         definite indirect possible reachable all none\n"
5509 "       where heur is one of:\n"
5510 "         stdstring length64 newarray multipleinheritance all none*\n"
5511 "       Examples: leak_check\n"
5512 "                 leak_check summary any\n"
5513 "                 leak_check full kinds indirect,possible\n"
5514 "                 leak_check full reachable any limited 100\n"
5515 "  block_list <loss_record_nr>\n"
5516 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
5517 "  who_points_at <addr> [<len>]\n"
5518 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
5519 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
5520 "         with len > 1, will also show \"interior pointers\")\n"
5521 "\n");
5522 }
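/* These commands are normally issued from a gdb session attached to the
   Valgrind gdbserver, e.g.
      (gdb) monitor get_vbits 0x8049c78 10
      (gdb) monitor leak_check summary any
   or through the standalone vgdb utility.  (Illustrative only; the
   actual parsing is done by handle_gdb_monitor_command() below.) */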
5523 
5524 /* return True if request recognised, False otherwise */
5525 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
5526 {
5527    HChar* wcmd;
5528    HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
5529    HChar *ssaveptr;
5530 
5531    VG_(strcpy) (s, req);
5532 
5533    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5534    /* NB: if possible, avoid introducing a new command below which
5535       starts with the same first letter(s) as an already existing
5536       command. This ensures a shorter abbreviation for the user. */
5537    switch (VG_(keyword_id)
5538            ("help get_vbits leak_check make_memory check_memory "
5539             "block_list who_points_at",
5540             wcmd, kwd_report_duplicated_matches)) {
5541    case -2: /* multiple matches */
5542       return True;
5543    case -1: /* not found */
5544       return False;
5545    case  0: /* help */
5546       print_monitor_help();
5547       return True;
5548    case  1: { /* get_vbits */
5549       Addr address;
5550       SizeT szB = 1;
5551       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
5552          UChar vbits;
5553          Int i;
5554          Int unaddressable = 0;
5555          for (i = 0; i < szB; i++) {
5556             Int res = mc_get_or_set_vbits_for_client
5557                (address+i, (Addr) &vbits, 1,
5558                 False, /* get them */
5559                 False  /* is client request */ );
5560             /* we are at the start of a new output line, so print a \n. */
5561             if ((i % 32) == 0 && i != 0)
5562                VG_(printf) ("\n");
5563             /* we are at the start of the next block of 4, so print a space. */
5564             else if ((i % 4) == 0 && i != 0)
5565                VG_(printf) (" ");
5566             if (res == 1) {
5567                VG_(printf) ("%02x", vbits);
5568             } else {
5569                tl_assert(3 == res);
5570                unaddressable++;
5571                VG_(printf) ("__");
5572             }
5573          }
5574          VG_(printf) ("\n");
5575          if (unaddressable) {
5576             VG_(printf)
5577                ("Address %p len %ld has %d bytes unaddressable\n",
5578                 (void *)address, szB, unaddressable);
5579          }
5580       }
5581       return True;
5582    }
5583    case  2: { /* leak_check */
5584       Int err = 0;
5585       LeakCheckParams lcp;
5586       HChar* kw;
5587 
5588       lcp.mode               = LC_Full;
5589       lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
5590       lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
5591       lcp.heuristics         = 0;
5592       lcp.deltamode          = LCD_Increased;
5593       lcp.max_loss_records_output = 999999999;
5594       lcp.requested_by_monitor_command = True;
5595 
5596       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
5597            kw != NULL;
5598            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
5599          switch (VG_(keyword_id)
5600                  ("full summary "
5601                   "kinds reachable possibleleak definiteleak "
5602                   "heuristics "
5603                   "increased changed any "
5604                   "unlimited limited ",
5605                   kw, kwd_report_all)) {
5606          case -2: err++; break;
5607          case -1: err++; break;
5608          case  0: /* full */
5609             lcp.mode = LC_Full; break;
5610          case  1: /* summary */
5611             lcp.mode = LC_Summary; break;
5612          case  2: { /* kinds */
5613             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5614             if (wcmd == NULL
5615                 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
5616                                         True/*allow_all*/,
5617                                         wcmd,
5618                                         &lcp.show_leak_kinds)) {
5619                VG_(gdb_printf) ("missing or malformed leak kinds set\n");
5620                err++;
5621             }
5622             break;
5623          }
5624          case  3: /* reachable */
5625             lcp.show_leak_kinds = MC_(all_Reachedness)();
5626             break;
5627          case  4: /* possibleleak */
5628             lcp.show_leak_kinds
5629                = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
5630             break;
5631          case  5: /* definiteleak */
5632             lcp.show_leak_kinds = R2S(Unreached);
5633             break;
5634          case  6: { /* heuristics */
5635             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5636             if (wcmd == NULL
5637                 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
5638                                         True,/*allow_all*/
5639                                         wcmd,
5640                                         &lcp.heuristics)) {
5641                VG_(gdb_printf) ("missing or malformed heuristics set\n");
5642                err++;
5643             }
5644             break;
5645          }
5646          case  7: /* increased */
5647             lcp.deltamode = LCD_Increased; break;
5648          case  8: /* changed */
5649             lcp.deltamode = LCD_Changed; break;
5650          case  9: /* any */
5651             lcp.deltamode = LCD_Any; break;
5652          case 10: /* unlimited */
5653             lcp.max_loss_records_output = 999999999; break;
5654          case 11: { /* limited */
5655             Int int_value;
5656             const HChar* endptr;
5657 
5658             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5659             if (wcmd == NULL) {
5660                int_value = 0;
5661                endptr = "empty"; /* to report an error below */
5662             } else {
5663                HChar *the_end;
5664                int_value = VG_(strtoll10) (wcmd, &the_end);
5665                endptr = the_end;
5666             }
5667             if (*endptr != '\0')
5668                VG_(gdb_printf) ("missing or malformed integer value\n");
5669             else if (int_value > 0)
5670                lcp.max_loss_records_output = (UInt) int_value;
5671             else
5672                VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
5673                                 int_value);
5674             break;
5675          }
5676          default:
5677             tl_assert (0);
5678          }
5679       }
5680       if (!err)
5681          MC_(detect_memory_leaks)(tid, &lcp);
5682       return True;
5683    }
5684 
5685    case  3: { /* make_memory */
5686       Addr address;
5687       SizeT szB = 1;
5688       Int kwdid = VG_(keyword_id)
5689          ("noaccess undefined defined Definedifaddressable",
5690           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5691       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5692          return True;
5693       switch (kwdid) {
5694       case -2: break;
5695       case -1: break;
5696       case  0: MC_(make_mem_noaccess) (address, szB); break;
5697       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
5698                                                     MC_OKIND_USER ); break;
5699       case  2: MC_(make_mem_defined) ( address, szB ); break;
5700       case  3: make_mem_defined_if_addressable ( address, szB ); break;
5701       default: tl_assert(0);
5702       }
5703       return True;
5704    }
5705 
5706    case  4: { /* check_memory */
5707       Addr address;
5708       SizeT szB = 1;
5709       Addr bad_addr;
5710       UInt okind;
5711       const HChar* src;
5712       UInt otag;
5713       UInt ecu;
5714       ExeContext* origin_ec;
5715       MC_ReadResult res;
5716 
5717       Int kwdid = VG_(keyword_id)
5718          ("addressable defined",
5719           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5720       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5721          return True;
5722       switch (kwdid) {
5723       case -2: break;
5724       case -1: break;
5725       case  0: /* addressable */
5726          if (is_mem_addressable ( address, szB, &bad_addr ))
5727             VG_(printf) ("Address %p len %ld addressable\n",
5728                              (void *)address, szB);
5729          else
5730             VG_(printf)
5731                ("Address %p len %ld not addressable:\nbad address %p\n",
5732                 (void *)address, szB, (void *) bad_addr);
5733          MC_(pp_describe_addr) (address);
5734          break;
5735       case  1: /* defined */
5736          res = is_mem_defined ( address, szB, &bad_addr, &otag );
5737          if (MC_AddrErr == res)
5738             VG_(printf)
5739                ("Address %p len %ld not addressable:\nbad address %p\n",
5740                 (void *)address, szB, (void *) bad_addr);
5741          else if (MC_ValueErr == res) {
5742             okind = otag & 3;
5743             switch (okind) {
5744             case MC_OKIND_STACK:
5745                src = " was created by a stack allocation"; break;
5746             case MC_OKIND_HEAP:
5747                src = " was created by a heap allocation"; break;
5748             case MC_OKIND_USER:
5749                src = " was created by a client request"; break;
5750             case MC_OKIND_UNKNOWN:
5751                src = ""; break;
5752             default: tl_assert(0);
5753             }
5754             VG_(printf)
5755                ("Address %p len %ld not defined:\n"
5756                 "Uninitialised value at %p%s\n",
5757                 (void *)address, szB, (void *) bad_addr, src);
5758             ecu = otag & ~3;
5759             if (VG_(is_plausible_ECU)(ecu)) {
5760                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5761                VG_(pp_ExeContext)( origin_ec );
5762             }
5763          }
5764          else
5765             VG_(printf) ("Address %p len %ld defined\n",
5766                          (void *)address, szB);
5767          MC_(pp_describe_addr) (address);
5768          break;
5769       default: tl_assert(0);
5770       }
5771       return True;
5772    }
5773 
5774    case  5: { /* block_list */
5775       HChar* wl;
5776       HChar *endptr;
5777       UInt lr_nr = 0;
5778       wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
5779       if (wl != NULL)
5780          lr_nr = VG_(strtoull10) (wl, &endptr);
5781       if (wl == NULL || *endptr != '\0') {
5782          VG_(gdb_printf) ("malformed or missing integer\n");
5783       } else {
5784          // Use lr_nr-1: the number shown to the user is 1 more than the index in lr_array.
5785          if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
5786             VG_(gdb_printf) ("invalid loss record nr\n");
5787       }
5788       return True;
5789    }
5790 
5791    case  6: { /* who_points_at */
5792       Addr address;
5793       SizeT szB = 1;
5794 
5795       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5796          return True;
5797       if (address == (Addr) 0) {
5798          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
5799          return True;
5800       }
5801       MC_(who_points_at) (address, szB);
5802       return True;
5803    }
5804 
5805    default:
5806       tl_assert(0);
5807       return False;
5808    }
5809 }
5810 
5811 /*------------------------------------------------------------*/
5812 /*--- Client requests                                      ---*/
5813 /*------------------------------------------------------------*/
5814 
5815 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5816 {
5817    Int   i;
5818    Addr  bad_addr;
5819 
5820    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5821        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5822        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5823        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
5824        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
5825        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
5826        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
5827        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
5828        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
5829        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
5830        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
5831        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
5832        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
5833        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
5834        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
5835       return False;
5836 
5837    switch (arg[0]) {
5838       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
5839          Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5840          if (!ok)
5841             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5842          *ret = ok ? (UWord)NULL : bad_addr;
5843          break;
5844       }
5845 
5846       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5847          Bool errorV    = False;
5848          Addr bad_addrV = 0;
5849          UInt otagV     = 0;
5850          Bool errorA    = False;
5851          Addr bad_addrA = 0;
5852          is_mem_defined_comprehensive(
5853             arg[1], arg[2],
5854             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5855          );
5856          if (errorV) {
5857             MC_(record_user_error) ( tid, bad_addrV,
5858                                      /*isAddrErr*/False, otagV );
5859          }
5860          if (errorA) {
5861             MC_(record_user_error) ( tid, bad_addrA,
5862                                      /*isAddrErr*/True, 0 );
5863          }
5864          /* Return the lower of the two erring addresses, if any. */
5865          *ret = 0;
5866          if (errorV && !errorA) {
5867             *ret = bad_addrV;
5868          }
5869          if (!errorV && errorA) {
5870             *ret = bad_addrA;
5871          }
5872          if (errorV && errorA) {
5873             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5874          }
5875          break;
5876       }
5877 
5878       case VG_USERREQ__DO_LEAK_CHECK: {
5879          LeakCheckParams lcp;
5880 
5881          if (arg[1] == 0)
5882             lcp.mode = LC_Full;
5883          else if (arg[1] == 1)
5884             lcp.mode = LC_Summary;
5885          else {
5886             VG_(message)(Vg_UserMsg,
5887                          "Warning: unknown memcheck leak search mode\n");
5888             lcp.mode = LC_Full;
5889          }
5890 
5891          lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
5892          lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
5893          lcp.heuristics = MC_(clo_leak_check_heuristics);
5894 
5895          if (arg[2] == 0)
5896             lcp.deltamode = LCD_Any;
5897          else if (arg[2] == 1)
5898             lcp.deltamode = LCD_Increased;
5899          else if (arg[2] == 2)
5900             lcp.deltamode = LCD_Changed;
5901          else {
5902             VG_(message)
5903                (Vg_UserMsg,
5904                 "Warning: unknown memcheck leak search deltamode\n");
5905             lcp.deltamode = LCD_Any;
5906          }
5907          lcp.max_loss_records_output = 999999999;
5908          lcp.requested_by_monitor_command = False;
5909 
5910          MC_(detect_memory_leaks)(tid, &lcp);
5911          *ret = 0; /* return value is meaningless */
5912          break;
5913       }
5914 
5915       case VG_USERREQ__MAKE_MEM_NOACCESS:
5916          MC_(make_mem_noaccess) ( arg[1], arg[2] );
5917          *ret = -1;
5918          break;
5919 
5920       case VG_USERREQ__MAKE_MEM_UNDEFINED:
5921          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5922                                               MC_OKIND_USER );
5923          *ret = -1;
5924          break;
5925 
5926       case VG_USERREQ__MAKE_MEM_DEFINED:
5927          MC_(make_mem_defined) ( arg[1], arg[2] );
5928          *ret = -1;
5929          break;
5930 
5931       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5932          make_mem_defined_if_addressable ( arg[1], arg[2] );
5933          *ret = -1;
5934          break;
5935 
5936       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5937          if (arg[1] != 0 && arg[2] != 0) {
5938             i = alloc_client_block();
5939             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5940             cgbs[i].start = arg[1];
5941             cgbs[i].size  = arg[2];
5942             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
5943             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5944             *ret = i;
5945          } else
5946             *ret = -1;
5947          break;
5948 
5949       case VG_USERREQ__DISCARD: /* discard */
5950          if (cgbs == NULL
5951              || arg[2] >= cgb_used ||
5952              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5953             *ret = 1;
5954          } else {
5955             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5956             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5957             VG_(free)(cgbs[arg[2]].desc);
5958             cgb_discards++;
5959             *ret = 0;
5960          }
5961          break;
5962 
5963       case VG_USERREQ__GET_VBITS:
5964          *ret = mc_get_or_set_vbits_for_client
5965                    ( arg[1], arg[2], arg[3],
5966                      False /* get them */,
5967                      True /* is client request */ );
5968          break;
5969 
5970       case VG_USERREQ__SET_VBITS:
5971          *ret = mc_get_or_set_vbits_for_client
5972                    ( arg[1], arg[2], arg[3],
5973                      True /* set them */,
5974                      True /* is client request */ );
5975          break;
5976 
5977       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5978          UWord** argp = (UWord**)arg;
5979          // MC_(bytes_leaked) et al were set by the last leak check (or zero
5980          // if no prior leak checks performed).
5981          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5982          *argp[2] = MC_(bytes_dubious);
5983          *argp[3] = MC_(bytes_reachable);
5984          *argp[4] = MC_(bytes_suppressed);
5985          // there is no argp[5]
5986          //*argp[5] = MC_(bytes_indirect);
5987          // XXX need to make *argp[1-4] defined;  currently done in the
5988          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5989          *ret = 0;
5990          return True;
5991       }
5992       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5993          UWord** argp = (UWord**)arg;
5994          // MC_(blocks_leaked) et al were set by the last leak check (or zero
5995          // if no prior leak checks performed).
5996          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5997          *argp[2] = MC_(blocks_dubious);
5998          *argp[3] = MC_(blocks_reachable);
5999          *argp[4] = MC_(blocks_suppressed);
6000          // there is no argp[5]
6001          //*argp[5] = MC_(blocks_indirect);
6002          // XXX need to make *argp[1-4] defined;  currently done in the
6003          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
6004          *ret = 0;
6005          return True;
6006       }
6007       case VG_USERREQ__MALLOCLIKE_BLOCK: {
6008          Addr p         = (Addr)arg[1];
6009          SizeT sizeB    =       arg[2];
6010          UInt rzB       =       arg[3];
6011          Bool is_zeroed = (Bool)arg[4];
6012 
6013          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
6014                           MC_AllocCustom, MC_(malloc_list) );
6015          if (rzB > 0) {
6016             MC_(make_mem_noaccess) ( p - rzB, rzB);
6017             MC_(make_mem_noaccess) ( p + sizeB, rzB);
6018          }
6019          return True;
6020       }
6021       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
6022          Addr p         = (Addr)arg[1];
6023          SizeT oldSizeB =       arg[2];
6024          SizeT newSizeB =       arg[3];
6025          UInt rzB       =       arg[4];
6026 
6027          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
6028          return True;
6029       }
6030       case VG_USERREQ__FREELIKE_BLOCK: {
6031          Addr p         = (Addr)arg[1];
6032          UInt rzB       =       arg[2];
6033 
6034          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
6035          return True;
6036       }
6037 
6038       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
6039          HChar* s  = (HChar*)arg[1];
6040          Addr  dst = (Addr) arg[2];
6041          Addr  src = (Addr) arg[3];
6042          SizeT len = (SizeT)arg[4];
6043          MC_(record_overlap_error)(tid, s, src, dst, len);
6044          return True;
6045       }
6046 
6047       case VG_USERREQ__CREATE_MEMPOOL: {
6048          Addr pool      = (Addr)arg[1];
6049          UInt rzB       =       arg[2];
6050          Bool is_zeroed = (Bool)arg[3];
6051 
6052          MC_(create_mempool) ( pool, rzB, is_zeroed );
6053          return True;
6054       }
6055 
6056       case VG_USERREQ__DESTROY_MEMPOOL: {
6057          Addr pool      = (Addr)arg[1];
6058 
6059          MC_(destroy_mempool) ( pool );
6060          return True;
6061       }
6062 
6063       case VG_USERREQ__MEMPOOL_ALLOC: {
6064          Addr pool      = (Addr)arg[1];
6065          Addr addr      = (Addr)arg[2];
6066          UInt size      =       arg[3];
6067 
6068          MC_(mempool_alloc) ( tid, pool, addr, size );
6069          return True;
6070       }
6071 
6072       case VG_USERREQ__MEMPOOL_FREE: {
6073          Addr pool      = (Addr)arg[1];
6074          Addr addr      = (Addr)arg[2];
6075 
6076          MC_(mempool_free) ( pool, addr );
6077          return True;
6078       }
6079 
6080       case VG_USERREQ__MEMPOOL_TRIM: {
6081          Addr pool      = (Addr)arg[1];
6082          Addr addr      = (Addr)arg[2];
6083          UInt size      =       arg[3];
6084 
6085          MC_(mempool_trim) ( pool, addr, size );
6086          return True;
6087       }
6088 
6089       case VG_USERREQ__MOVE_MEMPOOL: {
6090          Addr poolA     = (Addr)arg[1];
6091          Addr poolB     = (Addr)arg[2];
6092 
6093          MC_(move_mempool) ( poolA, poolB );
6094          return True;
6095       }
6096 
6097       case VG_USERREQ__MEMPOOL_CHANGE: {
6098          Addr pool      = (Addr)arg[1];
6099          Addr addrA     = (Addr)arg[2];
6100          Addr addrB     = (Addr)arg[3];
6101          UInt size      =       arg[4];
6102 
6103          MC_(mempool_change) ( pool, addrA, addrB, size );
6104          return True;
6105       }
6106 
6107       case VG_USERREQ__MEMPOOL_EXISTS: {
6108          Addr pool      = (Addr)arg[1];
6109 
6110          *ret = (UWord) MC_(mempool_exists) ( pool );
6111          return True;
6112       }
6113 
6114       case VG_USERREQ__GDB_MONITOR_COMMAND: {
6115          Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
6116          if (handled)
6117             *ret = 1;
6118          else
6119             *ret = 0;
6120          return handled;
6121       }
6122 
6123       case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
6124       case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
6125          Bool addRange
6126             = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
6127          Bool ok
6128             = modify_ignore_ranges(addRange, arg[1], arg[2]);
6129          *ret = ok ? 1 : 0;
6130          return True;
6131       }
6132 
6133       default:
6134          VG_(message)(
6135             Vg_UserMsg,
6136             "Warning: unknown memcheck client request code %llx\n",
6137             (ULong)arg[0]
6138          );
6139          return False;
6140    }
6141    return True;
6142 }
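/* Illustrative note (a sketch, not part of the request machinery): the
   MALLOCLIKE/FREELIKE cases above are typically driven by a custom
   allocator using the client request macros, roughly

      VALGRIND_MALLOCLIKE_BLOCK(p, sizeB, rzB, is_zeroed);
      ...
      VALGRIND_FREELIKE_BLOCK(p, rzB);

   which is why the MALLOCLIKE case marks rzB bytes on either side of the
   block as noaccess: they act as the redzones a real malloc would
   provide. */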
6143 
6144 
6145 /*------------------------------------------------------------*/
6146 /*--- Crude profiling machinery.                           ---*/
6147 /*------------------------------------------------------------*/
6148 
6149 // We track a number of interesting events (using PROF_EVENT)
6150 // if MC_PROFILE_MEMORY is defined.
6151 
6152 #ifdef MC_PROFILE_MEMORY
6153 
6154 UInt   MC_(event_ctr)[N_PROF_EVENTS];
6155 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
6156 
6157 static void init_prof_mem ( void )
6158 {
6159    Int i;
6160    for (i = 0; i < N_PROF_EVENTS; i++) {
6161       MC_(event_ctr)[i] = 0;
6162       MC_(event_ctr_name)[i] = NULL;
6163    }
6164 }
6165 
6166 static void done_prof_mem ( void )
6167 {
6168    Int  i;
6169    Bool spaced = False;
6170    for (i = 0; i < N_PROF_EVENTS; i++) {
6171       if (!spaced && (i % 10) == 0) {
6172          VG_(printf)("\n");
6173          spaced = True;
6174       }
6175       if (MC_(event_ctr)[i] > 0) {
6176          spaced = False;
6177          VG_(printf)( "prof mem event %3d: %9d   %s\n",
6178                       i, MC_(event_ctr)[i],
6179                       MC_(event_ctr_name)[i]
6180                          ? MC_(event_ctr_name)[i] : "unnamed");
6181       }
6182    }
6183 }
6184 
6185 #else
6186 
6187 static void init_prof_mem ( void ) { }
6188 static void done_prof_mem ( void ) { }
6189 
6190 #endif
6191 
6192 
6193 /*------------------------------------------------------------*/
6194 /*--- Origin tracking stuff                                ---*/
6195 /*------------------------------------------------------------*/
6196 
6197 /*--------------------------------------------*/
6198 /*--- Origin tracking: load handlers       ---*/
6199 /*--------------------------------------------*/
6200 
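/* A note on the encoding used below (derived from the handlers
   themselves, for readability): each OCacheLine entry describes one
   4-byte-aligned word.  line->w32[lineoff] holds the origin tag for that
   word, and the low 4 bits of line->descr[lineoff] record, per byte,
   whether that tag applies to the byte (bit set) or the byte has no
   recorded origin (bit clear).  Hence the "descr < 0x10" assertions and
   the per-size masks 1, 3 and 0xF used below. */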
6201 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
6202    return or1 > or2 ? or1 : or2;
6203 }
6204 
6205 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
6206    OCacheLine* line;
6207    UChar descr;
6208    UWord lineoff = oc_line_offset(a);
6209    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6210 
6211    if (OC_ENABLE_ASSERTIONS) {
6212       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6213    }
6214 
6215    line = find_OCacheLine( a );
6216 
6217    descr = line->descr[lineoff];
6218    if (OC_ENABLE_ASSERTIONS) {
6219       tl_assert(descr < 0x10);
6220    }
6221 
6222    if (LIKELY(0 == (descr & (1 << byteoff))))  {
6223       return 0;
6224    } else {
6225       return line->w32[lineoff];
6226    }
6227 }
6228 
6229 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
6230    OCacheLine* line;
6231    UChar descr;
6232    UWord lineoff, byteoff;
6233 
6234    if (UNLIKELY(a & 1)) {
6235       /* Handle misaligned case, slowly. */
6236       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
6237       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
6238       return merge_origins(oLo, oHi);
6239    }
6240 
6241    lineoff = oc_line_offset(a);
6242    byteoff = a & 3; /* 0 or 2 */
6243 
6244    if (OC_ENABLE_ASSERTIONS) {
6245       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6246    }
6247    line = find_OCacheLine( a );
6248 
6249    descr = line->descr[lineoff];
6250    if (OC_ENABLE_ASSERTIONS) {
6251       tl_assert(descr < 0x10);
6252    }
6253 
6254    if (LIKELY(0 == (descr & (3 << byteoff)))) {
6255       return 0;
6256    } else {
6257       return line->w32[lineoff];
6258    }
6259 }
6260 
6261 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
6262    OCacheLine* line;
6263    UChar descr;
6264    UWord lineoff;
6265 
6266    if (UNLIKELY(a & 3)) {
6267       /* Handle misaligned case, slowly. */
6268       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
6269       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
6270       return merge_origins(oLo, oHi);
6271    }
6272 
6273    lineoff = oc_line_offset(a);
6274    if (OC_ENABLE_ASSERTIONS) {
6275       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6276    }
6277 
6278    line = find_OCacheLine( a );
6279 
6280    descr = line->descr[lineoff];
6281    if (OC_ENABLE_ASSERTIONS) {
6282       tl_assert(descr < 0x10);
6283    }
6284 
6285    if (LIKELY(0 == descr)) {
6286       return 0;
6287    } else {
6288       return line->w32[lineoff];
6289    }
6290 }
6291 
6292 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
6293    OCacheLine* line;
6294    UChar descrLo, descrHi, descr;
6295    UWord lineoff;
6296 
6297    if (UNLIKELY(a & 7)) {
6298       /* Handle misaligned case, slowly. */
6299       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
6300       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
6301       return merge_origins(oLo, oHi);
6302    }
6303 
6304    lineoff = oc_line_offset(a);
6305    if (OC_ENABLE_ASSERTIONS) {
6306       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
6307    }
6308 
6309    line = find_OCacheLine( a );
6310 
6311    descrLo = line->descr[lineoff + 0];
6312    descrHi = line->descr[lineoff + 1];
6313    descr   = descrLo | descrHi;
6314    if (OC_ENABLE_ASSERTIONS) {
6315       tl_assert(descr < 0x10);
6316    }
6317 
6318    if (LIKELY(0 == descr)) {
6319       return 0; /* both 32-bit chunks are defined */
6320    } else {
6321       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
6322       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
6323       return merge_origins(oLo, oHi);
6324    }
6325 }
6326 
6327 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
6328    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
6329    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
6330    UInt oBoth = merge_origins(oLo, oHi);
6331    return (UWord)oBoth;
6332 }
6333 
6334 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
6335    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
6336    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
6337    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
6338    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
6339    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
6340                               merge_origins(oQ2, oQ3));
6341    return (UWord)oAll;
6342 }
6343 
6344 
6345 /*--------------------------------------------*/
6346 /*--- Origin tracking: store handlers      ---*/
6347 /*--------------------------------------------*/
6348 
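/* Note for the stores below: a tag of 0 means "no origin", so storing
   d32 == 0 clears the relevant descr bits (this is how
   ocache_sarp_Clear_Origins erases origins), while a non-zero d32 sets
   those bits and records the same tag for every byte covered.
   Misaligned stores are split into two narrower stores, as with the
   loads above. */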
6349 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
6350    OCacheLine* line;
6351    UWord lineoff = oc_line_offset(a);
6352    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6353 
6354    if (OC_ENABLE_ASSERTIONS) {
6355       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6356    }
6357 
6358    line = find_OCacheLine( a );
6359 
6360    if (d32 == 0) {
6361       line->descr[lineoff] &= ~(1 << byteoff);
6362    } else {
6363       line->descr[lineoff] |= (1 << byteoff);
6364       line->w32[lineoff] = d32;
6365    }
6366 }
6367 
6368 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
6369    OCacheLine* line;
6370    UWord lineoff, byteoff;
6371 
6372    if (UNLIKELY(a & 1)) {
6373       /* Handle misaligned case, slowly. */
6374       MC_(helperc_b_store1)( a + 0, d32 );
6375       MC_(helperc_b_store1)( a + 1, d32 );
6376       return;
6377    }
6378 
6379    lineoff = oc_line_offset(a);
6380    byteoff = a & 3; /* 0 or 2 */
6381 
6382    if (OC_ENABLE_ASSERTIONS) {
6383       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6384    }
6385 
6386    line = find_OCacheLine( a );
6387 
6388    if (d32 == 0) {
6389       line->descr[lineoff] &= ~(3 << byteoff);
6390    } else {
6391       line->descr[lineoff] |= (3 << byteoff);
6392       line->w32[lineoff] = d32;
6393    }
6394 }
6395 
6396 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
6397    OCacheLine* line;
6398    UWord lineoff;
6399 
6400    if (UNLIKELY(a & 3)) {
6401       /* Handle misaligned case, slowly. */
6402       MC_(helperc_b_store2)( a + 0, d32 );
6403       MC_(helperc_b_store2)( a + 2, d32 );
6404       return;
6405    }
6406 
6407    lineoff = oc_line_offset(a);
6408    if (OC_ENABLE_ASSERTIONS) {
6409       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6410    }
6411 
6412    line = find_OCacheLine( a );
6413 
6414    if (d32 == 0) {
6415       line->descr[lineoff] = 0;
6416    } else {
6417       line->descr[lineoff] = 0xF;
6418       line->w32[lineoff] = d32;
6419    }
6420 }
6421 
6422 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
6423    OCacheLine* line;
6424    UWord lineoff;
6425 
6426    if (UNLIKELY(a & 7)) {
6427       /* Handle misaligned case, slowly. */
6428       MC_(helperc_b_store4)( a + 0, d32 );
6429       MC_(helperc_b_store4)( a + 4, d32 );
6430       return;
6431    }
6432 
6433    lineoff = oc_line_offset(a);
6434    if (OC_ENABLE_ASSERTIONS) {
6435       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
6436    }
6437 
6438    line = find_OCacheLine( a );
6439 
6440    if (d32 == 0) {
6441       line->descr[lineoff + 0] = 0;
6442       line->descr[lineoff + 1] = 0;
6443    } else {
6444       line->descr[lineoff + 0] = 0xF;
6445       line->descr[lineoff + 1] = 0xF;
6446       line->w32[lineoff + 0] = d32;
6447       line->w32[lineoff + 1] = d32;
6448    }
6449 }
6450 
6451 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
6452    MC_(helperc_b_store8)( a + 0, d32 );
6453    MC_(helperc_b_store8)( a + 8, d32 );
6454 }
6455 
6456 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
6457    MC_(helperc_b_store8)( a +  0, d32 );
6458    MC_(helperc_b_store8)( a +  8, d32 );
6459    MC_(helperc_b_store8)( a + 16, d32 );
6460    MC_(helperc_b_store8)( a + 24, d32 );
6461 }
6462 
6463 
6464 /*--------------------------------------------*/
6465 /*--- Origin tracking: sarp handlers       ---*/
6466 /*--------------------------------------------*/
6467 
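/* Both routines below use the same alignment-peeling pattern: deal with
   a leading 1-byte and/or 2-byte chunk until the address is 4-aligned,
   do the bulk of the range with 4-byte stores, then mop up a trailing
   2-byte and/or 1-byte chunk.  Set_Origins writes the supplied otag
   throughout; Clear_Origins writes tag 0, i.e. "no origin". */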
6468 __attribute__((noinline))
6469 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
6470    if ((a & 1) && len >= 1) {
6471       MC_(helperc_b_store1)( a, otag );
6472       a++;
6473       len--;
6474    }
6475    if ((a & 2) && len >= 2) {
6476       MC_(helperc_b_store2)( a, otag );
6477       a += 2;
6478       len -= 2;
6479    }
6480    if (len >= 4)
6481       tl_assert(0 == (a & 3));
6482    while (len >= 4) {
6483       MC_(helperc_b_store4)( a, otag );
6484       a += 4;
6485       len -= 4;
6486    }
6487    if (len >= 2) {
6488       MC_(helperc_b_store2)( a, otag );
6489       a += 2;
6490       len -= 2;
6491    }
6492    if (len >= 1) {
6493       MC_(helperc_b_store1)( a, otag );
6494       //a++;
6495       len--;
6496    }
6497    tl_assert(len == 0);
6498 }
6499 
6500 __attribute__((noinline))
6501 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
6502    if ((a & 1) && len >= 1) {
6503       MC_(helperc_b_store1)( a, 0 );
6504       a++;
6505       len--;
6506    }
6507    if ((a & 2) && len >= 2) {
6508       MC_(helperc_b_store2)( a, 0 );
6509       a += 2;
6510       len -= 2;
6511    }
6512    if (len >= 4)
6513       tl_assert(0 == (a & 3));
6514    while (len >= 4) {
6515       MC_(helperc_b_store4)( a, 0 );
6516       a += 4;
6517       len -= 4;
6518    }
6519    if (len >= 2) {
6520       MC_(helperc_b_store2)( a, 0 );
6521       a += 2;
6522       len -= 2;
6523    }
6524    if (len >= 1) {
6525       MC_(helperc_b_store1)( a, 0 );
6526       //a++;
6527       len--;
6528    }
6529    tl_assert(len == 0);
6530 }
6531 
6532 
6533 /*------------------------------------------------------------*/
6534 /*--- Setup and finalisation                               ---*/
6535 /*------------------------------------------------------------*/
6536 
6537 static void mc_post_clo_init ( void )
6538 {
6539    /* If we've been asked to emit XML, mash around various other
6540       options so as to constrain the output somewhat. */
6541    if (VG_(clo_xml)) {
6542       /* Extract as much info as possible from the leak checker. */
6543       MC_(clo_leak_check) = LC_Full;
6544    }
6545 
6546    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
6547       VG_(message)(Vg_UserMsg,
6548                    "Warning: --freelist-big-blocks value %lld has no effect\n"
6549                    "as it is >= the --freelist-vol value %lld\n",
6550                    MC_(clo_freelist_big_blocks),
6551                    MC_(clo_freelist_vol));
6552 
6553    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6554 
6555    if (MC_(clo_mc_level) == 3) {
6556       /* We're doing origin tracking. */
6557 #     ifdef PERF_FAST_STACK
6558       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
6559       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
6560       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
6561       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
6562       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
6563       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
6564       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
6565       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
6566       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
6567 #     endif
6568       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
6569       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
6570    } else {
6571       /* Not doing origin tracking */
6572 #     ifdef PERF_FAST_STACK
6573       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
6574       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
6575       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
6576       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
6577       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
6578       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
6579       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
6580       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
6581       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
6582 #     endif
6583       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
6584       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
6585    }
6586 
6587    // We assume that brk()/sbrk() does not initialise new memory.  Is this
6588    // accurate?  John Reiser says:
6589    //
6590    //   0) sbrk() can *decrease* process address space.  No zero fill is done
6591    //   for a decrease, not even the fragment on the high end of the last page
6592    //   that is beyond the new highest address.  For maximum safety and
6593    //   portability, then the bytes in the last page that reside above [the
6594    //   new] sbrk(0) should be considered to be uninitialized, but in practice
6595    //   it is exceedingly likely that they will retain their previous
6596    //   contents.
6597    //
6598    //   1) If an increase is large enough to require new whole pages, then
6599    //   those new whole pages (like all new pages) are zero-filled by the
6600    //   operating system.  So if sbrk(0) already is page aligned, then
6601    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
6602    //
6603    //   2) Any increase that lies within an existing allocated page is not
6604    //   changed.  So if (x = sbrk(0)) is not page aligned, then
6605    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
6606    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
6607    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
6608    //   of them come along for the ride because the operating system deals
6609    //   only in whole pages.  Again, for maximum safety and portability, then
6610    //   anything that lives above [the new] sbrk(0) should be considered
6611    //   uninitialized, but in practice will retain previous contents [zero in
6612    //   this case.]"
6613    //
6614    // In short:
6615    //
6616    //   A key property of sbrk/brk is that new whole pages that are supplied
6617    //   by the operating system *do* get initialized to zero.
6618    //
6619    // As for the portability of all this:
6620    //
6621    //   sbrk and brk are not POSIX.  However, any system that is a derivative
6622    //   of *nix has sbrk and brk because too much software (such as
6623    //   the Bourne shell) relies on the traditional memory map (.text,
6624    //   .data+.bss, stack) and the existence of sbrk/brk.
6625    //
6626    // So we should arguably observe all this.  However:
6627    // - The current inaccuracy has caused maybe one complaint in seven years(?)
6628    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
6629    //   doubt most programmers know the above information.
6630    // So I'm not terribly unhappy with marking it as undefined. --njn.
6631    //
6632    // [More:  I think most of what John said only applies to sbrk().  It seems
6633    // that brk() always deals in whole pages.  And since this event deals
6634    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
6635    // just mark all memory it allocates as defined.]
6636    //
6637    if (MC_(clo_mc_level) == 3)
6638       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
6639    else
6640       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
6641 
6642    /* This origin tracking cache is huge (~100M), so only initialise
6643       if we need it. */
6644    if (MC_(clo_mc_level) >= 3) {
6645       init_OCache();
6646       tl_assert(ocacheL1 != NULL);
6647       tl_assert(ocacheL2 != NULL);
6648    } else {
6649       tl_assert(ocacheL1 == NULL);
6650       tl_assert(ocacheL2 == NULL);
6651    }
6652 
6653    MC_(chunk_poolalloc) = VG_(newPA)
6654       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
6655        1000,
6656        VG_(malloc),
6657        "mc.cMC.1 (MC_Chunk pools)",
6658        VG_(free));
6659 
6660    /* Do not check definedness of guest state if --undef-value-errors=no */
6661    if (MC_(clo_mc_level) >= 2)
6662       VG_(track_pre_reg_read) ( mc_pre_reg_read );
6663 }
6664 
6665 static void print_SM_info(const HChar* type, Int n_SMs)
6666 {
6667    VG_(message)(Vg_DebugMsg,
6668       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
6669       type,
6670       n_SMs,
6671       n_SMs * sizeof(SecMap) / 1024UL,
6672       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
6673 }
6674 
6675 static void mc_print_stats (void)
6676 {
6677    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
6678 
6679    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
6680                 VG_(free_queue_volume), VG_(free_queue_length));
6681    VG_(message)(Vg_DebugMsg,
6682       " memcheck: sanity checks: %d cheap, %d expensive\n",
6683       n_sanity_cheap, n_sanity_expensive );
6684    VG_(message)(Vg_DebugMsg,
6685       " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
6686       n_auxmap_L2_nodes,
6687       n_auxmap_L2_nodes * 64,
6688       n_auxmap_L2_nodes / 16 );
6689    VG_(message)(Vg_DebugMsg,
6690       " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
6691       n_auxmap_L1_searches, n_auxmap_L1_cmps,
6692       (10ULL * n_auxmap_L1_cmps)
6693          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
6694    );
6695    VG_(message)(Vg_DebugMsg,
6696       " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
6697       n_auxmap_L2_searches, n_auxmap_L2_nodes
6698    );
6699 
6700    print_SM_info("n_issued     ", n_issued_SMs);
6701    print_SM_info("n_deissued   ", n_deissued_SMs);
6702    print_SM_info("max_noaccess ", max_noaccess_SMs);
6703    print_SM_info("max_undefined", max_undefined_SMs);
6704    print_SM_info("max_defined  ", max_defined_SMs);
6705    print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
6706 
6707    // Three DSMs, plus the non-DSM ones
6708    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
6709    // The 3*sizeof(Word) bytes is the AVL node metadata size.
6710    // The VG_ROUNDUP is because the OSet pool allocator will/must align
6711    // the elements on pointer size.
6712    // Note that the pool allocator has some additional small overhead
6713    // which is not counted in the below.
6714    // Hardwiring this logic sucks, but I don't see how else to do it.
6715    max_secVBit_szB = max_secVBit_nodes *
6716          (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
6717    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
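   /* Worked example (illustrative figures only): on a 64-bit host, and
      assuming for the sake of argument sizeof(SecVBitNode) == 24, each
      node counted above is charged 3*8 + VG_ROUNDUP(24,8) = 24 + 24 = 48
      bytes, so one million live nodes adds about 48 million bytes to
      max_shmem_szB on top of the primary map and the SecMaps. */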
6718 
6719    VG_(message)(Vg_DebugMsg,
6720       " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
6721       max_secVBit_nodes, max_secVBit_szB / 1024,
6722                          max_secVBit_szB / (1024 * 1024));
6723    VG_(message)(Vg_DebugMsg,
6724       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
6725       sec_vbits_new_nodes + sec_vbits_updates,
6726       sec_vbits_new_nodes, sec_vbits_updates );
6727    VG_(message)(Vg_DebugMsg,
6728       " memcheck: max shadow mem size:   %ldk, %ldM\n",
6729       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
6730 
6731    if (MC_(clo_mc_level) >= 3) {
6732       VG_(message)(Vg_DebugMsg,
6733                    " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
6734                    stats_ocacheL1_find,
6735                    stats_ocacheL1_misses,
6736                    stats_ocacheL1_lossage );
6737       VG_(message)(Vg_DebugMsg,
6738                    " ocacheL1: %'12lu at 0   %'12lu at 1\n",
6739                    stats_ocacheL1_find - stats_ocacheL1_misses
6740                       - stats_ocacheL1_found_at_1
6741                       - stats_ocacheL1_found_at_N,
6742                    stats_ocacheL1_found_at_1 );
6743       VG_(message)(Vg_DebugMsg,
6744                    " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
6745                    stats_ocacheL1_found_at_N,
6746                    stats_ocacheL1_movefwds );
6747       VG_(message)(Vg_DebugMsg,
6748                    " ocacheL1: %'12lu sizeB  %'12u useful\n",
6749                    (UWord)sizeof(OCache),
6750                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
6751       VG_(message)(Vg_DebugMsg,
6752                    " ocacheL2: %'12lu refs   %'12lu misses\n",
6753                    stats__ocacheL2_refs,
6754                    stats__ocacheL2_misses );
6755       VG_(message)(Vg_DebugMsg,
6756                    " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
6757                    stats__ocacheL2_n_nodes_max,
6758                    stats__ocacheL2_n_nodes );
6759       VG_(message)(Vg_DebugMsg,
6760                    " niacache: %'12lu refs   %'12lu misses\n",
6761                    stats__nia_cache_queries, stats__nia_cache_misses);
6762    } else {
6763       tl_assert(ocacheL1 == NULL);
6764       tl_assert(ocacheL2 == NULL);
6765    }
6766 }
6767 
6768 
6769 static void mc_fini ( Int exitcode )
6770 {
6771    MC_(print_malloc_stats)();
6772 
6773    if (MC_(clo_leak_check) != LC_Off) {
6774       LeakCheckParams lcp;
6775       lcp.mode = MC_(clo_leak_check);
6776       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6777       lcp.heuristics = MC_(clo_leak_check_heuristics);
6778       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6779       lcp.deltamode = LCD_Any;
6780       lcp.max_loss_records_output = 999999999;
6781       lcp.requested_by_monitor_command = False;
6782       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
6783    } else {
6784       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6785          VG_(umsg)(
6786             "For a detailed leak analysis, rerun with: --leak-check=full\n"
6787             "\n"
6788          );
6789       }
6790    }
6791 
6792    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6793       VG_(message)(Vg_UserMsg,
6794                    "For counts of detected and suppressed errors, rerun with: -v\n");
6795    }
6796 
6797    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
6798        && MC_(clo_mc_level) == 2) {
6799       VG_(message)(Vg_UserMsg,
6800                    "Use --track-origins=yes to see where "
6801                    "uninitialised values come from\n");
6802    }
6803 
6804    /* Print a warning if any client-request generated ignore-ranges
6805       still exist.  It would be reasonable to expect that a properly
6806       written program would remove any such ranges before exiting, and
6807       since they are a bit on the dangerous side, we point them out.  By
6808       contrast, ranges specified on the command line normally pertain
6809       to hardware mapped into the address space, and so we can't expect
6810       the client to have got rid of them. */
6811    if (gIgnoredAddressRanges) {
6812       Word i, nBad = 0;
6813       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6814          UWord val     = IAR_INVALID;
6815          UWord key_min = ~(UWord)0;
6816          UWord key_max = (UWord)0;
6817          VG_(indexRangeMap)( &key_min, &key_max, &val,
6818                              gIgnoredAddressRanges, i );
6819          if (val != IAR_ClientReq)
6820            continue;
6821          /* Print the offending range.  Also, if it is the first,
6822             print a banner before it. */
6823          nBad++;
6824          if (nBad == 1) {
6825             VG_(umsg)(
6826               "WARNING: exiting program has the following client-requested\n"
6827               "WARNING: address error disablement range(s) still in force,\n"
6828               "WARNING: "
6829                  "possibly as a result of some mistake in the use of the\n"
6830               "WARNING: "
6831                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
6832             );
6833          }
6834          VG_(umsg)("   [%ld]  0x%016llx-0x%016llx  %s\n",
6835                    i, (ULong)key_min, (ULong)key_max, showIARKind(val));
6836       }
6837    }
6838 
6839    done_prof_mem();
6840 
6841    if (VG_(clo_stats))
6842       mc_print_stats();
6843 
6844    if (0) {
6845       VG_(message)(Vg_DebugMsg,
6846         "------ Valgrind's client block stats follow ---------------\n" );
6847       show_client_block_stats();
6848    }
6849 }
6850 
6851 /* mark the given addr/len unaddressable for watchpoint implementation
6852    The PointKind will be handled at access time */
6853 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
6854                                                   Addr addr, SizeT len)
6855 {
6856    /* GDBTD: this is somewhat fishy.  We should probably save the previous
6857       accessibility and definedness in gdbserver so that they can be
6858       restored properly.  Currently we assume that the user only watches
6859       things which are properly addressable and defined. */
6860    if (insert)
6861       MC_(make_mem_noaccess) (addr, len);
6862    else
6863       MC_(make_mem_defined)  (addr, len);
6864    return True;
6865 }
6866 
6867 static void mc_pre_clo_init(void)
6868 {
6869    VG_(details_name)            ("Memcheck");
6870    VG_(details_version)         (NULL);
6871    VG_(details_description)     ("a memory error detector");
6872    VG_(details_copyright_author)(
6873       "Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.");
6874    VG_(details_bug_reports_to)  (VG_BUGS_TO);
6875    VG_(details_avg_translation_sizeB) ( 640 );
6876 
6877    VG_(basic_tool_funcs)          (mc_post_clo_init,
6878                                    MC_(instrument),
6879                                    mc_fini);
6880 
6881    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
6882 
6883 
6884    VG_(needs_core_errors)         ();
6885    VG_(needs_tool_errors)         (MC_(eq_Error),
6886                                    MC_(before_pp_Error),
6887                                    MC_(pp_Error),
6888                                    True,/*show TIDs for errors*/
6889                                    MC_(update_Error_extra),
6890                                    MC_(is_recognised_suppression),
6891                                    MC_(read_extra_suppression_info),
6892                                    MC_(error_matches_suppression),
6893                                    MC_(get_error_name),
6894                                    MC_(get_extra_suppression_info),
6895                                    MC_(print_extra_suppression_use),
6896                                    MC_(update_extra_suppression_use));
6897    VG_(needs_libc_freeres)        ();
6898    VG_(needs_command_line_options)(mc_process_cmd_line_options,
6899                                    mc_print_usage,
6900                                    mc_print_debug_usage);
6901    VG_(needs_client_requests)     (mc_handle_client_request);
6902    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
6903                                    mc_expensive_sanity_check);
6904    VG_(needs_print_stats)         (mc_print_stats);
6905    VG_(needs_info_location)       (MC_(pp_describe_addr));
6906    VG_(needs_malloc_replacement)  (MC_(malloc),
6907                                    MC_(__builtin_new),
6908                                    MC_(__builtin_vec_new),
6909                                    MC_(memalign),
6910                                    MC_(calloc),
6911                                    MC_(free),
6912                                    MC_(__builtin_delete),
6913                                    MC_(__builtin_vec_delete),
6914                                    MC_(realloc),
6915                                    MC_(malloc_usable_size),
6916                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
6917    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
6918 
6919    VG_(needs_xml_output)          ();
6920 
6921    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
6922 
6923    // Handling of mmap and mprotect isn't simple (well, it is simple,
6924    // but the justification isn't.)  See comments above, just prior to
6925    // mc_new_mem_mmap.
6926    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
6927    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
6928 
6929    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
6930 
6931    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
6932    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
6933    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
6934 
6935    /* Defer the specification of the new_mem_stack functions to the
6936       post_clo_init function, since we need to first parse the command
6937       line before deciding which set to use. */
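   /* (Concretely -- see mc_post_clo_init earlier in this file -- that
      function is expected to install either the plain mc_new_mem_stack_*
      handlers or their *_w_ECU counterparts, depending on whether origin
      tracking (--track-origins=yes) was requested.) */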
6938 
6939 #  ifdef PERF_FAST_STACK
6940    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
6941    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
6942    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
6943    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
6944    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
6945    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
6946    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
6947    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
6948    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
6949 #  endif
6950    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
6951 
6952    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
6953 
6954    VG_(track_pre_mem_read)        ( check_mem_is_defined );
6955    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
6956    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
6957    VG_(track_post_mem_write)      ( mc_post_mem_write );
6958 
6959    VG_(track_post_reg_write)                  ( mc_post_reg_write );
6960    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
6961 
6962    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
6963 
6964    init_shadow_memory();
6965    // MC_(chunk_poolalloc) must be allocated in post_clo_init
6966    tl_assert(MC_(chunk_poolalloc) == NULL);
6967    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
6968    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
6969    init_prof_mem();
6970 
6971    tl_assert( mc_expensive_sanity_check() );
6972 
6973    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
6974    tl_assert(sizeof(UWord) == sizeof(Addr));
6975    // Call me paranoid.  I don't care.
6976    tl_assert(sizeof(void*) == sizeof(Addr));
6977 
6978    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
6979    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
6980 
6981    /* This is small.  Always initialise it. */
6982    init_nia_to_ecu_cache();
6983 
6984    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
6985       if we need to, since the command line args haven't been
6986       processed yet.  Hence defer it to mc_post_clo_init. */
6987    tl_assert(ocacheL1 == NULL);
6988    tl_assert(ocacheL2 == NULL);
6989 
6990    /* Check some important stuff.  See extensive comments above
6991       re UNALIGNED_OR_HIGH for background. */
6992 #  if VG_WORDSIZE == 4
6993    tl_assert(sizeof(void*) == 4);
6994    tl_assert(sizeof(Addr)  == 4);
6995    tl_assert(sizeof(UWord) == 4);
6996    tl_assert(sizeof(Word)  == 4);
6997    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
6998    tl_assert(MASK(1) == 0UL);
6999    tl_assert(MASK(2) == 1UL);
7000    tl_assert(MASK(4) == 3UL);
7001    tl_assert(MASK(8) == 7UL);
7002 #  else
7003    tl_assert(VG_WORDSIZE == 8);
7004    tl_assert(sizeof(void*) == 8);
7005    tl_assert(sizeof(Addr)  == 8);
7006    tl_assert(sizeof(UWord) == 8);
7007    tl_assert(sizeof(Word)  == 8);
7008    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
7009    tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
7010    tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
7011    tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
7012    tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
7013 #  endif
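   /* Worked example for the 64-bit case (assuming, as the comments above
      suggest, that MASK(n) flags any address that is not n-aligned or that
      lies above MAX_PRIMARY_ADDRESS, so that for power-of-two n it reduces
      to ~MAX_PRIMARY_ADDRESS | (n-1)):

         MASK(8) = ~0xFFFFFFFFFULL       | 0x7
                 = 0xFFFFFFF000000000ULL | 0x7
                 = 0xFFFFFFF000000007ULL      -- matching the assertion. */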
7014 }
7015 
7016 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
7017 
7018 /*--------------------------------------------------------------------*/
7019 /*--- end                                                mc_main.c ---*/
7020 /*--------------------------------------------------------------------*/
7021