1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
5 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
6 /*---                                                    mc_main.c ---*/
7 /*--------------------------------------------------------------------*/
8 
9 /*
10    This file is part of MemCheck, a heavyweight Valgrind tool for
11    detecting memory errors.
12 
13    Copyright (C) 2000-2015 Julian Seward
14       jseward@acm.org
15 
16    This program is free software; you can redistribute it and/or
17    modify it under the terms of the GNU General Public License as
18    published by the Free Software Foundation; either version 2 of the
19    License, or (at your option) any later version.
20 
21    This program is distributed in the hope that it will be useful, but
22    WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    General Public License for more details.
25 
26    You should have received a copy of the GNU General Public License
27    along with this program; if not, write to the Free Software
28    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29    02111-1307, USA.
30 
31    The GNU General Public License is contained in the file COPYING.
32 */
33 
34 #include "pub_tool_basics.h"
35 #include "pub_tool_aspacemgr.h"
36 #include "pub_tool_gdbserver.h"
37 #include "pub_tool_poolalloc.h"
38 #include "pub_tool_hashtable.h"     // For mc_include.h
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_machine.h"
43 #include "pub_tool_mallocfree.h"
44 #include "pub_tool_options.h"
45 #include "pub_tool_oset.h"
46 #include "pub_tool_rangemap.h"
47 #include "pub_tool_replacemalloc.h"
48 #include "pub_tool_tooliface.h"
49 #include "pub_tool_threadstate.h"
50 
51 #include "mc_include.h"
52 #include "memcheck.h"   /* for client requests */
53 
54 
55 /* Set to 1 to enable handwritten assembly helpers on targets for
56    which it is supported. */
57 #define ENABLE_ASSEMBLY_HELPERS 1
58 
59 /* Set to 1 to do a little more sanity checking */
60 #define VG_DEBUG_MEMORY 0
61 
62 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
63 
64 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
65 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
66 
67 
68 /*------------------------------------------------------------*/
69 /*--- Fast-case knobs                                      ---*/
70 /*------------------------------------------------------------*/
71 
72 // Comment these out to disable the fast cases (don't just set them to zero).
73 
74 #define PERF_FAST_LOADV    1
75 #define PERF_FAST_STOREV   1
76 
77 #define PERF_FAST_SARP     1
78 
79 #define PERF_FAST_STACK    1
80 #define PERF_FAST_STACK2   1
81 
82 /* Change this to 1 to enable assertions on origin tracking cache fast
83    paths */
84 #define OC_ENABLE_ASSERTIONS 0
85 
86 
87 /*------------------------------------------------------------*/
88 /*--- Comments on the origin tracking implementation       ---*/
89 /*------------------------------------------------------------*/
90 
91 /* See detailed comment entitled
92    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
93    which is contained further on in this file. */
94 
95 
96 /*------------------------------------------------------------*/
97 /*--- V bits and A bits                                    ---*/
98 /*------------------------------------------------------------*/
99 
100 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
101    thinks the corresponding value bit is defined.  And every memory byte
102    has an A bit, which tracks whether Memcheck thinks the program can access
103    it safely (ie. it's mapped, and has at least one of the RWX permission bits
104    set).  So every N-bit register is shadowed with N V bits, and every memory
105    byte is shadowed with 8 V bits and one A bit.
106 
107    In the implementation, we use two forms of compression (compressed V bits
108    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
109    for memory.
110 
111    Memcheck also tracks extra information about each heap block that is
112    allocated, for detecting memory leaks and other purposes.
113 */
114 
115 /*------------------------------------------------------------*/
116 /*--- Basic A/V bitmap representation.                     ---*/
117 /*------------------------------------------------------------*/
118 
119 /* All reads and writes are checked against a memory map (a.k.a. shadow
120    memory), which records the state of all memory in the process.
121 
122    On 32-bit machines the memory map is organised as follows.
123    The top 16 bits of an address are used to index into a top-level
124    map table, containing 65536 entries.  Each entry is a pointer to a
125    second-level map, which records the accesibililty and validity
126    permissions for the 65536 bytes indexed by the lower 16 bits of the
127    address.  Each byte is represented by two bits (details are below).  So
128    each second-level map contains 16384 bytes.  This two-level arrangement
129    conveniently divides the 4G address space into 64k lumps, each size 64k
130    bytes.
131 
132    All entries in the primary (top-level) map must point to a valid
133    secondary (second-level) map.  Since many of the 64kB chunks will
134    have the same status for every bit -- ie. noaccess (for unused
135    address space) or entirely addressable and defined (for code segments) --
136    there are three distinguished secondary maps, which indicate 'noaccess',
137    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
138    map entry points to the relevant distinguished map.  In practice,
139    typically more than half of the addressable memory is represented with
140    the 'undefined' or 'defined' distinguished secondary map, so it gives a
141    good saving.  It also lets us set the V+A bits of large address regions
142    quickly in set_address_range_perms().
143 
144    On 64-bit machines it's more complicated.  If we followed the same basic
145    scheme we'd have a four-level table which would require too many memory
146    accesses.  So instead the top-level map table has 2^20 entries (indexed
147    using bits 16..35 of the address);  this covers the bottom 64GB.  Any
148    accesses above 64GB are handled with a slow, sparse auxiliary table.
149    Valgrind's address space manager tries very hard to keep things below
150    this 64GB barrier so that performance doesn't suffer too much.
151 
152    Note that this file has a lot of different functions for reading and
153    writing shadow memory.  Only a couple are strictly necessary (eg.
154    get_vabits2 and set_vabits2), most are just specialised for specific
155    common cases to improve performance.
156 
157    Aside: the V+A bits are less precise than they could be -- we have no way
158    of marking memory as read-only.  It would be great if we could add an
159    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
160    which requires 2.3 bits to hold, and there's no way to do that elegantly
161    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
162    seem worth it.
163 */
164 
165 /* --------------- Basic configuration --------------- */
166 
167 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
168 
169 #if VG_WORDSIZE == 4
170 
171 /* cover the entire address space */
172 #  define N_PRIMARY_BITS  16
173 
174 #else
175 
176 /* Just handle the first 64G fast and the rest via auxiliary
177    primaries.  If you change this, Memcheck will assert at startup.
178    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
179 #  define N_PRIMARY_BITS  20
180 
181 #endif
182 
183 
184 /* Do not change this. */
185 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
186 
187 /* Do not change this. */
188 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
189 
190 
191 /* --------------- Secondary maps --------------- */
192 
193 // Each byte of memory conceptually has an A bit, which indicates its
194 // addressability, and 8 V bits, which indicates its definedness.
195 //
196 // But because very few bytes are partially defined, we can use a nice
197 // compression scheme to reduce the size of shadow memory.  Each byte of
198 // memory has 2 bits which indicates its state (ie. V+A bits):
199 //
200 //   00:  noaccess    (unaddressable but treated as fully defined)
201 //   01:  undefined   (addressable and fully undefined)
202 //   10:  defined     (addressable and fully defined)
203 //   11:  partdefined (addressable and partially defined)
204 //
205 // In the "partdefined" case, we use a secondary table to store the V bits.
206 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
207 // bits.
208 //
209 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
210 // four bytes (32 bits) of memory are in each chunk.  Hence the name
211 // "vabits8".  This lets us get the V+A bits for four bytes at a time
212 // easily (without having to do any shifting and/or masking), and that is a
213 // very common operation.  (Note that although each vabits8 chunk
214 // is 8 bits in size, it represents 32 bits of memory.)
215 //
216 // The representation is "inverse" little-endian... each 4 bytes of
217 // memory is represented by a 1 byte value, where:
218 //
219 // - the status of byte (a+0) is held in bits [1..0]
220 // - the status of byte (a+1) is held in bits [3..2]
221 // - the status of byte (a+2) is held in bits [5..4]
222 // - the status of byte (a+3) is held in bits [7..6]
223 //
224 // It's "inverse" because endianness normally describes a mapping from
225 // value bits to memory addresses;  in this case the mapping is inverted.
226 // Ie. instead of particular value bits being held in certain addresses, in
227 // this case certain addresses are represented by particular value bits.
228 // See insert_vabits2_into_vabits8() for an example.
229 //
230 // But note that we don't compress the V bits stored in registers;  they
231 // need to be explicit to made the shadow operations possible.  Therefore
232 // when moving values between registers and memory we need to convert
233 // between the expanded in-register format and the compressed in-memory
234 // format.  This isn't so difficult, it just requires careful attention in a
235 // few places.
236 
237 // These represent eight bits of memory.
238 #define VA_BITS2_NOACCESS     0x0      // 00b
239 #define VA_BITS2_UNDEFINED    0x1      // 01b
240 #define VA_BITS2_DEFINED      0x2      // 10b
241 #define VA_BITS2_PARTDEFINED  0x3      // 11b
242 
243 // These represent 16 bits of memory.
244 #define VA_BITS4_NOACCESS     0x0      // 00_00b
245 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
246 #define VA_BITS4_DEFINED      0xa      // 10_10b
247 
248 // These represent 32 bits of memory.
249 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
250 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
251 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
252 
253 // These represent 64 bits of memory.
254 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
255 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
256 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
257 
258 
259 #define SM_CHUNKS             16384    // Each SM covers 64k of memory.
260 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
261 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
262 
263 // Paranoia:  it's critical for performance that the requested inlining
264 // occurs.  So try extra hard.
265 #define INLINE    inline __attribute__((always_inline))
266 
start_of_this_sm(Addr a)267 static INLINE Addr start_of_this_sm ( Addr a ) {
268    return (a & (~SM_MASK));
269 }
is_start_of_sm(Addr a)270 static INLINE Bool is_start_of_sm ( Addr a ) {
271    return (start_of_this_sm(a) == a);
272 }
273 
274 typedef
275    struct {
276       UChar vabits8[SM_CHUNKS];
277    }
278    SecMap;
279 
280 // 3 distinguished secondary maps, one for no-access, one for
281 // accessible but undefined, and one for accessible and defined.
282 // Distinguished secondaries may never be modified.
283 #define SM_DIST_NOACCESS   0
284 #define SM_DIST_UNDEFINED  1
285 #define SM_DIST_DEFINED    2
286 
287 static SecMap sm_distinguished[3];
288 
is_distinguished_sm(SecMap * sm)289 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
290    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
291 }
292 
293 // Forward declaration
294 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
295 
296 /* dist_sm points to one of our three distinguished secondaries.  Make
297    a copy of it so that we can write to it.
298 */
copy_for_writing(SecMap * dist_sm)299 static SecMap* copy_for_writing ( SecMap* dist_sm )
300 {
301    SecMap* new_sm;
302    tl_assert(dist_sm == &sm_distinguished[0]
303           || dist_sm == &sm_distinguished[1]
304           || dist_sm == &sm_distinguished[2]);
305 
306    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
307    if (new_sm == NULL)
308       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
309                                    sizeof(SecMap) );
310    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
311    update_SM_counts(dist_sm, new_sm);
312    return new_sm;
313 }
314 
315 /* --------------- Stats --------------- */
316 
317 static Int   n_issued_SMs      = 0;
318 static Int   n_deissued_SMs    = 0;
319 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
320 static Int   n_undefined_SMs   = 0;
321 static Int   n_defined_SMs     = 0;
322 static Int   n_non_DSM_SMs     = 0;
323 static Int   max_noaccess_SMs  = 0;
324 static Int   max_undefined_SMs = 0;
325 static Int   max_defined_SMs   = 0;
326 static Int   max_non_DSM_SMs   = 0;
327 
328 /* # searches initiated in auxmap_L1, and # base cmps required */
329 static ULong n_auxmap_L1_searches  = 0;
330 static ULong n_auxmap_L1_cmps      = 0;
331 /* # of searches that missed in auxmap_L1 and therefore had to
332    be handed to auxmap_L2. And the number of nodes inserted. */
333 static ULong n_auxmap_L2_searches  = 0;
334 static ULong n_auxmap_L2_nodes     = 0;
335 
336 static Int   n_sanity_cheap     = 0;
337 static Int   n_sanity_expensive = 0;
338 
339 static Int   n_secVBit_nodes   = 0;
340 static Int   max_secVBit_nodes = 0;
341 
update_SM_counts(SecMap * oldSM,SecMap * newSM)342 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
343 {
344    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
345    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
346    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
347    else                                                  { n_non_DSM_SMs  --;
348                                                            n_deissued_SMs ++; }
349 
350    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
351    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
352    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
353    else                                                  { n_non_DSM_SMs  ++;
354                                                            n_issued_SMs   ++; }
355 
356    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
357    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
358    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
359    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
360 }
361 
362 /* --------------- Primary maps --------------- */
363 
364 /* The main primary map.  This covers some initial part of the address
365    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
366    handled using the auxiliary primary map.
367 */
368 static SecMap* primary_map[N_PRIMARY_MAP];
369 
370 
371 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
372    value, and sm points at the relevant secondary map.  As with the
373    main primary map, the secondary may be either a real secondary, or
374    one of the three distinguished secondaries.  DO NOT CHANGE THIS
375    LAYOUT: the first word has to be the key for OSet fast lookups.
376 */
377 typedef
378    struct {
379       Addr    base;
380       SecMap* sm;
381    }
382    AuxMapEnt;
383 
384 /* Tunable parameter: How big is the L1 queue? */
385 #define N_AUXMAP_L1 24
386 
387 /* Tunable parameter: How far along the L1 queue to insert
388    entries resulting from L2 lookups? */
389 #define AUXMAP_L1_INSERT_IX 12
390 
391 static struct {
392           Addr       base;
393           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
394        }
395        auxmap_L1[N_AUXMAP_L1];
396 
397 static OSet* auxmap_L2 = NULL;
398 
init_auxmap_L1_L2(void)399 static void init_auxmap_L1_L2 ( void )
400 {
401    Int i;
402    for (i = 0; i < N_AUXMAP_L1; i++) {
403       auxmap_L1[i].base = 0;
404       auxmap_L1[i].ent  = NULL;
405    }
406 
407    tl_assert(0 == offsetof(AuxMapEnt,base));
408    tl_assert(sizeof(Addr) == sizeof(void*));
409    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
410                                     /*fastCmp*/ NULL,
411                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
412 }
413 
414 /* Check representation invariants; if OK return NULL; else a
415    descriptive bit of text.  Also return the number of
416    non-distinguished secondary maps referred to from the auxiliary
417    primary maps. */
418 
check_auxmap_L1_L2_sanity(Word * n_secmaps_found)419 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
420 {
421    Word i, j;
422    /* On a 32-bit platform, the L2 and L1 tables should
423       both remain empty forever.
424 
425       On a 64-bit platform:
426       In the L2 table:
427        all .base & 0xFFFF == 0
428        all .base > MAX_PRIMARY_ADDRESS
429       In the L1 table:
430        all .base & 0xFFFF == 0
431        all (.base > MAX_PRIMARY_ADDRESS
432             .base & 0xFFFF == 0
433             and .ent points to an AuxMapEnt with the same .base)
434            or
435            (.base == 0 and .ent == NULL)
436    */
437    *n_secmaps_found = 0;
438    if (sizeof(void*) == 4) {
439       /* 32-bit platform */
440       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
441          return "32-bit: auxmap_L2 is non-empty";
442       for (i = 0; i < N_AUXMAP_L1; i++)
443         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
444       return "32-bit: auxmap_L1 is non-empty";
445    } else {
446       /* 64-bit platform */
447       UWord elems_seen = 0;
448       AuxMapEnt *elem, *res;
449       AuxMapEnt key;
450       /* L2 table */
451       VG_(OSetGen_ResetIter)(auxmap_L2);
452       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
453          elems_seen++;
454          if (0 != (elem->base & (Addr)0xFFFF))
455             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
456          if (elem->base <= MAX_PRIMARY_ADDRESS)
457             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
458          if (elem->sm == NULL)
459             return "64-bit: .sm in _L2 is NULL";
460          if (!is_distinguished_sm(elem->sm))
461             (*n_secmaps_found)++;
462       }
463       if (elems_seen != n_auxmap_L2_nodes)
464          return "64-bit: disagreement on number of elems in _L2";
465       /* Check L1-L2 correspondence */
466       for (i = 0; i < N_AUXMAP_L1; i++) {
467          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
468             continue;
469          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
470             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
471          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
472             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
473          if (auxmap_L1[i].ent == NULL)
474             return "64-bit: .ent is NULL in auxmap_L1";
475          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
476             return "64-bit: _L1 and _L2 bases are inconsistent";
477          /* Look it up in auxmap_L2. */
478          key.base = auxmap_L1[i].base;
479          key.sm   = 0;
480          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
481          if (res == NULL)
482             return "64-bit: _L1 .base not found in _L2";
483          if (res != auxmap_L1[i].ent)
484             return "64-bit: _L1 .ent disagrees with _L2 entry";
485       }
486       /* Check L1 contains no duplicates */
487       for (i = 0; i < N_AUXMAP_L1; i++) {
488          if (auxmap_L1[i].base == 0)
489             continue;
490 	 for (j = i+1; j < N_AUXMAP_L1; j++) {
491             if (auxmap_L1[j].base == 0)
492                continue;
493             if (auxmap_L1[j].base == auxmap_L1[i].base)
494                return "64-bit: duplicate _L1 .base entries";
495          }
496       }
497    }
498    return NULL; /* ok */
499 }
500 
insert_into_auxmap_L1_at(Word rank,AuxMapEnt * ent)501 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
502 {
503    Word i;
504    tl_assert(ent);
505    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
506    for (i = N_AUXMAP_L1-1; i > rank; i--)
507       auxmap_L1[i] = auxmap_L1[i-1];
508    auxmap_L1[rank].base = ent->base;
509    auxmap_L1[rank].ent  = ent;
510 }
511 
maybe_find_in_auxmap(Addr a)512 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
513 {
514    AuxMapEnt  key;
515    AuxMapEnt* res;
516    Word       i;
517 
518    tl_assert(a > MAX_PRIMARY_ADDRESS);
519    a &= ~(Addr)0xFFFF;
520 
521    /* First search the front-cache, which is a self-organising
522       list containing the most popular entries. */
523 
524    if (LIKELY(auxmap_L1[0].base == a))
525       return auxmap_L1[0].ent;
526    if (LIKELY(auxmap_L1[1].base == a)) {
527       Addr       t_base = auxmap_L1[0].base;
528       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
529       auxmap_L1[0].base = auxmap_L1[1].base;
530       auxmap_L1[0].ent  = auxmap_L1[1].ent;
531       auxmap_L1[1].base = t_base;
532       auxmap_L1[1].ent  = t_ent;
533       return auxmap_L1[0].ent;
534    }
535 
536    n_auxmap_L1_searches++;
537 
538    for (i = 0; i < N_AUXMAP_L1; i++) {
539       if (auxmap_L1[i].base == a) {
540          break;
541       }
542    }
543    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
544 
545    n_auxmap_L1_cmps += (ULong)(i+1);
546 
547    if (i < N_AUXMAP_L1) {
548       if (i > 0) {
549          Addr       t_base = auxmap_L1[i-1].base;
550          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
551          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
552          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
553          auxmap_L1[i-0].base = t_base;
554          auxmap_L1[i-0].ent  = t_ent;
555          i--;
556       }
557       return auxmap_L1[i].ent;
558    }
559 
560    n_auxmap_L2_searches++;
561 
562    /* First see if we already have it. */
563    key.base = a;
564    key.sm   = 0;
565 
566    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
567    if (res)
568       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
569    return res;
570 }
571 
find_or_alloc_in_auxmap(Addr a)572 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
573 {
574    AuxMapEnt *nyu, *res;
575 
576    /* First see if we already have it. */
577    res = maybe_find_in_auxmap( a );
578    if (LIKELY(res))
579       return res;
580 
581    /* Ok, there's no entry in the secondary map, so we'll have
582       to allocate one. */
583    a &= ~(Addr)0xFFFF;
584 
585    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
586    nyu->base = a;
587    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
588    VG_(OSetGen_Insert)( auxmap_L2, nyu );
589    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
590    n_auxmap_L2_nodes++;
591    return nyu;
592 }
593 
594 /* --------------- SecMap fundamentals --------------- */
595 
596 // In all these, 'low' means it's definitely in the main primary map,
597 // 'high' means it's definitely in the auxiliary table.
598 
get_secmap_low_ptr(Addr a)599 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
600 {
601    UWord pm_off = a >> 16;
602 #  if VG_DEBUG_MEMORY >= 1
603    tl_assert(pm_off < N_PRIMARY_MAP);
604 #  endif
605    return &primary_map[ pm_off ];
606 }
607 
get_secmap_high_ptr(Addr a)608 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
609 {
610    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
611    return &am->sm;
612 }
613 
get_secmap_ptr(Addr a)614 static INLINE SecMap** get_secmap_ptr ( Addr a )
615 {
616    return ( a <= MAX_PRIMARY_ADDRESS
617           ? get_secmap_low_ptr(a)
618           : get_secmap_high_ptr(a));
619 }
620 
get_secmap_for_reading_low(Addr a)621 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
622 {
623    return *get_secmap_low_ptr(a);
624 }
625 
get_secmap_for_reading_high(Addr a)626 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
627 {
628    return *get_secmap_high_ptr(a);
629 }
630 
get_secmap_for_writing_low(Addr a)631 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
632 {
633    SecMap** p = get_secmap_low_ptr(a);
634    if (UNLIKELY(is_distinguished_sm(*p)))
635       *p = copy_for_writing(*p);
636    return *p;
637 }
638 
get_secmap_for_writing_high(Addr a)639 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
640 {
641    SecMap** p = get_secmap_high_ptr(a);
642    if (UNLIKELY(is_distinguished_sm(*p)))
643       *p = copy_for_writing(*p);
644    return *p;
645 }
646 
647 /* Produce the secmap for 'a', either from the primary map or by
648    ensuring there is an entry for it in the aux primary map.  The
649    secmap may be a distinguished one as the caller will only want to
650    be able to read it.
651 */
get_secmap_for_reading(Addr a)652 static INLINE SecMap* get_secmap_for_reading ( Addr a )
653 {
654    return ( a <= MAX_PRIMARY_ADDRESS
655           ? get_secmap_for_reading_low (a)
656           : get_secmap_for_reading_high(a) );
657 }
658 
659 /* Produce the secmap for 'a', either from the primary map or by
660    ensuring there is an entry for it in the aux primary map.  The
661    secmap may not be a distinguished one, since the caller will want
662    to be able to write it.  If it is a distinguished secondary, make a
663    writable copy of it, install it, and return the copy instead.  (COW
664    semantics).
665 */
get_secmap_for_writing(Addr a)666 static INLINE SecMap* get_secmap_for_writing ( Addr a )
667 {
668    return ( a <= MAX_PRIMARY_ADDRESS
669           ? get_secmap_for_writing_low (a)
670           : get_secmap_for_writing_high(a) );
671 }
672 
673 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
674    allocate one if one doesn't already exist.  This is used by the
675    leak checker.
676 */
maybe_get_secmap_for(Addr a)677 static SecMap* maybe_get_secmap_for ( Addr a )
678 {
679    if (a <= MAX_PRIMARY_ADDRESS) {
680       return get_secmap_for_reading_low(a);
681    } else {
682       AuxMapEnt* am = maybe_find_in_auxmap(a);
683       return am ? am->sm : NULL;
684    }
685 }
686 
687 /* --------------- Fundamental functions --------------- */
688 
689 static INLINE
insert_vabits2_into_vabits8(Addr a,UChar vabits2,UChar * vabits8)690 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
691 {
692    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
693    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
694    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
695 }
696 
697 static INLINE
insert_vabits4_into_vabits8(Addr a,UChar vabits4,UChar * vabits8)698 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
699 {
700    UInt shift;
701    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
702    shift     =  (a & 2)   << 1;        // shift by 0 or 4
703    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
704    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
705 }
706 
707 static INLINE
extract_vabits2_from_vabits8(Addr a,UChar vabits8)708 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
709 {
710    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
711    vabits8 >>= shift;                  // shift the two bits to the bottom
712    return 0x3 & vabits8;               // mask out the rest
713 }
714 
715 static INLINE
extract_vabits4_from_vabits8(Addr a,UChar vabits8)716 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
717 {
718    UInt shift;
719    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
720    shift = (a & 2) << 1;               // shift by 0 or 4
721    vabits8 >>= shift;                  // shift the four bits to the bottom
722    return 0xf & vabits8;               // mask out the rest
723 }
724 
725 // Note that these four are only used in slow cases.  The fast cases do
726 // clever things like combine the auxmap check (in
727 // get_secmap_{read,writ}able) with alignment checks.
728 
729 // *** WARNING! ***
730 // Any time this function is called, if it is possible that vabits2
731 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
732 // sec-V-bits table must also be set!
733 static INLINE
set_vabits2(Addr a,UChar vabits2)734 void set_vabits2 ( Addr a, UChar vabits2 )
735 {
736    SecMap* sm       = get_secmap_for_writing(a);
737    UWord   sm_off   = SM_OFF(a);
738    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
739 }
740 
741 static INLINE
get_vabits2(Addr a)742 UChar get_vabits2 ( Addr a )
743 {
744    SecMap* sm       = get_secmap_for_reading(a);
745    UWord   sm_off   = SM_OFF(a);
746    UChar   vabits8  = sm->vabits8[sm_off];
747    return extract_vabits2_from_vabits8(a, vabits8);
748 }
749 
750 // *** WARNING! ***
751 // Any time this function is called, if it is possible that any of the
752 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
753 // corresponding entry(s) in the sec-V-bits table must also be set!
754 static INLINE
get_vabits8_for_aligned_word32(Addr a)755 UChar get_vabits8_for_aligned_word32 ( Addr a )
756 {
757    SecMap* sm       = get_secmap_for_reading(a);
758    UWord   sm_off   = SM_OFF(a);
759    UChar   vabits8  = sm->vabits8[sm_off];
760    return vabits8;
761 }
762 
763 static INLINE
set_vabits8_for_aligned_word32(Addr a,UChar vabits8)764 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
765 {
766    SecMap* sm       = get_secmap_for_writing(a);
767    UWord   sm_off   = SM_OFF(a);
768    sm->vabits8[sm_off] = vabits8;
769 }
770 
771 
772 // Forward declarations
773 static UWord get_sec_vbits8(Addr a);
774 static void  set_sec_vbits8(Addr a, UWord vbits8);
775 
776 // Returns False if there was an addressability error.
777 static INLINE
set_vbits8(Addr a,UChar vbits8)778 Bool set_vbits8 ( Addr a, UChar vbits8 )
779 {
780    Bool  ok      = True;
781    UChar vabits2 = get_vabits2(a);
782    if ( VA_BITS2_NOACCESS != vabits2 ) {
783       // Addressable.  Convert in-register format to in-memory format.
784       // Also remove any existing sec V bit entry for the byte if no
785       // longer necessary.
786       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
787       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
788       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
789                                                 set_sec_vbits8(a, vbits8);  }
790       set_vabits2(a, vabits2);
791 
792    } else {
793       // Unaddressable!  Do nothing -- when writing to unaddressable
794       // memory it acts as a black hole, and the V bits can never be seen
795       // again.  So we don't have to write them at all.
796       ok = False;
797    }
798    return ok;
799 }
800 
801 // Returns False if there was an addressability error.  In that case, we put
802 // all defined bits into vbits8.
803 static INLINE
get_vbits8(Addr a,UChar * vbits8)804 Bool get_vbits8 ( Addr a, UChar* vbits8 )
805 {
806    Bool  ok      = True;
807    UChar vabits2 = get_vabits2(a);
808 
809    // Convert the in-memory format to in-register format.
810    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
811    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
812    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
813       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
814       ok = False;
815    } else {
816       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
817       *vbits8 = get_sec_vbits8(a);
818    }
819    return ok;
820 }
821 
822 
823 /* --------------- Secondary V bit table ------------ */
824 
825 // This table holds the full V bit pattern for partially-defined bytes
826 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
827 // memory.
828 //
829 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
830 // then overwrite the same address with a fully defined byte, the sec-V-bit
831 // node will not necessarily be removed.  This is because checking for
832 // whether removal is necessary would slow down the fast paths.
833 //
834 // To avoid the stale nodes building up too much, we periodically (once the
835 // table reaches a certain size) garbage collect (GC) the table by
836 // traversing it and evicting any nodes not having PDB.
837 // If more than a certain proportion of nodes survived, we increase the
838 // table size so that GCs occur less often.
839 //
840 // This policy is designed to avoid bad table bloat in the worst case where
841 // a program creates huge numbers of stale PDBs -- we would get this bloat
842 // if we had no GC -- while handling well the case where a node becomes
843 // stale but shortly afterwards is rewritten with a PDB and so becomes
844 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
845 // remove all stale nodes as soon as possible, we just end up re-adding a
846 // lot of them in later again.  The "sufficiently stale" approach avoids
847 // this.  (If a program has many live PDBs, performance will just suck,
848 // there's no way around that.)
849 //
850 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
851 // holding on to stale entries for 2 GCs before discarding them can lead
852 // to massive space leaks.  So we're changing to an arrangement where
853 // lines are evicted as soon as they are observed to be stale during a
854 // GC.  This also has a side benefit of allowing the sufficiently_stale
855 // field to be removed from the SecVBitNode struct, reducing its size by
856 // 8 bytes, which is a substantial space saving considering that the
857 // struct was previously 32 or so bytes, on a 64 bit target.
858 //
859 // In order to try and mitigate the problem that the "sufficiently stale"
860 // heuristic was designed to avoid, the table size is allowed to drift
861 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
862 // means that nodes will exist in the table longer on average, and hopefully
863 // will be deleted and re-added less frequently.
864 //
865 // The previous scaling up mechanism (now called STEPUP) is retained:
866 // if residency exceeds 50%, the table is scaled up, although by a
867 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
868 // frequency of GCs when there are many PDBs at reduces the tendency of
869 // stale PDBs to reside for long periods in the table.
870 
871 static OSet* secVBitTable;
872 
873 // Stats
874 static ULong sec_vbits_new_nodes = 0;
875 static ULong sec_vbits_updates   = 0;
876 
877 // This must be a power of two;  this is checked in mc_pre_clo_init().
878 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
879 // a larger address range) they take more space but we can get multiple
880 // partially-defined bytes in one if they are close to each other, reducing
881 // the number of total nodes.  In practice sometimes they are clustered (eg.
882 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
883 // row), but often not.  So we choose something intermediate.
884 #define BYTES_PER_SEC_VBIT_NODE     16
885 
886 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
887 // more than this many nodes survive a GC.
888 #define STEPUP_SURVIVOR_PROPORTION  0.5
889 #define STEPUP_GROWTH_FACTOR        1.414213562
890 
891 // If the above heuristic doesn't apply, then we may make the table
892 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
893 // this many nodes survive a GC, _and_ the total table size does
894 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
895 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
896 // effectively although gradually reduces residency and increases time
897 // between GCs for programs with small numbers of PDBs.  The 80000 limit
898 // effectively limits the table size to around 2MB for programs with
899 // small numbers of PDBs, whilst giving a reasonably long lifetime to
900 // entries, to try and reduce the costs resulting from deleting and
901 // re-adding of entries.
902 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
903 #define DRIFTUP_GROWTH_FACTOR       1.015
904 #define DRIFTUP_MAX_SIZE            80000
905 
906 // We GC the table when it gets this many nodes in it, ie. it's effectively
907 // the table size.  It can change.
908 static Int  secVBitLimit = 1000;
909 
910 // The number of GCs done, used to age sec-V-bit nodes for eviction.
911 // Because it's unsigned, wrapping doesn't matter -- the right answer will
912 // come out anyway.
913 static UInt GCs_done = 0;
914 
915 typedef
916    struct {
917       Addr  a;
918       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
919    }
920    SecVBitNode;
921 
createSecVBitTable(void)922 static OSet* createSecVBitTable(void)
923 {
924    OSet* newSecVBitTable;
925    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
926       ( offsetof(SecVBitNode, a),
927         NULL, // use fast comparisons
928         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
929         VG_(free),
930         1000,
931         sizeof(SecVBitNode));
932    return newSecVBitTable;
933 }
934 
gcSecVBitTable(void)935 static void gcSecVBitTable(void)
936 {
937    OSet*        secVBitTable2;
938    SecVBitNode* n;
939    Int          i, n_nodes = 0, n_survivors = 0;
940 
941    GCs_done++;
942 
943    // Create the new table.
944    secVBitTable2 = createSecVBitTable();
945 
946    // Traverse the table, moving fresh nodes into the new table.
947    VG_(OSetGen_ResetIter)(secVBitTable);
948    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
949       // Keep node if any of its bytes are non-stale.  Using
950       // get_vabits2() for the lookup is not very efficient, but I don't
951       // think it matters.
952       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
953          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
954             // Found a non-stale byte, so keep =>
955             // Insert a copy of the node into the new table.
956             SecVBitNode* n2 =
957                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
958             *n2 = *n;
959             VG_(OSetGen_Insert)(secVBitTable2, n2);
960             break;
961          }
962       }
963    }
964 
965    // Get the before and after sizes.
966    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
967    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
968 
969    // Destroy the old table, and put the new one in its place.
970    VG_(OSetGen_Destroy)(secVBitTable);
971    secVBitTable = secVBitTable2;
972 
973    if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
974       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
975                    n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
976    }
977 
978    // Increase table size if necessary.
979    if ((Double)n_survivors
980        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
981       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
982       if (VG_(clo_verbosity) > 1)
983          VG_(message)(Vg_DebugMsg,
984                       "memcheck GC: %d new table size (stepup)\n",
985                       secVBitLimit);
986    }
987    else
988    if (secVBitLimit < DRIFTUP_MAX_SIZE
989        && (Double)n_survivors
990           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
991       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
992       if (VG_(clo_verbosity) > 1)
993          VG_(message)(Vg_DebugMsg,
994                       "memcheck GC: %d new table size (driftup)\n",
995                       secVBitLimit);
996    }
997 }
998 
get_sec_vbits8(Addr a)999 static UWord get_sec_vbits8(Addr a)
1000 {
1001    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1002    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
1003    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1004    UChar        vbits8;
1005    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1006    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1007    // make it to the secondary V bits table.
1008    vbits8 = n->vbits8[amod];
1009    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1010    return vbits8;
1011 }
1012 
set_sec_vbits8(Addr a,UWord vbits8)1013 static void set_sec_vbits8(Addr a, UWord vbits8)
1014 {
1015    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1016    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
1017    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1018    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1019    // make it to the secondary V bits table.
1020    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1021    if (n) {
1022       n->vbits8[amod] = vbits8;     // update
1023       sec_vbits_updates++;
1024    } else {
1025       // Do a table GC if necessary.  Nb: do this before creating and
1026       // inserting the new node, to avoid erroneously GC'ing the new node.
1027       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1028          gcSecVBitTable();
1029       }
1030 
1031       // New node:  assign the specific byte, make the rest invalid (they
1032       // should never be read as-is, but be cautious).
1033       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1034       n->a            = aAligned;
1035       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1036          n->vbits8[i] = V_BITS8_UNDEFINED;
1037       }
1038       n->vbits8[amod] = vbits8;
1039 
1040       // Insert the new node.
1041       VG_(OSetGen_Insert)(secVBitTable, n);
1042       sec_vbits_new_nodes++;
1043 
1044       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1045       if (n_secVBit_nodes > max_secVBit_nodes)
1046          max_secVBit_nodes = n_secVBit_nodes;
1047    }
1048 }
1049 
1050 /* --------------- Endianness helpers --------------- */
1051 
1052 /* Returns the offset in memory of the byteno-th most significant byte
1053    in a wordszB-sized word, given the specified endianness. */
byte_offset_w(UWord wordszB,Bool bigendian,UWord byteno)1054 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1055                                     UWord byteno ) {
1056    return bigendian ? (wordszB-1-byteno) : byteno;
1057 }
1058 
1059 
1060 /* --------------- Ignored address ranges --------------- */
1061 
1062 /* Denotes the address-error-reportability status for address ranges:
1063    IAR_NotIgnored:  the usual case -- report errors in this range
1064    IAR_CommandLine: don't report errors -- from command line setting
1065    IAR_ClientReq:   don't report errors -- from client request
1066 */
1067 typedef
1068    enum { IAR_INVALID=99,
1069           IAR_NotIgnored,
1070           IAR_CommandLine,
1071           IAR_ClientReq }
1072    IARKind;
1073 
showIARKind(IARKind iark)1074 static const HChar* showIARKind ( IARKind iark )
1075 {
1076    switch (iark) {
1077       case IAR_INVALID:     return "INVALID";
1078       case IAR_NotIgnored:  return "NotIgnored";
1079       case IAR_CommandLine: return "CommandLine";
1080       case IAR_ClientReq:   return "ClientReq";
1081       default:              return "???";
1082    }
1083 }
1084 
1085 // RangeMap<IARKind>
1086 static RangeMap* gIgnoredAddressRanges = NULL;
1087 
init_gIgnoredAddressRanges(void)1088 static void init_gIgnoredAddressRanges ( void )
1089 {
1090    if (LIKELY(gIgnoredAddressRanges != NULL))
1091       return;
1092    gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1093                                              VG_(free), IAR_NotIgnored );
1094 }
1095 
MC_(in_ignored_range)1096 Bool MC_(in_ignored_range) ( Addr a )
1097 {
1098    if (LIKELY(gIgnoredAddressRanges == NULL))
1099       return False;
1100    UWord how     = IAR_INVALID;
1101    UWord key_min = ~(UWord)0;
1102    UWord key_max =  (UWord)0;
1103    VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1104    tl_assert(key_min <= a && a <= key_max);
1105    switch (how) {
1106       case IAR_NotIgnored:  return False;
1107       case IAR_CommandLine: return True;
1108       case IAR_ClientReq:   return True;
1109       default: break; /* invalid */
1110    }
1111    VG_(tool_panic)("MC_(in_ignore_range)");
1112    /*NOTREACHED*/
1113 }
1114 
1115 /* Parse two Addr separated by a dash, or fail. */
1116 
parse_range(const HChar ** ppc,Addr * result1,Addr * result2)1117 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
1118 {
1119    Bool ok = VG_(parse_Addr) (ppc, result1);
1120    if (!ok)
1121       return False;
1122    if (**ppc != '-')
1123       return False;
1124    (*ppc)++;
1125    ok = VG_(parse_Addr) (ppc, result2);
1126    if (!ok)
1127       return False;
1128    return True;
1129 }
1130 
1131 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1132    fail.  If they are valid, add them to the global set of ignored
1133    ranges. */
parse_ignore_ranges(const HChar * str0)1134 static Bool parse_ignore_ranges ( const HChar* str0 )
1135 {
1136    init_gIgnoredAddressRanges();
1137    const HChar*  str = str0;
1138    const HChar** ppc = &str;
1139    while (1) {
1140       Addr start = ~(Addr)0;
1141       Addr end   = (Addr)0;
1142       Bool ok    = parse_range(ppc, &start, &end);
1143       if (!ok)
1144          return False;
1145       if (start > end)
1146          return False;
1147       VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1148       if (**ppc == 0)
1149          return True;
1150       if (**ppc != ',')
1151          return False;
1152       (*ppc)++;
1153    }
1154    /*NOTREACHED*/
1155    return False;
1156 }
1157 
1158 /* Add or remove [start, +len) from the set of ignored ranges. */
modify_ignore_ranges(Bool addRange,Addr start,Addr len)1159 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1160 {
1161    init_gIgnoredAddressRanges();
1162    const Bool verbose = (VG_(clo_verbosity) > 1);
1163    if (len == 0) {
1164       return False;
1165    }
1166    if (addRange) {
1167       VG_(bindRangeMap)(gIgnoredAddressRanges,
1168                         start, start+len-1, IAR_ClientReq);
1169       if (verbose)
1170          VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1171                    (void*)start, (void*)(start+len-1));
1172    } else {
1173       VG_(bindRangeMap)(gIgnoredAddressRanges,
1174                         start, start+len-1, IAR_NotIgnored);
1175       if (verbose)
1176          VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1177                    (void*)start, (void*)(start+len-1));
1178    }
1179    if (verbose) {
1180       VG_(dmsg)("memcheck:   now have %u ranges:\n",
1181                 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1182       UInt i;
1183       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1184          UWord val     = IAR_INVALID;
1185          UWord key_min = ~(UWord)0;
1186          UWord key_max = (UWord)0;
1187          VG_(indexRangeMap)( &key_min, &key_max, &val,
1188                              gIgnoredAddressRanges, i );
1189          VG_(dmsg)("memcheck:      [%u]  %016lx-%016lx  %s\n",
1190                    i, key_min, key_max, showIARKind(val));
1191       }
1192    }
1193    return True;
1194 }
1195 
1196 
1197 /* --------------- Load/store slow cases. --------------- */
1198 
1199 static
1200 __attribute__((noinline))
mc_LOADV_128_or_256_slow(ULong * res,Addr a,SizeT nBits,Bool bigendian)1201 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1202                                 Addr a, SizeT nBits, Bool bigendian )
1203 {
1204    ULong  pessim[4];     /* only used when p-l-ok=yes */
1205    SSizeT szB            = nBits / 8;
1206    SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
1207    SSizeT i, j;          /* Must be signed. */
1208    SizeT  n_addrs_bad = 0;
1209    Addr   ai;
1210    UChar  vbits8;
1211    Bool   ok;
1212 
1213    /* Code below assumes load size is a power of two and at least 64
1214       bits. */
1215    tl_assert((szB & (szB-1)) == 0 && szL > 0);
1216 
1217    /* If this triggers, you probably just need to increase the size of
1218       the pessim array. */
1219    tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1220 
1221    for (j = 0; j < szL; j++) {
1222       pessim[j] = V_BITS64_DEFINED;
1223       res[j] = V_BITS64_UNDEFINED;
1224    }
1225 
1226    /* Make up a result V word, which contains the loaded data for
1227       valid addresses and Defined for invalid addresses.  Iterate over
1228       the bytes in the word, from the most significant down to the
1229       least.  The vbits to return are calculated into vbits128.  Also
1230       compute the pessimising value to be used when
1231       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1232       info can be gleaned from the pessim array) but is used as a
1233       cross-check. */
1234    for (j = szL-1; j >= 0; j--) {
1235       ULong vbits64    = V_BITS64_UNDEFINED;
1236       ULong pessim64   = V_BITS64_DEFINED;
1237       UWord long_index = byte_offset_w(szL, bigendian, j);
1238       for (i = 8-1; i >= 0; i--) {
1239          PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
1240          ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1241          ok = get_vbits8(ai, &vbits8);
1242          vbits64 <<= 8;
1243          vbits64 |= vbits8;
1244          if (!ok) n_addrs_bad++;
1245          pessim64 <<= 8;
1246          pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1247       }
1248       res[long_index] = vbits64;
1249       pessim[long_index] = pessim64;
1250    }
1251 
1252    /* In the common case, all the addresses involved are valid, so we
1253       just return the computed V bits and have done. */
1254    if (LIKELY(n_addrs_bad == 0))
1255       return;
1256 
1257    /* If there's no possibility of getting a partial-loads-ok
1258       exemption, report the error and quit. */
1259    if (!MC_(clo_partial_loads_ok)) {
1260       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1261       return;
1262    }
1263 
1264    /* The partial-loads-ok excemption might apply.  Find out if it
1265       does.  If so, don't report an addressing error, but do return
1266       Undefined for the bytes that are out of range, so as to avoid
1267       false negatives.  If it doesn't apply, just report an addressing
1268       error in the usual way. */
1269 
1270    /* Some code steps along byte strings in aligned chunks
1271       even when there is only a partially defined word at the end (eg,
1272       optimised strlen).  This is allowed by the memory model of
1273       modern machines, since an aligned load cannot span two pages and
1274       thus cannot "partially fault".
1275 
1276       Therefore, a load from a partially-addressible place is allowed
1277       if all of the following hold:
1278       - the command-line flag is set [by default, it isn't]
1279       - it's an aligned load
1280       - at least one of the addresses in the word *is* valid
1281 
1282       Since this suppresses the addressing error, we avoid false
1283       negatives by marking bytes undefined when they come from an
1284       invalid address.
1285    */
1286 
1287    /* "at least one of the addresses is invalid" */
1288    ok = False;
1289    for (j = 0; j < szL; j++)
1290       ok |= pessim[j] != V_BITS64_DEFINED;
1291    tl_assert(ok);
1292 
1293    if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1294       /* Exemption applies.  Use the previously computed pessimising
1295          value and return the combined result, but don't flag an
1296          addressing error.  The pessimising value is Defined for valid
1297          addresses and Undefined for invalid addresses. */
1298       /* for assumption that doing bitwise or implements UifU */
1299       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1300       /* (really need "UifU" here...)
1301          vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
1302       for (j = szL-1; j >= 0; j--)
1303          res[j] |= pessim[j];
1304       return;
1305    }
1306 
1307    /* Exemption doesn't apply.  Flag an addressing error in the normal
1308       way. */
1309    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1310 }
1311 
1312 
1313 static
1314 __attribute__((noinline))
1315 __attribute__((used))
1316 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1317                  this function may get called from hand written assembly. */
mc_LOADVn_slow(Addr a,SizeT nBits,Bool bigendian)1318 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1319 {
1320    PROF_EVENT(MCPE_LOADVN_SLOW);
1321 
1322    /* ------------ BEGIN semi-fast cases ------------ */
1323    /* These deal quickly-ish with the common auxiliary primary map
1324       cases on 64-bit platforms.  Are merely a speedup hack; can be
1325       omitted without loss of correctness/functionality.  Note that in
1326       both cases the "sizeof(void*) == 8" causes these cases to be
1327       folded out by compilers on 32-bit platforms.  These are derived
1328       from LOADV64 and LOADV32.
1329    */
1330    if (LIKELY(sizeof(void*) == 8
1331                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1332       SecMap* sm       = get_secmap_for_reading(a);
1333       UWord   sm_off16 = SM_OFF_16(a);
1334       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1335       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1336          return V_BITS64_DEFINED;
1337       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1338          return V_BITS64_UNDEFINED;
1339       /* else fall into the slow case */
1340    }
1341    if (LIKELY(sizeof(void*) == 8
1342                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1343       SecMap* sm = get_secmap_for_reading(a);
1344       UWord sm_off = SM_OFF(a);
1345       UWord vabits8 = sm->vabits8[sm_off];
1346       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1347          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1348       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1349          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1350       /* else fall into slow case */
1351    }
1352    /* ------------ END semi-fast cases ------------ */
1353 
1354    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
1355    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
1356    SSizeT szB         = nBits / 8;
1357    SSizeT i;          /* Must be signed. */
1358    SizeT  n_addrs_bad = 0;
1359    Addr   ai;
1360    UChar  vbits8;
1361    Bool   ok;
1362 
1363    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1364 
1365    /* Make up a 64-bit result V word, which contains the loaded data
1366       for valid addresses and Defined for invalid addresses.  Iterate
1367       over the bytes in the word, from the most significant down to
1368       the least.  The vbits to return are calculated into vbits64.
1369       Also compute the pessimising value to be used when
1370       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1371       info can be gleaned from pessim64) but is used as a
1372       cross-check. */
1373    for (i = szB-1; i >= 0; i--) {
1374       PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
1375       ai = a + byte_offset_w(szB, bigendian, i);
1376       ok = get_vbits8(ai, &vbits8);
1377       vbits64 <<= 8;
1378       vbits64 |= vbits8;
1379       if (!ok) n_addrs_bad++;
1380       pessim64 <<= 8;
1381       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1382    }
1383 
1384    /* In the common case, all the addresses involved are valid, so we
1385       just return the computed V bits and have done. */
1386    if (LIKELY(n_addrs_bad == 0))
1387       return vbits64;
1388 
1389    /* If there's no possibility of getting a partial-loads-ok
1390       exemption, report the error and quit. */
1391    if (!MC_(clo_partial_loads_ok)) {
1392       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1393       return vbits64;
1394    }
1395 
1396    /* The partial-loads-ok excemption might apply.  Find out if it
1397       does.  If so, don't report an addressing error, but do return
1398       Undefined for the bytes that are out of range, so as to avoid
1399       false negatives.  If it doesn't apply, just report an addressing
1400       error in the usual way. */
1401 
1402    /* Some code steps along byte strings in aligned word-sized chunks
1403       even when there is only a partially defined word at the end (eg,
1404       optimised strlen).  This is allowed by the memory model of
1405       modern machines, since an aligned load cannot span two pages and
1406       thus cannot "partially fault".  Despite such behaviour being
1407       declared undefined by ANSI C/C++.
1408 
1409       Therefore, a load from a partially-addressible place is allowed
1410       if all of the following hold:
1411       - the command-line flag is set [by default, it isn't]
1412       - it's a word-sized, word-aligned load
1413       - at least one of the addresses in the word *is* valid
1414 
1415       Since this suppresses the addressing error, we avoid false
1416       negatives by marking bytes undefined when they come from an
1417       invalid address.
1418    */
1419 
1420    /* "at least one of the addresses is invalid" */
1421    tl_assert(pessim64 != V_BITS64_DEFINED);
1422 
1423    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1424        && n_addrs_bad < VG_WORDSIZE) {
1425       /* Exemption applies.  Use the previously computed pessimising
1426          value for vbits64 and return the combined result, but don't
1427          flag an addressing error.  The pessimising value is Defined
1428          for valid addresses and Undefined for invalid addresses. */
1429       /* for assumption that doing bitwise or implements UifU */
1430       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1431       /* (really need "UifU" here...)
1432          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1433       vbits64 |= pessim64;
1434       return vbits64;
1435    }
1436 
1437    /* Also, in appears that gcc generates string-stepping code in
1438       32-bit chunks on 64 bit platforms.  So, also grant an exception
1439       for this case.  Note that the first clause of the conditional
1440       (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1441       will get folded out in 32 bit builds. */
1442    if (VG_WORDSIZE == 8
1443        && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1444       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1445       /* (really need "UifU" here...)
1446          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1447       vbits64 |= pessim64;
1448       /* Mark the upper 32 bits as undefined, just to be on the safe
1449          side. */
1450       vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1451       return vbits64;
1452    }
1453 
1454    /* Exemption doesn't apply.  Flag an addressing error in the normal
1455       way. */
1456    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1457 
1458    return vbits64;
1459 }
1460 
1461 
1462 static
1463 __attribute__((noinline))
mc_STOREVn_slow(Addr a,SizeT nBits,ULong vbytes,Bool bigendian)1464 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1465 {
1466    SizeT szB = nBits / 8;
1467    SizeT i, n_addrs_bad = 0;
1468    UChar vbits8;
1469    Addr  ai;
1470    Bool  ok;
1471 
1472    PROF_EVENT(MCPE_STOREVN_SLOW);
1473 
1474    /* ------------ BEGIN semi-fast cases ------------ */
1475    /* These deal quickly-ish with the common auxiliary primary map
1476       cases on 64-bit platforms.  Are merely a speedup hack; can be
1477       omitted without loss of correctness/functionality.  Note that in
1478       both cases the "sizeof(void*) == 8" causes these cases to be
1479       folded out by compilers on 32-bit platforms.  The logic below
1480       is somewhat similar to some cases extensively commented in
1481       MC_(helperc_STOREV8).
1482    */
1483    if (LIKELY(sizeof(void*) == 8
1484                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1485       SecMap* sm       = get_secmap_for_reading(a);
1486       UWord   sm_off16 = SM_OFF_16(a);
1487       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1488       if (LIKELY( !is_distinguished_sm(sm) &&
1489                           (VA_BITS16_DEFINED   == vabits16 ||
1490                            VA_BITS16_UNDEFINED == vabits16) )) {
1491          /* Handle common case quickly: a is suitably aligned, */
1492          /* is mapped, and is addressible. */
1493          // Convert full V-bits in register to compact 2-bit form.
1494          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1495             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1496             return;
1497          } else if (V_BITS64_UNDEFINED == vbytes) {
1498             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1499             return;
1500          }
1501          /* else fall into the slow case */
1502       }
1503       /* else fall into the slow case */
1504    }
1505    if (LIKELY(sizeof(void*) == 8
1506                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1507       SecMap* sm      = get_secmap_for_reading(a);
1508       UWord   sm_off  = SM_OFF(a);
1509       UWord   vabits8 = sm->vabits8[sm_off];
1510       if (LIKELY( !is_distinguished_sm(sm) &&
1511                           (VA_BITS8_DEFINED   == vabits8 ||
1512                            VA_BITS8_UNDEFINED == vabits8) )) {
1513          /* Handle common case quickly: a is suitably aligned, */
1514          /* is mapped, and is addressible. */
1515          // Convert full V-bits in register to compact 2-bit form.
1516          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1517             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1518             return;
1519          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1520             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1521             return;
1522          }
1523          /* else fall into the slow case */
1524       }
1525       /* else fall into the slow case */
1526    }
1527    /* ------------ END semi-fast cases ------------ */
1528 
1529    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1530 
1531    /* Dump vbytes in memory, iterating from least to most significant
1532       byte.  At the same time establish addressibility of the location. */
1533    for (i = 0; i < szB; i++) {
1534       PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1535       ai     = a + byte_offset_w(szB, bigendian, i);
1536       vbits8 = vbytes & 0xff;
1537       ok     = set_vbits8(ai, vbits8);
1538       if (!ok) n_addrs_bad++;
1539       vbytes >>= 8;
1540    }
1541 
1542    /* If an address error has happened, report it. */
1543    if (n_addrs_bad > 0)
1544       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1545 }
1546 
1547 
1548 /*------------------------------------------------------------*/
1549 /*--- Setting permissions over address ranges.             ---*/
1550 /*------------------------------------------------------------*/
1551 
set_address_range_perms(Addr a,SizeT lenT,UWord vabits16,UWord dsm_num)1552 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1553                                       UWord dsm_num )
1554 {
1555    UWord    sm_off, sm_off16;
1556    UWord    vabits2 = vabits16 & 0x3;
1557    SizeT    lenA, lenB, len_to_next_secmap;
1558    Addr     aNext;
1559    SecMap*  sm;
1560    SecMap** sm_ptr;
1561    SecMap*  example_dsm;
1562 
1563    PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1564 
1565    /* Check the V+A bits make sense. */
1566    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
1567              VA_BITS16_UNDEFINED == vabits16 ||
1568              VA_BITS16_DEFINED   == vabits16);
1569 
1570    // This code should never write PDBs;  ensure this.  (See comment above
1571    // set_vabits2().)
1572    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1573 
1574    if (lenT == 0)
1575       return;
1576 
1577    if (lenT > 256 * 1024 * 1024) {
1578       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1579          const HChar* s = "unknown???";
1580          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1581          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1582          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
1583          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1584                                   "large range [0x%lx, 0x%lx) (%s)\n",
1585                                   a, a + lenT, s);
1586       }
1587    }
1588 
1589 #ifndef PERF_FAST_SARP
1590    /*------------------ debug-only case ------------------ */
1591    {
1592       // Endianness doesn't matter here because all bytes are being set to
1593       // the same value.
1594       // Nb: We don't have to worry about updating the sec-V-bits table
1595       // after these set_vabits2() calls because this code never writes
1596       // VA_BITS2_PARTDEFINED values.
1597       SizeT i;
1598       for (i = 0; i < lenT; i++) {
1599          set_vabits2(a + i, vabits2);
1600       }
1601       return;
1602    }
1603 #endif
1604 
1605    /*------------------ standard handling ------------------ */
1606 
1607    /* Get the distinguished secondary that we might want
1608       to use (part of the space-compression scheme). */
1609    example_dsm = &sm_distinguished[dsm_num];
1610 
1611    // We have to handle ranges covering various combinations of partial and
1612    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
1613    // Cases marked with a '*' are common.
1614    //
1615    //   TYPE                                             PARTS USED
1616    //   ----                                             ----------
1617    // * one partial sec-map                  (p)         1
1618    // - one whole sec-map                    (P)         2
1619    //
1620    // * two partial sec-maps                 (pp)        1,3
1621    // - one partial, one whole sec-map       (pP)        1,2
1622    // - one whole, one partial sec-map       (Pp)        2,3
1623    // - two whole sec-maps                   (PP)        2,2
1624    //
1625    // * one partial, one whole, one partial  (pPp)       1,2,3
1626    // - one partial, two whole               (pPP)       1,2,2
1627    // - two whole, one partial               (PPp)       2,2,3
1628    // - three whole                          (PPP)       2,2,2
1629    //
1630    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
1631    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
1632    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
1633    // - N whole                              (PP...PP)   2,2...2,3
1634 
1635    // Break up total length (lenT) into two parts:  length in the first
1636    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
1637    aNext = start_of_this_sm(a) + SM_SIZE;
1638    len_to_next_secmap = aNext - a;
1639    if ( lenT <= len_to_next_secmap ) {
1640       // Range entirely within one sec-map.  Covers almost all cases.
1641       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1642       lenA = lenT;
1643       lenB = 0;
1644    } else if (is_start_of_sm(a)) {
1645       // Range spans at least one whole sec-map, and starts at the beginning
1646       // of a sec-map; skip to Part 2.
1647       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1648       lenA = 0;
1649       lenB = lenT;
1650       goto part2;
1651    } else {
1652       // Range spans two or more sec-maps, first one is partial.
1653       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1654       lenA = len_to_next_secmap;
1655       lenB = lenT - lenA;
1656    }
1657 
1658    //------------------------------------------------------------------------
1659    // Part 1: Deal with the first sec_map.  Most of the time the range will be
1660    // entirely within a sec_map and this part alone will suffice.  Also,
1661    // doing it this way lets us avoid repeatedly testing for the crossing of
1662    // a sec-map boundary within these loops.
1663    //------------------------------------------------------------------------
1664 
1665    // If it's distinguished, make it undistinguished if necessary.
1666    sm_ptr = get_secmap_ptr(a);
1667    if (is_distinguished_sm(*sm_ptr)) {
1668       if (*sm_ptr == example_dsm) {
1669          // Sec-map already has the V+A bits that we want, so skip.
1670          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1671          a    = aNext;
1672          lenA = 0;
1673       } else {
1674          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1675          *sm_ptr = copy_for_writing(*sm_ptr);
1676       }
1677    }
1678    sm = *sm_ptr;
1679 
1680    // 1 byte steps
1681    while (True) {
1682       if (VG_IS_8_ALIGNED(a)) break;
1683       if (lenA < 1)           break;
1684       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1685       sm_off = SM_OFF(a);
1686       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1687       a    += 1;
1688       lenA -= 1;
1689    }
1690    // 8-aligned, 8 byte steps
1691    while (True) {
1692       if (lenA < 8) break;
1693       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1694       sm_off16 = SM_OFF_16(a);
1695       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1696       a    += 8;
1697       lenA -= 8;
1698    }
1699    // 1 byte steps
1700    while (True) {
1701       if (lenA < 1) break;
1702       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1703       sm_off = SM_OFF(a);
1704       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1705       a    += 1;
1706       lenA -= 1;
1707    }
1708 
1709    // We've finished the first sec-map.  Is that it?
1710    if (lenB == 0)
1711       return;
1712 
1713    //------------------------------------------------------------------------
1714    // Part 2: Fast-set entire sec-maps at a time.
1715    //------------------------------------------------------------------------
1716   part2:
1717    // 64KB-aligned, 64KB steps.
1718    // Nb: we can reach here with lenB < SM_SIZE
1719    tl_assert(0 == lenA);
1720    while (True) {
1721       if (lenB < SM_SIZE) break;
1722       tl_assert(is_start_of_sm(a));
1723       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1724       sm_ptr = get_secmap_ptr(a);
1725       if (!is_distinguished_sm(*sm_ptr)) {
1726          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1727          // Free the non-distinguished sec-map that we're replacing.  This
1728          // case happens moderately often, enough to be worthwhile.
1729          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1730          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1731       }
1732       update_SM_counts(*sm_ptr, example_dsm);
1733       // Make the sec-map entry point to the example DSM
1734       *sm_ptr = example_dsm;
1735       lenB -= SM_SIZE;
1736       a    += SM_SIZE;
1737    }
1738 
1739    // We've finished the whole sec-maps.  Is that it?
1740    if (lenB == 0)
1741       return;
1742 
1743    //------------------------------------------------------------------------
1744    // Part 3: Finish off the final partial sec-map, if necessary.
1745    //------------------------------------------------------------------------
1746 
1747    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1748 
1749    // If it's distinguished, make it undistinguished if necessary.
1750    sm_ptr = get_secmap_ptr(a);
1751    if (is_distinguished_sm(*sm_ptr)) {
1752       if (*sm_ptr == example_dsm) {
1753          // Sec-map already has the V+A bits that we want, so stop.
1754          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1755          return;
1756       } else {
1757          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1758          *sm_ptr = copy_for_writing(*sm_ptr);
1759       }
1760    }
1761    sm = *sm_ptr;
1762 
1763    // 8-aligned, 8 byte steps
1764    while (True) {
1765       if (lenB < 8) break;
1766       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1767       sm_off16 = SM_OFF_16(a);
1768       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1769       a    += 8;
1770       lenB -= 8;
1771    }
1772    // 1 byte steps
1773    while (True) {
1774       if (lenB < 1) return;
1775       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1776       sm_off = SM_OFF(a);
1777       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1778       a    += 1;
1779       lenB -= 1;
1780    }
1781 }
1782 
1783 
1784 /* --- Set permissions for arbitrary address ranges --- */
1785 
MC_(make_mem_noaccess)1786 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1787 {
1788    PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1789    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1790    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1791    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1792       ocache_sarp_Clear_Origins ( a, len );
1793 }
1794 
make_mem_undefined(Addr a,SizeT len)1795 static void make_mem_undefined ( Addr a, SizeT len )
1796 {
1797    PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1798    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1799    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1800 }
1801 
MC_(make_mem_undefined_w_otag)1802 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1803 {
1804    PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1805    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1806    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1807    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1808       ocache_sarp_Set_Origins ( a, len, otag );
1809 }
1810 
1811 static
make_mem_undefined_w_tid_and_okind(Addr a,SizeT len,ThreadId tid,UInt okind)1812 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1813                                           ThreadId tid, UInt okind )
1814 {
1815    UInt        ecu;
1816    ExeContext* here;
1817    /* VG_(record_ExeContext) checks for validity of tid, and asserts
1818       if it is invalid.  So no need to do it here. */
1819    tl_assert(okind <= 3);
1820    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1821    tl_assert(here);
1822    ecu = VG_(get_ECU_from_ExeContext)(here);
1823    tl_assert(VG_(is_plausible_ECU)(ecu));
1824    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1825 }
1826 
1827 static
mc_new_mem_w_tid_make_ECU(Addr a,SizeT len,ThreadId tid)1828 void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
1829 {
1830    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1831 }
1832 
1833 static
mc_new_mem_w_tid_no_ECU(Addr a,SizeT len,ThreadId tid)1834 void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
1835 {
1836    MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1837 }
1838 
MC_(make_mem_defined)1839 void MC_(make_mem_defined) ( Addr a, SizeT len )
1840 {
1841    PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1842    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1843    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1844    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1845       ocache_sarp_Clear_Origins ( a, len );
1846 }
1847 
1848 __attribute__((unused))
make_mem_defined_w_tid(Addr a,SizeT len,ThreadId tid)1849 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1850 {
1851    MC_(make_mem_defined)(a, len);
1852 }
1853 
1854 /* For each byte in [a,a+len), if the byte is addressable, make it be
1855    defined, but if it isn't addressible, leave it alone.  In other
1856    words a version of MC_(make_mem_defined) that doesn't mess with
1857    addressibility.  Low-performance implementation. */
make_mem_defined_if_addressable(Addr a,SizeT len)1858 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1859 {
1860    SizeT i;
1861    UChar vabits2;
1862    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1863    for (i = 0; i < len; i++) {
1864       vabits2 = get_vabits2( a+i );
1865       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1866          set_vabits2(a+i, VA_BITS2_DEFINED);
1867          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1868             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1869          }
1870       }
1871    }
1872 }
1873 
1874 /* Similarly (needed for mprotect handling ..) */
make_mem_defined_if_noaccess(Addr a,SizeT len)1875 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1876 {
1877    SizeT i;
1878    UChar vabits2;
1879    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1880    for (i = 0; i < len; i++) {
1881       vabits2 = get_vabits2( a+i );
1882       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1883          set_vabits2(a+i, VA_BITS2_DEFINED);
1884          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1885             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1886          }
1887       }
1888    }
1889 }
1890 
1891 /* --- Block-copy permissions (needed for implementing realloc() and
1892        sys_mremap). --- */
1893 
MC_(copy_address_range_state)1894 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1895 {
1896    SizeT i, j;
1897    UChar vabits2, vabits8;
1898    Bool  aligned, nooverlap;
1899 
1900    DEBUG("MC_(copy_address_range_state)\n");
1901    PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
1902 
1903    if (len == 0 || src == dst)
1904       return;
1905 
1906    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1907    nooverlap = src+len <= dst || dst+len <= src;
1908 
1909    if (nooverlap && aligned) {
1910 
1911       /* Vectorised fast case, when no overlap and suitably aligned */
1912       /* vector loop */
1913       i = 0;
1914       while (len >= 4) {
1915          vabits8 = get_vabits8_for_aligned_word32( src+i );
1916          set_vabits8_for_aligned_word32( dst+i, vabits8 );
1917          if (LIKELY(VA_BITS8_DEFINED == vabits8
1918                             || VA_BITS8_UNDEFINED == vabits8
1919                             || VA_BITS8_NOACCESS == vabits8)) {
1920             /* do nothing */
1921          } else {
1922             /* have to copy secondary map info */
1923             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1924                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1925             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1926                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1927             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1928                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1929             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1930                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1931          }
1932          i += 4;
1933          len -= 4;
1934       }
1935       /* fixup loop */
1936       while (len >= 1) {
1937          vabits2 = get_vabits2( src+i );
1938          set_vabits2( dst+i, vabits2 );
1939          if (VA_BITS2_PARTDEFINED == vabits2) {
1940             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1941          }
1942          i++;
1943          len--;
1944       }
1945 
1946    } else {
1947 
1948       /* We have to do things the slow way */
1949       if (src < dst) {
1950          for (i = 0, j = len-1; i < len; i++, j--) {
1951             PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
1952             vabits2 = get_vabits2( src+j );
1953             set_vabits2( dst+j, vabits2 );
1954             if (VA_BITS2_PARTDEFINED == vabits2) {
1955                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1956             }
1957          }
1958       }
1959 
1960       if (src > dst) {
1961          for (i = 0; i < len; i++) {
1962             PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
1963             vabits2 = get_vabits2( src+i );
1964             set_vabits2( dst+i, vabits2 );
1965             if (VA_BITS2_PARTDEFINED == vabits2) {
1966                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1967             }
1968          }
1969       }
1970    }
1971 
1972 }
1973 
1974 
1975 /*------------------------------------------------------------*/
1976 /*--- Origin tracking stuff - cache basics                 ---*/
1977 /*------------------------------------------------------------*/
1978 
1979 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1980    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1981 
1982    Note that this implementation draws inspiration from the "origin
1983    tracking by value piggybacking" scheme described in "Tracking Bad
1984    Apples: Reporting the Origin of Null and Undefined Value Errors"
1985    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1986    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1987    implemented completely differently.
1988 
1989    Origin tags and ECUs -- about the shadow values
1990    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1991 
1992    This implementation tracks the defining point of all uninitialised
1993    values using so called "origin tags", which are 32-bit integers,
1994    rather than using the values themselves to encode the origins.  The
1995    latter, so-called value piggybacking", is what the OOPSLA07 paper
1996    describes.
1997 
1998    Origin tags, as tracked by the machinery below, are 32-bit unsigned
1999    ints (UInts), regardless of the machine's word size.  Each tag
2000    comprises an upper 30-bit ECU field and a lower 2-bit
2001    'kind' field.  The ECU field is a number given out by m_execontext
2002    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
2003    directly as an origin tag (otag), but in fact we want to put
2004    additional information 'kind' field to indicate roughly where the
2005    tag came from.  This helps print more understandable error messages
2006    for the user -- it has no other purpose.  In summary:
2007 
2008    * Both ECUs and origin tags are represented as 32-bit words
2009 
2010    * m_execontext and the core-tool interface deal purely in ECUs.
2011      They have no knowledge of origin tags - that is a purely
2012      Memcheck-internal matter.
2013 
2014    * all valid ECUs have the lowest 2 bits zero and at least
2015      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2016 
2017    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2018      constants defined in mc_include.h.
2019 
2020    * to convert an otag back to an ECU, AND it with ~3
2021 
2022    One important fact is that no valid otag is zero.  A zero otag is
2023    used by the implementation to indicate "no origin", which could
2024    mean that either the value is defined, or it is undefined but the
2025    implementation somehow managed to lose the origin.
2026 
2027    The ECU used for memory created by malloc etc is derived from the
2028    stack trace at the time the malloc etc happens.  This means the
2029    mechanism can show the exact allocation point for heap-created
2030    uninitialised values.
2031 
2032    In contrast, it is simply too expensive to create a complete
2033    backtrace for each stack allocation.  Therefore we merely use a
2034    depth-1 backtrace for stack allocations, which can be done once at
2035    translation time, rather than N times at run time.  The result of
2036    this is that, for stack created uninitialised values, Memcheck can
2037    only show the allocating function, and not what called it.
2038    Furthermore, compilers tend to move the stack pointer just once at
2039    the start of the function, to allocate all locals, and so in fact
2040    the stack origin almost always simply points to the opening brace
2041    of the function.  Net result is, for stack origins, the mechanism
2042    can tell you in which function the undefined value was created, but
2043    that's all.  Users will need to carefully check all locals in the
2044    specified function.
2045 
2046    Shadowing registers and memory
2047    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2048 
2049    Memory is shadowed using a two level cache structure (ocacheL1 and
2050    ocacheL2).  Memory references are first directed to ocacheL1.  This
2051    is a traditional 2-way set associative cache with 32-byte lines and
2052    approximate LRU replacement within each set.
2053 
2054    A naive implementation would require storing one 32 bit otag for
2055    each byte of memory covered, a 4:1 space overhead.  Instead, there
2056    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2057    that shows which of the 4 bytes have that shadow value and which
2058    have a shadow value of zero (indicating no origin).  Hence a lot of
2059    space is saved, but the cost is that only one different origin per
2060    4 bytes of address space can be represented.  This is a source of
2061    imprecision, but how much of a problem it really is remains to be
2062    seen.
2063 
2064    A cache line that contains all zeroes ("no origins") contains no
2065    useful information, and can be ejected from the L1 cache "for
2066    free", in the sense that a read miss on the L1 causes a line of
2067    zeroes to be installed.  However, ejecting a line containing
2068    nonzeroes risks losing origin information permanently.  In order to
2069    prevent such lossage, ejected nonzero lines are placed in a
2070    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2071    lines.  This can grow arbitrarily large, and so should ensure that
2072    Memcheck runs out of memory in preference to losing useful origin
2073    info due to cache size limitations.
2074 
2075    Shadowing registers is a bit tricky, because the shadow values are
2076    32 bits, regardless of the size of the register.  That gives a
2077    problem for registers smaller than 32 bits.  The solution is to
2078    find spaces in the guest state that are unused, and use those to
2079    shadow guest state fragments smaller than 32 bits.  For example, on
2080    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
2081    shadow are allocated for the register's otag, then there are still
2082    12 bytes left over which could be used to shadow 3 other values.
2083 
2084    This implies there is some non-obvious mapping from guest state
2085    (start,length) pairs to the relevant shadow offset (for the origin
2086    tags).  And it is unfortunately guest-architecture specific.  The
2087    mapping is contained in mc_machine.c, which is quite lengthy but
2088    straightforward.
2089 
2090    Instrumenting the IR
2091    ~~~~~~~~~~~~~~~~~~~~
2092 
2093    Instrumentation is largely straightforward, and done by the
2094    functions schemeE and schemeS in mc_translate.c.  These generate
2095    code for handling the origin tags of expressions (E) and statements
2096    (S) respectively.  The rather strange names are a reference to the
2097    "compilation schemes" shown in Simon Peyton Jones' book "The
2098    Implementation of Functional Programming Languages" (Prentice Hall,
2099    1987, see
2100    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2101 
2102    schemeS merely arranges to move shadow values around the guest
2103    state to track the incoming IR.  schemeE is largely trivial too.
2104    The only significant point is how to compute the otag corresponding
2105    to binary (or ternary, quaternary, etc) operator applications.  The
2106    rule is simple: just take whichever value is larger (32-bit
2107    unsigned max).  Constants get the special value zero.  Hence this
2108    rule always propagates a nonzero (known) otag in preference to a
2109    zero (unknown, or more likely, value-is-defined) tag, as we want.
2110    If two different undefined values are inputs to a binary operator
2111    application, then which is propagated is arbitrary, but that
2112    doesn't matter, since the program is erroneous in using either of
2113    the values, and so there's no point in attempting to propagate
2114    both.
2115 
2116    Since constants are abstracted to (otag) zero, much of the
2117    instrumentation code can be folded out without difficulty by the
2118    generic post-instrumentation IR cleanup pass, using these rules:
2119    Max32U(0,x) -> x, Max32U(x,0) -> x, Max32(x,y) where x and y are
2120    constants is evaluated at JIT time.  And the resulting dead code
2121    removal.  In practice this causes surprisingly few Max32Us to
2122    survive through to backend code generation.
2123 
2124    Integration with the V-bits machinery
2125    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2126 
2127    This is again largely straightforward.  Mostly the otag and V bits
2128    stuff are independent.  The only point of interaction is when the V
2129    bits instrumenter creates a call to a helper function to report an
2130    uninitialised value error -- in that case it must first use schemeE
2131    to get hold of the origin tag expression for the value, and pass
2132    that to the helper too.
2133 
2134    There is the usual stuff to do with setting address range
2135    permissions.  When memory is painted undefined, we must also know
2136    the origin tag to paint with, which involves some tedious plumbing,
2137    particularly to do with the fast case stack handlers.  When memory
2138    is painted defined or noaccess then the origin tags must be forced
2139    to zero.
2140 
2141    One of the goals of the implementation was to ensure that the
2142    non-origin tracking mode isn't slowed down at all.  To do this,
2143    various functions to do with memory permissions setting (again,
2144    mostly pertaining to the stack) are duplicated for the with- and
2145    without-otag case.
2146 
2147    Dealing with stack redzones, and the NIA cache
2148    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2149 
2150    This is one of the few non-obvious parts of the implementation.
2151 
2152    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2153    reserved area below the stack pointer, that can be used as scratch
2154    space by compiler generated code for functions.  In the Memcheck
2155    sources this is referred to as the "stack redzone".  The important
2156    thing here is that such redzones are considered volatile across
2157    function calls and returns.  So Memcheck takes care to mark them as
2158    undefined for each call and return, on the afflicted platforms.
2159    Past experience shows this is essential in order to get reliable
2160    messages about uninitialised values that come from the stack.
2161 
2162    So the question is, when we paint a redzone undefined, what origin
2163    tag should we use for it?  Consider a function f() calling g().  If
2164    we paint the redzone using an otag derived from the ExeContext of
2165    the CALL/BL instruction in f, then any errors in g causing it to
2166    use uninitialised values that happen to lie in the redzone, will be
2167    reported as having their origin in f.  Which is highly confusing.
2168 
2169    The same applies for returns: if, on a return, we paint the redzone
2170    using a origin tag derived from the ExeContext of the RET/BLR
2171    instruction in g, then any later errors in f causing it to use
2172    uninitialised values in the redzone, will be reported as having
2173    their origin in g.  Which is just as confusing.
2174 
2175    To do it right, in both cases we need to use an origin tag which
2176    pertains to the instruction which dynamically follows the CALL/BL
2177    or RET/BLR.  In short, one derived from the NIA - the "next
2178    instruction address".
2179 
2180    To make this work, Memcheck's redzone-painting helper,
2181    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2182    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
2183    ExeContext's ECU as the basis for the otag used to paint the
2184    redzone.  The expensive part of this is converting an NIA into an
2185    ECU, since this happens once for every call and every return.  So
2186    we use a simple 511-line, 2-way set associative cache
2187    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2188    the cost out.
2189 
2190    Further background comments
2191    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2192 
2193    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
2194    > it really just the address of the relevant ExeContext?
2195 
2196    Well, it's not the address, but a value which has a 1-1 mapping
2197    with ExeContexts, and is guaranteed not to be zero, since zero
2198    denotes (to memcheck) "unknown origin or defined value".  So these
2199    UInts are just numbers starting at 4 and incrementing by 4; each
2200    ExeContext is given a number when it is created.  (*** NOTE this
2201    confuses otags and ECUs; see comments above ***).
2202 
2203    Making these otags 32-bit regardless of the machine's word size
2204    makes the 64-bit implementation easier (next para).  And it doesn't
2205    really limit us in any way, since for the tags to overflow would
2206    require that the program somehow caused 2^30-1 different
2207    ExeContexts to be created, in which case it is probably in deep
2208    trouble.  Not to mention V will have soaked up many tens of
2209    gigabytes of memory merely to store them all.
2210 
2211    So having 64-bit origins doesn't really buy you anything, and has
2212    the following downsides:
2213 
2214    Suppose that instead, an otag is a UWord.  This would mean that, on
2215    a 64-bit target,
2216 
2217    1. It becomes hard to shadow any element of guest state which is
2218       smaller than 8 bytes.  To do so means you'd need to find some
2219       8-byte-sized hole in the guest state which you don't want to
2220       shadow, and use that instead to hold the otag.  On ppc64, the
2221       condition code register(s) are split into 20 UChar sized pieces,
2222       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2223       and so that would entail finding 160 bytes somewhere else in the
2224       guest state.
2225 
2226       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2227       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2228       same) and so I had to look for 4 untracked otag-sized areas in
2229       the guest state to make that possible.
2230 
2231       The same problem exists of course when origin tags are only 32
2232       bits, but it's less extreme.
2233 
2234    2. (More compelling) it doubles the size of the origin shadow
2235       memory.  Given that the shadow memory is organised as a fixed
2236       size cache, and that accuracy of tracking is limited by origins
2237       falling out the cache due to space conflicts, this isn't good.
2238 
2239    > Another question: is the origin tracking perfect, or are there
2240    > cases where it fails to determine an origin?
2241 
2242    It is imperfect for at least for the following reasons, and
2243    probably more:
2244 
2245    * Insufficient capacity in the origin cache.  When a line is
2246      evicted from the cache it is gone forever, and so subsequent
2247      queries for the line produce zero, indicating no origin
2248      information.  Interestingly, a line containing all zeroes can be
2249      evicted "free" from the cache, since it contains no useful
2250      information, so there is scope perhaps for some cleverer cache
2251      management schemes.  (*** NOTE, with the introduction of the
2252      second level origin tag cache, ocacheL2, this is no longer a
2253      problem. ***)
2254 
2255    * The origin cache only stores one otag per 32-bits of address
2256      space, plus 4 bits indicating which of the 4 bytes has that tag
2257      and which are considered defined.  The result is that if two
2258      undefined bytes in the same word are stored in memory, the first
2259      stored byte's origin will be lost and replaced by the origin for
2260      the second byte.
2261 
2262    * Nonzero origin tags for defined values.  Consider a binary
2263      operator application op(x,y).  Suppose y is undefined (and so has
2264      a valid nonzero origin tag), and x is defined, but erroneously
2265      has a nonzero origin tag (defined values should have tag zero).
2266      If the erroneous tag has a numeric value greater than y's tag,
2267      then the rule for propagating origin tags though binary
2268      operations, which is simply to take the unsigned max of the two
2269      tags, will erroneously propagate x's tag rather than y's.
2270 
2271    * Some obscure uses of x86/amd64 byte registers can cause lossage
2272      or confusion of origins.  %AH .. %DH are treated as different
2273      from, and unrelated to, their parent registers, %EAX .. %EDX.
2274      So some weird sequences like
2275 
2276         movb undefined-value, %AH
2277         movb defined-value, %AL
2278         .. use %AX or %EAX ..
2279 
2280      will cause the origin attributed to %AH to be ignored, since %AL,
2281      %AX, %EAX are treated as the same register, and %AH as a
2282      completely separate one.
2283 
2284    But having said all that, it actually seems to work fairly well in
2285    practice.
2286 */
2287 
2288 static UWord stats_ocacheL1_find           = 0;
2289 static UWord stats_ocacheL1_found_at_1     = 0;
2290 static UWord stats_ocacheL1_found_at_N     = 0;
2291 static UWord stats_ocacheL1_misses         = 0;
2292 static UWord stats_ocacheL1_lossage        = 0;
2293 static UWord stats_ocacheL1_movefwds       = 0;
2294 
2295 static UWord stats__ocacheL2_refs          = 0;
2296 static UWord stats__ocacheL2_misses        = 0;
2297 static UWord stats__ocacheL2_n_nodes_max   = 0;
2298 
2299 /* Cache of 32-bit values, one every 32 bits of address space */
2300 
2301 #define OC_BITS_PER_LINE 5
2302 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2303 
oc_line_offset(Addr a)2304 static INLINE UWord oc_line_offset ( Addr a ) {
2305    return (a >> 2) & (OC_W32S_PER_LINE - 1);
2306 }
is_valid_oc_tag(Addr tag)2307 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2308    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2309 }
2310 
2311 #define OC_LINES_PER_SET 2
2312 
2313 #define OC_N_SET_BITS    20
2314 #define OC_N_SETS        (1 << OC_N_SET_BITS)
2315 
2316 /* These settings give:
2317    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2318    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2319 */
2320 
2321 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2322 
2323 
2324 typedef
2325    struct {
2326       Addr  tag;
2327       UInt  w32[OC_W32S_PER_LINE];
2328       UChar descr[OC_W32S_PER_LINE];
2329    }
2330    OCacheLine;
2331 
2332 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2333    in use, 'n' (nonzero) if it contains at least one valid origin tag,
2334    and 'z' if all the represented tags are zero. */
classify_OCacheLine(OCacheLine * line)2335 static UChar classify_OCacheLine ( OCacheLine* line )
2336 {
2337    UWord i;
2338    if (line->tag == 1/*invalid*/)
2339       return 'e'; /* EMPTY */
2340    tl_assert(is_valid_oc_tag(line->tag));
2341    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2342       tl_assert(0 == ((~0xF) & line->descr[i]));
2343       if (line->w32[i] > 0 && line->descr[i] > 0)
2344          return 'n'; /* NONZERO - contains useful info */
2345    }
2346    return 'z'; /* ZERO - no useful info */
2347 }
2348 
2349 typedef
2350    struct {
2351       OCacheLine line[OC_LINES_PER_SET];
2352    }
2353    OCacheSet;
2354 
2355 typedef
2356    struct {
2357       OCacheSet set[OC_N_SETS];
2358    }
2359    OCache;
2360 
2361 static OCache* ocacheL1 = NULL;
2362 static UWord   ocacheL1_event_ctr = 0;
2363 
2364 static void init_ocacheL2 ( void ); /* fwds */
init_OCache(void)2365 static void init_OCache ( void )
2366 {
2367    UWord line, set;
2368    tl_assert(MC_(clo_mc_level) >= 3);
2369    tl_assert(ocacheL1 == NULL);
2370    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2371    if (ocacheL1 == NULL) {
2372       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2373                                    sizeof(OCache) );
2374    }
2375    tl_assert(ocacheL1 != NULL);
2376    for (set = 0; set < OC_N_SETS; set++) {
2377       for (line = 0; line < OC_LINES_PER_SET; line++) {
2378          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2379       }
2380    }
2381    init_ocacheL2();
2382 }
2383 
moveLineForwards(OCacheSet * set,UWord lineno)2384 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2385 {
2386    OCacheLine tmp;
2387    stats_ocacheL1_movefwds++;
2388    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2389    tmp = set->line[lineno-1];
2390    set->line[lineno-1] = set->line[lineno];
2391    set->line[lineno] = tmp;
2392 }
2393 
zeroise_OCacheLine(OCacheLine * line,Addr tag)2394 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2395    UWord i;
2396    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2397       line->w32[i] = 0; /* NO ORIGIN */
2398       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2399    }
2400    line->tag = tag;
2401 }
2402 
2403 //////////////////////////////////////////////////////////////
2404 //// OCache backing store
2405 
2406 static OSet* ocacheL2 = NULL;
2407 
ocacheL2_malloc(const HChar * cc,SizeT szB)2408 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2409    return VG_(malloc)(cc, szB);
2410 }
ocacheL2_free(void * v)2411 static void ocacheL2_free ( void* v ) {
2412    VG_(free)( v );
2413 }
2414 
2415 /* Stats: # nodes currently in tree */
2416 static UWord stats__ocacheL2_n_nodes = 0;
2417 
init_ocacheL2(void)2418 static void init_ocacheL2 ( void )
2419 {
2420    tl_assert(!ocacheL2);
2421    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2422    tl_assert(0 == offsetof(OCacheLine,tag));
2423    ocacheL2
2424       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2425                              NULL, /* fast cmp */
2426                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2427    stats__ocacheL2_n_nodes = 0;
2428 }
2429 
2430 /* Find line with the given tag in the tree, or NULL if not found. */
ocacheL2_find_tag(Addr tag)2431 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2432 {
2433    OCacheLine* line;
2434    tl_assert(is_valid_oc_tag(tag));
2435    stats__ocacheL2_refs++;
2436    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2437    return line;
2438 }
2439 
2440 /* Delete the line with the given tag from the tree, if it is present, and
2441    free up the associated memory. */
ocacheL2_del_tag(Addr tag)2442 static void ocacheL2_del_tag ( Addr tag )
2443 {
2444    OCacheLine* line;
2445    tl_assert(is_valid_oc_tag(tag));
2446    stats__ocacheL2_refs++;
2447    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2448    if (line) {
2449       VG_(OSetGen_FreeNode)(ocacheL2, line);
2450       tl_assert(stats__ocacheL2_n_nodes > 0);
2451       stats__ocacheL2_n_nodes--;
2452    }
2453 }
2454 
2455 /* Add a copy of the given line to the tree.  It must not already be
2456    present. */
ocacheL2_add_line(OCacheLine * line)2457 static void ocacheL2_add_line ( OCacheLine* line )
2458 {
2459    OCacheLine* copy;
2460    tl_assert(is_valid_oc_tag(line->tag));
2461    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2462    *copy = *line;
2463    stats__ocacheL2_refs++;
2464    VG_(OSetGen_Insert)( ocacheL2, copy );
2465    stats__ocacheL2_n_nodes++;
2466    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2467       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2468 }
2469 
2470 ////
2471 //////////////////////////////////////////////////////////////
2472 
2473 __attribute__((noinline))
find_OCacheLine_SLOW(Addr a)2474 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2475 {
2476    OCacheLine *victim, *inL2;
2477    UChar c;
2478    UWord line;
2479    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2480    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2481    UWord tag     = a & tagmask;
2482    tl_assert(setno >= 0 && setno < OC_N_SETS);
2483 
2484    /* we already tried line == 0; skip therefore. */
2485    for (line = 1; line < OC_LINES_PER_SET; line++) {
2486       if (ocacheL1->set[setno].line[line].tag == tag) {
2487          if (line == 1) {
2488             stats_ocacheL1_found_at_1++;
2489          } else {
2490             stats_ocacheL1_found_at_N++;
2491          }
2492          if (UNLIKELY(0 == (ocacheL1_event_ctr++
2493                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2494             moveLineForwards( &ocacheL1->set[setno], line );
2495             line--;
2496          }
2497          return &ocacheL1->set[setno].line[line];
2498       }
2499    }
2500 
2501    /* A miss.  Use the last slot.  Implicitly this means we're
2502       ejecting the line in the last slot. */
2503    stats_ocacheL1_misses++;
2504    tl_assert(line == OC_LINES_PER_SET);
2505    line--;
2506    tl_assert(line > 0);
2507 
2508    /* First, move the to-be-ejected line to the L2 cache. */
2509    victim = &ocacheL1->set[setno].line[line];
2510    c = classify_OCacheLine(victim);
2511    switch (c) {
2512       case 'e':
2513          /* the line is empty (has invalid tag); ignore it. */
2514          break;
2515       case 'z':
2516          /* line contains zeroes.  We must ensure the backing store is
2517             updated accordingly, either by copying the line there
2518             verbatim, or by ensuring it isn't present there.  We
2519             chosse the latter on the basis that it reduces the size of
2520             the backing store. */
2521          ocacheL2_del_tag( victim->tag );
2522          break;
2523       case 'n':
2524          /* line contains at least one real, useful origin.  Copy it
2525             to the backing store. */
2526          stats_ocacheL1_lossage++;
2527          inL2 = ocacheL2_find_tag( victim->tag );
2528          if (inL2) {
2529             *inL2 = *victim;
2530          } else {
2531             ocacheL2_add_line( victim );
2532          }
2533          break;
2534       default:
2535          tl_assert(0);
2536    }
2537 
2538    /* Now we must reload the L1 cache from the backing tree, if
2539       possible. */
2540    tl_assert(tag != victim->tag); /* stay sane */
2541    inL2 = ocacheL2_find_tag( tag );
2542    if (inL2) {
2543       /* We're in luck.  It's in the L2. */
2544       ocacheL1->set[setno].line[line] = *inL2;
2545    } else {
2546       /* Missed at both levels of the cache hierarchy.  We have to
2547          declare it as full of zeroes (unknown origins). */
2548       stats__ocacheL2_misses++;
2549       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2550    }
2551 
2552    /* Move it one forwards */
2553    moveLineForwards( &ocacheL1->set[setno], line );
2554    line--;
2555 
2556    return &ocacheL1->set[setno].line[line];
2557 }
2558 
find_OCacheLine(Addr a)2559 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2560 {
2561    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2562    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2563    UWord tag     = a & tagmask;
2564 
2565    stats_ocacheL1_find++;
2566 
2567    if (OC_ENABLE_ASSERTIONS) {
2568       tl_assert(setno >= 0 && setno < OC_N_SETS);
2569       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2570    }
2571 
2572    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2573       return &ocacheL1->set[setno].line[0];
2574    }
2575 
2576    return find_OCacheLine_SLOW( a );
2577 }
2578 
set_aligned_word64_Origin_to_undef(Addr a,UInt otag)2579 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2580 {
2581    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2582    //// Set the origins for a+0 .. a+7
2583    { OCacheLine* line;
2584      UWord lineoff = oc_line_offset(a);
2585      if (OC_ENABLE_ASSERTIONS) {
2586         tl_assert(lineoff >= 0
2587                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2588      }
2589      line = find_OCacheLine( a );
2590      line->descr[lineoff+0] = 0xF;
2591      line->descr[lineoff+1] = 0xF;
2592      line->w32[lineoff+0]   = otag;
2593      line->w32[lineoff+1]   = otag;
2594    }
2595    //// END inlined, specialised version of MC_(helperc_b_store8)
2596 }
2597 
2598 
2599 /*------------------------------------------------------------*/
2600 /*--- Aligned fast case permission setters,                ---*/
2601 /*--- for dealing with stacks                              ---*/
2602 /*------------------------------------------------------------*/
2603 
2604 /*--------------------- 32-bit ---------------------*/
2605 
2606 /* Nb: by "aligned" here we mean 4-byte aligned */
2607 
make_aligned_word32_undefined(Addr a)2608 static INLINE void make_aligned_word32_undefined ( Addr a )
2609 {
2610   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2611 
2612 #ifndef PERF_FAST_STACK2
2613    make_mem_undefined(a, 4);
2614 #else
2615    {
2616       UWord   sm_off;
2617       SecMap* sm;
2618 
2619       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2620          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2621          make_mem_undefined(a, 4);
2622          return;
2623       }
2624 
2625       sm                  = get_secmap_for_writing_low(a);
2626       sm_off              = SM_OFF(a);
2627       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2628    }
2629 #endif
2630 }
2631 
2632 static INLINE
make_aligned_word32_undefined_w_otag(Addr a,UInt otag)2633 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2634 {
2635    make_aligned_word32_undefined(a);
2636    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2637    //// Set the origins for a+0 .. a+3
2638    { OCacheLine* line;
2639      UWord lineoff = oc_line_offset(a);
2640      if (OC_ENABLE_ASSERTIONS) {
2641         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2642      }
2643      line = find_OCacheLine( a );
2644      line->descr[lineoff] = 0xF;
2645      line->w32[lineoff]   = otag;
2646    }
2647    //// END inlined, specialised version of MC_(helperc_b_store4)
2648 }
2649 
2650 static INLINE
make_aligned_word32_noaccess(Addr a)2651 void make_aligned_word32_noaccess ( Addr a )
2652 {
2653    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2654 
2655 #ifndef PERF_FAST_STACK2
2656    MC_(make_mem_noaccess)(a, 4);
2657 #else
2658    {
2659       UWord   sm_off;
2660       SecMap* sm;
2661 
2662       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2663          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2664          MC_(make_mem_noaccess)(a, 4);
2665          return;
2666       }
2667 
2668       sm                  = get_secmap_for_writing_low(a);
2669       sm_off              = SM_OFF(a);
2670       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2671 
2672       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2673       //// Set the origins for a+0 .. a+3.
2674       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2675          OCacheLine* line;
2676          UWord lineoff = oc_line_offset(a);
2677          if (OC_ENABLE_ASSERTIONS) {
2678             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2679          }
2680          line = find_OCacheLine( a );
2681          line->descr[lineoff] = 0;
2682       }
2683       //// END inlined, specialised version of MC_(helperc_b_store4)
2684    }
2685 #endif
2686 }
2687 
2688 /*--------------------- 64-bit ---------------------*/
2689 
2690 /* Nb: by "aligned" here we mean 8-byte aligned */
2691 
make_aligned_word64_undefined(Addr a)2692 static INLINE void make_aligned_word64_undefined ( Addr a )
2693 {
2694    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2695 
2696 #ifndef PERF_FAST_STACK2
2697    make_mem_undefined(a, 8);
2698 #else
2699    {
2700       UWord   sm_off16;
2701       SecMap* sm;
2702 
2703       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2704          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2705          make_mem_undefined(a, 8);
2706          return;
2707       }
2708 
2709       sm       = get_secmap_for_writing_low(a);
2710       sm_off16 = SM_OFF_16(a);
2711       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2712    }
2713 #endif
2714 }
2715 
2716 static INLINE
make_aligned_word64_undefined_w_otag(Addr a,UInt otag)2717 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2718 {
2719    make_aligned_word64_undefined(a);
2720    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2721    //// Set the origins for a+0 .. a+7
2722    { OCacheLine* line;
2723      UWord lineoff = oc_line_offset(a);
2724      tl_assert(lineoff >= 0
2725                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2726      line = find_OCacheLine( a );
2727      line->descr[lineoff+0] = 0xF;
2728      line->descr[lineoff+1] = 0xF;
2729      line->w32[lineoff+0]   = otag;
2730      line->w32[lineoff+1]   = otag;
2731    }
2732    //// END inlined, specialised version of MC_(helperc_b_store8)
2733 }
2734 
2735 static INLINE
make_aligned_word64_noaccess(Addr a)2736 void make_aligned_word64_noaccess ( Addr a )
2737 {
2738    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2739 
2740 #ifndef PERF_FAST_STACK2
2741    MC_(make_mem_noaccess)(a, 8);
2742 #else
2743    {
2744       UWord   sm_off16;
2745       SecMap* sm;
2746 
2747       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2748          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2749          MC_(make_mem_noaccess)(a, 8);
2750          return;
2751       }
2752 
2753       sm       = get_secmap_for_writing_low(a);
2754       sm_off16 = SM_OFF_16(a);
2755       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2756 
2757       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2758       //// Clear the origins for a+0 .. a+7.
2759       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2760          OCacheLine* line;
2761          UWord lineoff = oc_line_offset(a);
2762          tl_assert(lineoff >= 0
2763                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2764          line = find_OCacheLine( a );
2765          line->descr[lineoff+0] = 0;
2766          line->descr[lineoff+1] = 0;
2767       }
2768       //// END inlined, specialised version of MC_(helperc_b_store8)
2769    }
2770 #endif
2771 }
2772 
2773 
2774 /*------------------------------------------------------------*/
2775 /*--- Stack pointer adjustment                             ---*/
2776 /*------------------------------------------------------------*/
2777 
2778 #ifdef PERF_FAST_STACK
2779 #  define MAYBE_USED
2780 #else
2781 #  define MAYBE_USED __attribute__((unused))
2782 #endif
2783 
2784 /*--------------- adjustment by 4 bytes ---------------*/
2785 
2786 MAYBE_USED
mc_new_mem_stack_4_w_ECU(Addr new_SP,UInt ecu)2787 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2788 {
2789    UInt otag = ecu | MC_OKIND_STACK;
2790    PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2791    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2792       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2793    } else {
2794       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2795    }
2796 }
2797 
2798 MAYBE_USED
mc_new_mem_stack_4(Addr new_SP)2799 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2800 {
2801    PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2802    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2803       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2804    } else {
2805       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2806    }
2807 }
2808 
2809 MAYBE_USED
mc_die_mem_stack_4(Addr new_SP)2810 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2811 {
2812    PROF_EVENT(MCPE_DIE_MEM_STACK_4);
2813    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2814       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2815    } else {
2816       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2817    }
2818 }
2819 
2820 /*--------------- adjustment by 8 bytes ---------------*/
2821 
2822 MAYBE_USED
mc_new_mem_stack_8_w_ECU(Addr new_SP,UInt ecu)2823 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2824 {
2825    UInt otag = ecu | MC_OKIND_STACK;
2826    PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2827    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2828       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2829    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2830       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2831       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2832    } else {
2833       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2834    }
2835 }
2836 
2837 MAYBE_USED
mc_new_mem_stack_8(Addr new_SP)2838 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2839 {
2840    PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2841    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2842       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2843    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2844       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2845       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2846    } else {
2847       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2848    }
2849 }
2850 
2851 MAYBE_USED
mc_die_mem_stack_8(Addr new_SP)2852 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2853 {
2854    PROF_EVENT(MCPE_DIE_MEM_STACK_8);
2855    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2856       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2857    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2858       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2859       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2860    } else {
2861       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2862    }
2863 }
2864 
2865 /*--------------- adjustment by 12 bytes ---------------*/
2866 
2867 MAYBE_USED
mc_new_mem_stack_12_w_ECU(Addr new_SP,UInt ecu)2868 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2869 {
2870    UInt otag = ecu | MC_OKIND_STACK;
2871    PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2872    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2873       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2874       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2875    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2876       /* from previous test we don't have 8-alignment at offset +0,
2877          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2878          do 4 at +0 and then 8 at +4/. */
2879       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2880       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2881    } else {
2882       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2883    }
2884 }
2885 
2886 MAYBE_USED
mc_new_mem_stack_12(Addr new_SP)2887 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2888 {
2889    PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2890    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2891       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2892       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2893    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2894       /* from previous test we don't have 8-alignment at offset +0,
2895          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2896          do 4 at +0 and then 8 at +4/. */
2897       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2898       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2899    } else {
2900       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2901    }
2902 }
2903 
2904 MAYBE_USED
mc_die_mem_stack_12(Addr new_SP)2905 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2906 {
2907    PROF_EVENT(MCPE_DIE_MEM_STACK_12);
2908    /* Note the -12 in the test */
2909    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2910       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2911          -4. */
2912       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2913       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2914    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2915       /* We have 4-alignment at +0, but we don't have 8-alignment at
2916          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2917          and then 8 at -8. */
2918       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2919       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2920    } else {
2921       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2922    }
2923 }
2924 
2925 /*--------------- adjustment by 16 bytes ---------------*/
2926 
2927 MAYBE_USED
mc_new_mem_stack_16_w_ECU(Addr new_SP,UInt ecu)2928 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2929 {
2930    UInt otag = ecu | MC_OKIND_STACK;
2931    PROF_EVENT(MCPE_NEW_MEM_STACK_16);
2932    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2933       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2934       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2935       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2936    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2937       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2938          Hence do 4 at +0, 8 at +4, 4 at +12. */
2939       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2940       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2941       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2942    } else {
2943       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2944    }
2945 }
2946 
2947 MAYBE_USED
mc_new_mem_stack_16(Addr new_SP)2948 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2949 {
2950    PROF_EVENT(MCPE_NEW_MEM_STACK_16);
2951    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2952       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2953       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2954       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2955    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2956       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2957          Hence do 4 at +0, 8 at +4, 4 at +12. */
2958       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2959       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
2960       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2961    } else {
2962       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2963    }
2964 }
2965 
2966 MAYBE_USED
mc_die_mem_stack_16(Addr new_SP)2967 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2968 {
2969    PROF_EVENT(MCPE_DIE_MEM_STACK_16);
2970    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2971       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2972       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2973       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2974    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2975       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
2976       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2977       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2978       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2979    } else {
2980       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2981    }
2982 }
2983 
2984 /*--------------- adjustment by 32 bytes ---------------*/
2985 
2986 MAYBE_USED
mc_new_mem_stack_32_w_ECU(Addr new_SP,UInt ecu)2987 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2988 {
2989    UInt otag = ecu | MC_OKIND_STACK;
2990    PROF_EVENT(MCPE_NEW_MEM_STACK_32);
2991    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2992       /* Straightforward */
2993       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2994       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2995       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2996       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2997    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2998       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2999          +0,+28. */
3000       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3001       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3002       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3003       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3004       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3005    } else {
3006       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3007    }
3008 }
3009 
3010 MAYBE_USED
mc_new_mem_stack_32(Addr new_SP)3011 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3012 {
3013    PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3014    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3015       /* Straightforward */
3016       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3017       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3018       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3019       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3020    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3021       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
3022          +0,+28. */
3023       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3024       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3025       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3026       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3027       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3028    } else {
3029       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3030    }
3031 }
3032 
3033 MAYBE_USED
mc_die_mem_stack_32(Addr new_SP)3034 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3035 {
3036    PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3037    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3038       /* Straightforward */
3039       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3040       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3041       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3042       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3043    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3044       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
3045          4 at -32,-4. */
3046       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3047       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3048       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3049       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3050       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
3051    } else {
3052       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3053    }
3054 }
3055 
3056 /*--------------- adjustment by 112 bytes ---------------*/
3057 
3058 MAYBE_USED
mc_new_mem_stack_112_w_ECU(Addr new_SP,UInt ecu)3059 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3060 {
3061    UInt otag = ecu | MC_OKIND_STACK;
3062    PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3063    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3064       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3065       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3066       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3067       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3068       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3069       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3070       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3071       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3072       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3073       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3074       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3075       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3076       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3077       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3078    } else {
3079       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3080    }
3081 }
3082 
3083 MAYBE_USED
mc_new_mem_stack_112(Addr new_SP)3084 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3085 {
3086    PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3087    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3088       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3089       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3090       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3091       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3092       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3093       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3094       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3095       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3096       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3097       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3098       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3099       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3100       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3101       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3102    } else {
3103       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3104    }
3105 }
3106 
3107 MAYBE_USED
mc_die_mem_stack_112(Addr new_SP)3108 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3109 {
3110    PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3111    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3112       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3113       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3114       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3115       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3116       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3117       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3118       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3119       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3120       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3121       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3122       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3123       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3124       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3125       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3126    } else {
3127       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3128    }
3129 }
3130 
3131 /*--------------- adjustment by 128 bytes ---------------*/
3132 
3133 MAYBE_USED
mc_new_mem_stack_128_w_ECU(Addr new_SP,UInt ecu)3134 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3135 {
3136    UInt otag = ecu | MC_OKIND_STACK;
3137    PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3138    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3139       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3140       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3141       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3142       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3143       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3144       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3145       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3146       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3147       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3148       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3149       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3150       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3151       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3152       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3153       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3154       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3155    } else {
3156       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3157    }
3158 }
3159 
3160 MAYBE_USED
mc_new_mem_stack_128(Addr new_SP)3161 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3162 {
3163    PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3164    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3165       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3166       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3167       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3168       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3169       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3170       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3171       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3172       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3173       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3174       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3175       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3176       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3177       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3178       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3179       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3180       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3181    } else {
3182       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3183    }
3184 }
3185 
3186 MAYBE_USED
mc_die_mem_stack_128(Addr new_SP)3187 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3188 {
3189    PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3190    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3191       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3192       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3193       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3194       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3195       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3196       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3197       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3198       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3199       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3200       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3201       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3202       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3203       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3204       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3205       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3206       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3207    } else {
3208       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3209    }
3210 }
3211 
3212 /*--------------- adjustment by 144 bytes ---------------*/
3213 
3214 MAYBE_USED
mc_new_mem_stack_144_w_ECU(Addr new_SP,UInt ecu)3215 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3216 {
3217    UInt otag = ecu | MC_OKIND_STACK;
3218    PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3219    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3220       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3221       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3222       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3223       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3224       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3225       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3226       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3227       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3228       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3229       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3230       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3231       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3232       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3233       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3234       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3235       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3236       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3237       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3238    } else {
3239       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3240    }
3241 }
3242 
3243 MAYBE_USED
mc_new_mem_stack_144(Addr new_SP)3244 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3245 {
3246    PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3247    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3248       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3249       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3250       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3251       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3252       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3253       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3254       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3255       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3256       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3257       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3258       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3259       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3260       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3261       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3262       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3263       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3264       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3265       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3266    } else {
3267       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3268    }
3269 }
3270 
3271 MAYBE_USED
mc_die_mem_stack_144(Addr new_SP)3272 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3273 {
3274    PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3275    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3276       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3277       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3278       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3279       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3280       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3281       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3282       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3283       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3284       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3285       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3286       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3287       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3288       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3289       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3290       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3291       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3292       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3293       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3294    } else {
3295       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3296    }
3297 }
3298 
3299 /*--------------- adjustment by 160 bytes ---------------*/
3300 
3301 MAYBE_USED
mc_new_mem_stack_160_w_ECU(Addr new_SP,UInt ecu)3302 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3303 {
3304    UInt otag = ecu | MC_OKIND_STACK;
3305    PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3306    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3307       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3308       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3309       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3310       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3311       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3312       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3313       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3314       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3315       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3316       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3317       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3318       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3319       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3320       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3321       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3322       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3323       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3324       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3325       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3326       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3327    } else {
3328       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3329    }
3330 }
3331 
3332 MAYBE_USED
mc_new_mem_stack_160(Addr new_SP)3333 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3334 {
3335    PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3336    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3337       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3338       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3339       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3340       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3341       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3342       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3343       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3344       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3345       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3346       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3347       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3348       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3349       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3350       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3351       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3352       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3353       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3354       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3355       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3356       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3357    } else {
3358       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3359    }
3360 }
3361 
3362 MAYBE_USED
mc_die_mem_stack_160(Addr new_SP)3363 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3364 {
3365    PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3366    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3367       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3368       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3369       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3370       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3371       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3372       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3373       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3374       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3375       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3376       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3377       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3378       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3379       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3380       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3381       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3382       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3383       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3384       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3385       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3386       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3387    } else {
3388       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3389    }
3390 }
3391 
3392 /*--------------- adjustment by N bytes ---------------*/
3393 
mc_new_mem_stack_w_ECU(Addr a,SizeT len,UInt ecu)3394 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3395 {
3396    UInt otag = ecu | MC_OKIND_STACK;
3397    PROF_EVENT(MCPE_NEW_MEM_STACK);
3398    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3399 }
3400 
mc_new_mem_stack(Addr a,SizeT len)3401 static void mc_new_mem_stack ( Addr a, SizeT len )
3402 {
3403    PROF_EVENT(MCPE_NEW_MEM_STACK);
3404    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3405 }
3406 
mc_die_mem_stack(Addr a,SizeT len)3407 static void mc_die_mem_stack ( Addr a, SizeT len )
3408 {
3409    PROF_EVENT(MCPE_DIE_MEM_STACK);
3410    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3411 }
3412 
3413 
3414 /* The AMD64 ABI says:
3415 
3416    "The 128-byte area beyond the location pointed to by %rsp is considered
3417     to be reserved and shall not be modified by signal or interrupt
3418     handlers.  Therefore, functions may use this area for temporary data
3419     that is not needed across function calls.  In particular, leaf functions
3420     may use this area for their entire stack frame, rather than adjusting
3421     the stack pointer in the prologue and epilogue.  This area is known as
3422     red zone [sic]."
3423 
3424    So after any call or return we need to mark this redzone as containing
3425    undefined values.
3426 
3427    Consider this:  we're in function f.  f calls g.  g moves rsp down
3428    modestly (say 16 bytes) and writes stuff all over the red zone, making it
3429    defined.  g returns.  f is buggy and reads from parts of the red zone
3430    that it didn't write on.  But because g filled that area in, f is going
3431    to be picking up defined V bits and so any errors from reading bits of
3432    the red zone it didn't write, will be missed.  The only solution I could
3433    think of was to make the red zone undefined when g returns to f.
3434 
3435    This is in accordance with the ABI, which makes it clear the redzone
3436    is volatile across function calls.
3437 
3438    The problem occurs the other way round too: f could fill the RZ up
3439    with defined values and g could mistakenly read them.  So the RZ
3440    also needs to be nuked on function calls.
3441 */
3442 
3443 
3444 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
3445    improved so as to have a lower miss rate. */
3446 
3447 static UWord stats__nia_cache_queries = 0;
3448 static UWord stats__nia_cache_misses  = 0;
3449 
3450 typedef
3451    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
3452             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3453    WCacheEnt;
3454 
3455 #define N_NIA_TO_ECU_CACHE 511
3456 
3457 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3458 
init_nia_to_ecu_cache(void)3459 static void init_nia_to_ecu_cache ( void )
3460 {
3461    UWord       i;
3462    Addr        zero_addr = 0;
3463    ExeContext* zero_ec;
3464    UInt        zero_ecu;
3465    /* Fill all the slots with an entry for address zero, and the
3466       relevant otags accordingly.  Hence the cache is initially filled
3467       with valid data. */
3468    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3469    tl_assert(zero_ec);
3470    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3471    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3472    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3473       nia_to_ecu_cache[i].nia0 = zero_addr;
3474       nia_to_ecu_cache[i].ecu0 = zero_ecu;
3475       nia_to_ecu_cache[i].nia1 = zero_addr;
3476       nia_to_ecu_cache[i].ecu1 = zero_ecu;
3477    }
3478 }
3479 
convert_nia_to_ecu(Addr nia)3480 static inline UInt convert_nia_to_ecu ( Addr nia )
3481 {
3482    UWord i;
3483    UInt        ecu;
3484    ExeContext* ec;
3485 
3486    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3487 
3488    stats__nia_cache_queries++;
3489    i = nia % N_NIA_TO_ECU_CACHE;
3490    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3491 
3492    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3493       return nia_to_ecu_cache[i].ecu0;
3494 
3495    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3496 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3497       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3498       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3499 #     undef SWAP
3500       return nia_to_ecu_cache[i].ecu0;
3501    }
3502 
3503    stats__nia_cache_misses++;
3504    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3505    tl_assert(ec);
3506    ecu = VG_(get_ECU_from_ExeContext)(ec);
3507    tl_assert(VG_(is_plausible_ECU)(ecu));
3508 
3509    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3510    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3511 
3512    nia_to_ecu_cache[i].nia0 = nia;
3513    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3514    return ecu;
3515 }
3516 
3517 
3518 /* Note that this serves both the origin-tracking and
3519    no-origin-tracking modes.  We assume that calls to it are
3520    sufficiently infrequent that it isn't worth specialising for the
3521    with/without origin-tracking cases. */
MC_(helperc_MAKE_STACK_UNINIT)3522 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3523 {
3524    UInt otag;
3525    tl_assert(sizeof(UWord) == sizeof(SizeT));
3526    if (0)
3527       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3528                   base, len, nia );
3529 
3530    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3531       UInt ecu = convert_nia_to_ecu ( nia );
3532       tl_assert(VG_(is_plausible_ECU)(ecu));
3533       otag = ecu | MC_OKIND_STACK;
3534    } else {
3535       tl_assert(nia == 0);
3536       otag = 0;
3537    }
3538 
3539 #  if 0
3540    /* Really slow version */
3541    MC_(make_mem_undefined)(base, len, otag);
3542 #  endif
3543 
3544 #  if 0
3545    /* Slow(ish) version, which is fairly easily seen to be correct.
3546    */
3547    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3548       make_aligned_word64_undefined(base +   0, otag);
3549       make_aligned_word64_undefined(base +   8, otag);
3550       make_aligned_word64_undefined(base +  16, otag);
3551       make_aligned_word64_undefined(base +  24, otag);
3552 
3553       make_aligned_word64_undefined(base +  32, otag);
3554       make_aligned_word64_undefined(base +  40, otag);
3555       make_aligned_word64_undefined(base +  48, otag);
3556       make_aligned_word64_undefined(base +  56, otag);
3557 
3558       make_aligned_word64_undefined(base +  64, otag);
3559       make_aligned_word64_undefined(base +  72, otag);
3560       make_aligned_word64_undefined(base +  80, otag);
3561       make_aligned_word64_undefined(base +  88, otag);
3562 
3563       make_aligned_word64_undefined(base +  96, otag);
3564       make_aligned_word64_undefined(base + 104, otag);
3565       make_aligned_word64_undefined(base + 112, otag);
3566       make_aligned_word64_undefined(base + 120, otag);
3567    } else {
3568       MC_(make_mem_undefined)(base, len, otag);
3569    }
3570 #  endif
3571 
3572    /* Idea is: go fast when
3573          * 8-aligned and length is 128
3574          * the sm is available in the main primary map
3575          * the address range falls entirely with a single secondary map
3576       If all those conditions hold, just update the V+A bits by writing
3577       directly into the vabits array.  (If the sm was distinguished, this
3578       will make a copy and then write to it.)
3579    */
3580 
3581    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3582       /* Now we know the address range is suitably sized and aligned. */
3583       UWord a_lo = (UWord)(base);
3584       UWord a_hi = (UWord)(base + 128 - 1);
3585       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3586       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3587          // Now we know the entire range is within the main primary map.
3588          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3589          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3590          /* Now we know that the entire address range falls within a
3591             single secondary map, and that that secondary 'lives' in
3592             the main primary map. */
3593          if (LIKELY(sm == sm_hi)) {
3594             // Finally, we know that the range is entirely within one secmap.
3595             UWord   v_off = SM_OFF(a_lo);
3596             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3597             p[ 0] = VA_BITS16_UNDEFINED;
3598             p[ 1] = VA_BITS16_UNDEFINED;
3599             p[ 2] = VA_BITS16_UNDEFINED;
3600             p[ 3] = VA_BITS16_UNDEFINED;
3601             p[ 4] = VA_BITS16_UNDEFINED;
3602             p[ 5] = VA_BITS16_UNDEFINED;
3603             p[ 6] = VA_BITS16_UNDEFINED;
3604             p[ 7] = VA_BITS16_UNDEFINED;
3605             p[ 8] = VA_BITS16_UNDEFINED;
3606             p[ 9] = VA_BITS16_UNDEFINED;
3607             p[10] = VA_BITS16_UNDEFINED;
3608             p[11] = VA_BITS16_UNDEFINED;
3609             p[12] = VA_BITS16_UNDEFINED;
3610             p[13] = VA_BITS16_UNDEFINED;
3611             p[14] = VA_BITS16_UNDEFINED;
3612             p[15] = VA_BITS16_UNDEFINED;
3613             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3614                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3615                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3616                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3617                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3618                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3619                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3620                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3621                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3622                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3623                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3624                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3625                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3626                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3627                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3628                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3629                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3630             }
3631             return;
3632          }
3633       }
3634    }
3635 
3636    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3637    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3638       /* Now we know the address range is suitably sized and aligned. */
3639       UWord a_lo = (UWord)(base);
3640       UWord a_hi = (UWord)(base + 288 - 1);
3641       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3642       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3643          // Now we know the entire range is within the main primary map.
3644          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3645          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3646          /* Now we know that the entire address range falls within a
3647             single secondary map, and that that secondary 'lives' in
3648             the main primary map. */
3649          if (LIKELY(sm == sm_hi)) {
3650             // Finally, we know that the range is entirely within one secmap.
3651             UWord   v_off = SM_OFF(a_lo);
3652             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3653             p[ 0] = VA_BITS16_UNDEFINED;
3654             p[ 1] = VA_BITS16_UNDEFINED;
3655             p[ 2] = VA_BITS16_UNDEFINED;
3656             p[ 3] = VA_BITS16_UNDEFINED;
3657             p[ 4] = VA_BITS16_UNDEFINED;
3658             p[ 5] = VA_BITS16_UNDEFINED;
3659             p[ 6] = VA_BITS16_UNDEFINED;
3660             p[ 7] = VA_BITS16_UNDEFINED;
3661             p[ 8] = VA_BITS16_UNDEFINED;
3662             p[ 9] = VA_BITS16_UNDEFINED;
3663             p[10] = VA_BITS16_UNDEFINED;
3664             p[11] = VA_BITS16_UNDEFINED;
3665             p[12] = VA_BITS16_UNDEFINED;
3666             p[13] = VA_BITS16_UNDEFINED;
3667             p[14] = VA_BITS16_UNDEFINED;
3668             p[15] = VA_BITS16_UNDEFINED;
3669             p[16] = VA_BITS16_UNDEFINED;
3670             p[17] = VA_BITS16_UNDEFINED;
3671             p[18] = VA_BITS16_UNDEFINED;
3672             p[19] = VA_BITS16_UNDEFINED;
3673             p[20] = VA_BITS16_UNDEFINED;
3674             p[21] = VA_BITS16_UNDEFINED;
3675             p[22] = VA_BITS16_UNDEFINED;
3676             p[23] = VA_BITS16_UNDEFINED;
3677             p[24] = VA_BITS16_UNDEFINED;
3678             p[25] = VA_BITS16_UNDEFINED;
3679             p[26] = VA_BITS16_UNDEFINED;
3680             p[27] = VA_BITS16_UNDEFINED;
3681             p[28] = VA_BITS16_UNDEFINED;
3682             p[29] = VA_BITS16_UNDEFINED;
3683             p[30] = VA_BITS16_UNDEFINED;
3684             p[31] = VA_BITS16_UNDEFINED;
3685             p[32] = VA_BITS16_UNDEFINED;
3686             p[33] = VA_BITS16_UNDEFINED;
3687             p[34] = VA_BITS16_UNDEFINED;
3688             p[35] = VA_BITS16_UNDEFINED;
3689             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3690                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3691                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3692                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3693                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3694                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3695                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3696                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3697                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3698                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3699                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3700                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3701                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3702                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3703                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3704                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3705                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3706                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3707                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3708                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3709                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3710                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3711                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3712                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3713                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3714                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3715                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3716                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3717                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3718                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3719                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3720                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3721                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3722                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3723                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3724                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3725                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3726             }
3727             return;
3728          }
3729       }
3730    }
3731 
3732    /* else fall into slow case */
3733    MC_(make_mem_undefined_w_otag)(base, len, otag);
3734 }
3735 
3736 
3737 /*------------------------------------------------------------*/
3738 /*--- Checking memory                                      ---*/
3739 /*------------------------------------------------------------*/
3740 
3741 typedef
3742    enum {
3743       MC_Ok = 5,
3744       MC_AddrErr = 6,
3745       MC_ValueErr = 7
3746    }
3747    MC_ReadResult;
3748 
3749 
3750 /* Check permissions for address range.  If inadequate permissions
3751    exist, *bad_addr is set to the offending address, so the caller can
3752    know what it is. */
3753 
3754 /* Returns True if [a .. a+len) is not addressible.  Otherwise,
3755    returns False, and if bad_addr is non-NULL, sets *bad_addr to
3756    indicate the lowest failing address.  Functions below are
3757    similar. */
MC_(check_mem_is_noaccess)3758 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3759 {
3760    SizeT i;
3761    UWord vabits2;
3762 
3763    PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
3764    for (i = 0; i < len; i++) {
3765       PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
3766       vabits2 = get_vabits2(a);
3767       if (VA_BITS2_NOACCESS != vabits2) {
3768          if (bad_addr != NULL) *bad_addr = a;
3769          return False;
3770       }
3771       a++;
3772    }
3773    return True;
3774 }
3775 
is_mem_addressable(Addr a,SizeT len,Addr * bad_addr)3776 static Bool is_mem_addressable ( Addr a, SizeT len,
3777                                  /*OUT*/Addr* bad_addr )
3778 {
3779    SizeT i;
3780    UWord vabits2;
3781 
3782    PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
3783    for (i = 0; i < len; i++) {
3784       PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
3785       vabits2 = get_vabits2(a);
3786       if (VA_BITS2_NOACCESS == vabits2) {
3787          if (bad_addr != NULL) *bad_addr = a;
3788          return False;
3789       }
3790       a++;
3791    }
3792    return True;
3793 }
3794 
is_mem_defined(Addr a,SizeT len,Addr * bad_addr,UInt * otag)3795 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3796                                       /*OUT*/Addr* bad_addr,
3797                                       /*OUT*/UInt* otag )
3798 {
3799    SizeT i;
3800    UWord vabits2;
3801 
3802    PROF_EVENT(MCPE_IS_MEM_DEFINED);
3803    DEBUG("is_mem_defined\n");
3804 
3805    if (otag)     *otag = 0;
3806    if (bad_addr) *bad_addr = 0;
3807    for (i = 0; i < len; i++) {
3808       PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
3809       vabits2 = get_vabits2(a);
3810       if (VA_BITS2_DEFINED != vabits2) {
3811          // Error!  Nb: Report addressability errors in preference to
3812          // definedness errors.  And don't report definedeness errors unless
3813          // --undef-value-errors=yes.
3814          if (bad_addr) {
3815             *bad_addr = a;
3816          }
3817          if (VA_BITS2_NOACCESS == vabits2) {
3818             return MC_AddrErr;
3819          }
3820          if (MC_(clo_mc_level) >= 2) {
3821             if (otag && MC_(clo_mc_level) == 3) {
3822                *otag = MC_(helperc_b_load1)( a );
3823             }
3824             return MC_ValueErr;
3825          }
3826       }
3827       a++;
3828    }
3829    return MC_Ok;
3830 }
3831 
3832 
3833 /* Like is_mem_defined but doesn't give up at the first uninitialised
3834    byte -- the entire range is always checked.  This is important for
3835    detecting errors in the case where a checked range strays into
3836    invalid memory, but that fact is not detected by the ordinary
3837    is_mem_defined(), because of an undefined section that precedes the
3838    out of range section, possibly as a result of an alignment hole in
3839    the checked data.  This version always checks the entire range and
3840    can report both a definedness and an accessbility error, if
3841    necessary. */
is_mem_defined_comprehensive(Addr a,SizeT len,Bool * errorV,Addr * bad_addrV,UInt * otagV,Bool * errorA,Addr * bad_addrA)3842 static void is_mem_defined_comprehensive (
3843                Addr a, SizeT len,
3844                /*OUT*/Bool* errorV,    /* is there a definedness err? */
3845                /*OUT*/Addr* bad_addrV, /* if so where? */
3846                /*OUT*/UInt* otagV,     /* and what's its otag? */
3847                /*OUT*/Bool* errorA,    /* is there an addressability err? */
3848                /*OUT*/Addr* bad_addrA  /* if so where? */
3849             )
3850 {
3851    SizeT i;
3852    UWord vabits2;
3853    Bool  already_saw_errV = False;
3854 
3855    PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
3856    DEBUG("is_mem_defined_comprehensive\n");
3857 
3858    tl_assert(!(*errorV || *errorA));
3859 
3860    for (i = 0; i < len; i++) {
3861       PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
3862       vabits2 = get_vabits2(a);
3863       switch (vabits2) {
3864          case VA_BITS2_DEFINED:
3865             a++;
3866             break;
3867          case VA_BITS2_UNDEFINED:
3868          case VA_BITS2_PARTDEFINED:
3869             if (!already_saw_errV) {
3870                *errorV    = True;
3871                *bad_addrV = a;
3872                if (MC_(clo_mc_level) == 3) {
3873                   *otagV = MC_(helperc_b_load1)( a );
3874                } else {
3875                   *otagV = 0;
3876                }
3877                already_saw_errV = True;
3878             }
3879             a++; /* keep going */
3880             break;
3881          case VA_BITS2_NOACCESS:
3882             *errorA    = True;
3883             *bad_addrA = a;
3884             return; /* give up now. */
3885          default:
3886             tl_assert(0);
3887       }
3888    }
3889 }
3890 
3891 
3892 /* Check a zero-terminated ascii string.  Tricky -- don't want to
3893    examine the actual bytes, to find the end, until we're sure it is
3894    safe to do so. */
3895 
mc_is_defined_asciiz(Addr a,Addr * bad_addr,UInt * otag)3896 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3897 {
3898    UWord vabits2;
3899 
3900    PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
3901    DEBUG("mc_is_defined_asciiz\n");
3902 
3903    if (otag)     *otag = 0;
3904    if (bad_addr) *bad_addr = 0;
3905    while (True) {
3906       PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
3907       vabits2 = get_vabits2(a);
3908       if (VA_BITS2_DEFINED != vabits2) {
3909          // Error!  Nb: Report addressability errors in preference to
3910          // definedness errors.  And don't report definedeness errors unless
3911          // --undef-value-errors=yes.
3912          if (bad_addr) {
3913             *bad_addr = a;
3914          }
3915          if (VA_BITS2_NOACCESS == vabits2) {
3916             return MC_AddrErr;
3917          }
3918          if (MC_(clo_mc_level) >= 2) {
3919             if (otag && MC_(clo_mc_level) == 3) {
3920                *otag = MC_(helperc_b_load1)( a );
3921             }
3922             return MC_ValueErr;
3923          }
3924       }
3925       /* Ok, a is safe to read. */
3926       if (* ((UChar*)a) == 0) {
3927          return MC_Ok;
3928       }
3929       a++;
3930    }
3931 }
3932 
3933 
3934 /*------------------------------------------------------------*/
3935 /*--- Memory event handlers                                ---*/
3936 /*------------------------------------------------------------*/
3937 
3938 static
check_mem_is_addressable(CorePart part,ThreadId tid,const HChar * s,Addr base,SizeT size)3939 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
3940                                 Addr base, SizeT size )
3941 {
3942    Addr bad_addr;
3943    Bool ok = is_mem_addressable ( base, size, &bad_addr );
3944 
3945    if (!ok) {
3946       switch (part) {
3947       case Vg_CoreSysCall:
3948          MC_(record_memparam_error) ( tid, bad_addr,
3949                                       /*isAddrErr*/True, s, 0/*otag*/ );
3950          break;
3951 
3952       case Vg_CoreSignal:
3953          MC_(record_core_mem_error)( tid, s );
3954          break;
3955 
3956       default:
3957          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3958       }
3959    }
3960 }
3961 
3962 static
check_mem_is_defined(CorePart part,ThreadId tid,const HChar * s,Addr base,SizeT size)3963 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
3964                             Addr base, SizeT size )
3965 {
3966    UInt otag = 0;
3967    Addr bad_addr;
3968    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3969 
3970    if (MC_Ok != res) {
3971       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3972 
3973       switch (part) {
3974       case Vg_CoreSysCall:
3975          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3976                                       isAddrErr ? 0 : otag );
3977          break;
3978 
3979       case Vg_CoreSysCallArgInMem:
3980          MC_(record_regparam_error) ( tid, s, otag );
3981          break;
3982 
3983       /* If we're being asked to jump to a silly address, record an error
3984          message before potentially crashing the entire system. */
3985       case Vg_CoreTranslate:
3986          MC_(record_jump_error)( tid, bad_addr );
3987          break;
3988 
3989       default:
3990          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3991       }
3992    }
3993 }
3994 
3995 static
check_mem_is_defined_asciiz(CorePart part,ThreadId tid,const HChar * s,Addr str)3996 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3997                                    const HChar* s, Addr str )
3998 {
3999    MC_ReadResult res;
4000    Addr bad_addr = 0;   // shut GCC up
4001    UInt otag = 0;
4002 
4003    tl_assert(part == Vg_CoreSysCall);
4004    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4005    if (MC_Ok != res) {
4006       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4007       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4008                                    isAddrErr ? 0 : otag );
4009    }
4010 }
4011 
4012 /* Handling of mmap and mprotect is not as simple as it seems.
4013 
4014    The underlying semantics are that memory obtained from mmap is
4015    always initialised, but may be inaccessible.  And changes to the
4016    protection of memory do not change its contents and hence not its
4017    definedness state.  Problem is we can't model
4018    inaccessible-but-with-some-definedness state; once we mark memory
4019    as inaccessible we lose all info about definedness, and so can't
4020    restore that if it is later made accessible again.
4021 
4022    One obvious thing to do is this:
4023 
4024       mmap/mprotect NONE  -> noaccess
4025       mmap/mprotect other -> defined
4026 
4027    The problem case here is: taking accessible memory, writing
4028    uninitialised data to it, mprotecting it NONE and later mprotecting
4029    it back to some accessible state causes the undefinedness to be
4030    lost.
4031 
4032    A better proposal is:
4033 
4034      (1) mmap NONE       ->  make noaccess
4035      (2) mmap other      ->  make defined
4036 
4037      (3) mprotect NONE   ->  # no change
4038      (4) mprotect other  ->  change any "noaccess" to "defined"
4039 
4040    (2) is OK because memory newly obtained from mmap really is defined
4041        (zeroed out by the kernel -- doing anything else would
4042        constitute a massive security hole.)
4043 
4044    (1) is OK because the only way to make the memory usable is via
4045        (4), in which case we also wind up correctly marking it all as
4046        defined.
4047 
4048    (3) is the weak case.  We choose not to change memory state.
4049        (presumably the range is in some mixture of "defined" and
4050        "undefined", viz, accessible but with arbitrary V bits).  Doing
4051        nothing means we retain the V bits, so that if the memory is
4052        later mprotected "other", the V bits remain unchanged, so there
4053        can be no false negatives.  The bad effect is that if there's
4054        an access in the area, then MC cannot warn; but at least we'll
4055        get a SEGV to show, so it's better than nothing.
4056 
4057    Consider the sequence (3) followed by (4).  Any memory that was
4058    "defined" or "undefined" previously retains its state (as
4059    required).  Any memory that was "noaccess" before can only have
4060    been made that way by (1), and so it's OK to change it to
4061    "defined".
4062 
4063    See https://bugs.kde.org/show_bug.cgi?id=205541
4064    and https://bugs.kde.org/show_bug.cgi?id=210268
4065 */
4066 static
mc_new_mem_mmap(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)4067 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4068                        ULong di_handle )
4069 {
4070    if (rr || ww || xx) {
4071       /* (2) mmap/mprotect other -> defined */
4072       MC_(make_mem_defined)(a, len);
4073    } else {
4074       /* (1) mmap/mprotect NONE  -> noaccess */
4075       MC_(make_mem_noaccess)(a, len);
4076    }
4077 }
4078 
4079 static
mc_new_mem_mprotect(Addr a,SizeT len,Bool rr,Bool ww,Bool xx)4080 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4081 {
4082    if (rr || ww || xx) {
4083       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
4084       make_mem_defined_if_noaccess(a, len);
4085    } else {
4086       /* (3) mprotect NONE   ->  # no change */
4087       /* do nothing */
4088    }
4089 }
4090 
4091 
4092 static
mc_new_mem_startup(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)4093 void mc_new_mem_startup( Addr a, SizeT len,
4094                          Bool rr, Bool ww, Bool xx, ULong di_handle )
4095 {
4096    // Because code is defined, initialised variables get put in the data
4097    // segment and are defined, and uninitialised variables get put in the
4098    // bss segment and are auto-zeroed (and so defined).
4099    //
4100    // It's possible that there will be padding between global variables.
4101    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
4102    // a program uses it, Memcheck will not complain.  This is arguably a
4103    // false negative, but it's a grey area -- the behaviour is defined (the
4104    // padding is zeroed) but it's probably not what the user intended.  And
4105    // we can't avoid it.
4106    //
4107    // Note: we generally ignore RWX permissions, because we can't track them
4108    // without requiring more than one A bit which would slow things down a
4109    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
4110    // So we mark any such pages as "unaddressable".
4111    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4112          a, (ULong)len, rr, ww, xx);
4113    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4114 }
4115 
4116 static
mc_post_mem_write(CorePart part,ThreadId tid,Addr a,SizeT len)4117 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4118 {
4119    MC_(make_mem_defined)(a, len);
4120 }
4121 
4122 
4123 /*------------------------------------------------------------*/
4124 /*--- Register event handlers                              ---*/
4125 /*------------------------------------------------------------*/
4126 
4127 /* Try and get a nonzero origin for the guest state section of thread
4128    tid characterised by (offset,size).  Return 0 if nothing to show
4129    for it. */
mb_get_origin_for_guest_offset(ThreadId tid,Int offset,SizeT size)4130 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4131                                              Int offset, SizeT size )
4132 {
4133    Int   sh2off;
4134    UInt  area[3];
4135    UInt  otag;
4136    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4137    if (sh2off == -1)
4138       return 0;  /* This piece of guest state is not tracked */
4139    tl_assert(sh2off >= 0);
4140    tl_assert(0 == (sh2off % 4));
4141    area[0] = 0x31313131;
4142    area[2] = 0x27272727;
4143    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4144    tl_assert(area[0] == 0x31313131);
4145    tl_assert(area[2] == 0x27272727);
4146    otag = area[1];
4147    return otag;
4148 }
4149 
4150 
4151 /* When some chunk of guest state is written, mark the corresponding
4152    shadow area as valid.  This is used to initialise arbitrarily large
4153    chunks of guest state, hence the _SIZE value, which has to be as
4154    big as the biggest guest state.
4155 */
mc_post_reg_write(CorePart part,ThreadId tid,PtrdiffT offset,SizeT size)4156 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4157                                 PtrdiffT offset, SizeT size)
4158 {
4159 #  define MAX_REG_WRITE_SIZE 1712
4160    UChar area[MAX_REG_WRITE_SIZE];
4161    tl_assert(size <= MAX_REG_WRITE_SIZE);
4162    VG_(memset)(area, V_BITS8_DEFINED, size);
4163    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4164 #  undef MAX_REG_WRITE_SIZE
4165 }
4166 
4167 static
mc_post_reg_write_clientcall(ThreadId tid,PtrdiffT offset,SizeT size,Addr f)4168 void mc_post_reg_write_clientcall ( ThreadId tid,
4169                                     PtrdiffT offset, SizeT size, Addr f)
4170 {
4171    mc_post_reg_write(/*dummy*/0, tid, offset, size);
4172 }
4173 
4174 /* Look at the definedness of the guest's shadow state for
4175    [offset, offset+len).  If any part of that is undefined, record
4176    a parameter error.
4177 */
mc_pre_reg_read(CorePart part,ThreadId tid,const HChar * s,PtrdiffT offset,SizeT size)4178 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4179                               PtrdiffT offset, SizeT size)
4180 {
4181    Int   i;
4182    Bool  bad;
4183    UInt  otag;
4184 
4185    UChar area[16];
4186    tl_assert(size <= 16);
4187 
4188    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4189 
4190    bad = False;
4191    for (i = 0; i < size; i++) {
4192       if (area[i] != V_BITS8_DEFINED) {
4193          bad = True;
4194          break;
4195       }
4196    }
4197 
4198    if (!bad)
4199       return;
4200 
4201    /* We've found some undefinedness.  See if we can also find an
4202       origin for it. */
4203    otag = mb_get_origin_for_guest_offset( tid, offset, size );
4204    MC_(record_regparam_error) ( tid, s, otag );
4205 }
4206 
4207 
4208 /*------------------------------------------------------------*/
4209 /*--- Register-memory event handlers                       ---*/
4210 /*------------------------------------------------------------*/
4211 
mc_copy_mem_to_reg(CorePart part,ThreadId tid,Addr a,PtrdiffT guest_state_offset,SizeT size)4212 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4213                                  PtrdiffT guest_state_offset, SizeT size )
4214 {
4215    SizeT i;
4216    UChar vbits8;
4217    Int offset;
4218    UInt d32;
4219 
4220    /* Slow loop. */
4221    for (i = 0; i < size; i++) {
4222       get_vbits8( a+i, &vbits8 );
4223       VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4224                                  1, &vbits8 );
4225    }
4226 
4227    if (MC_(clo_mc_level) != 3)
4228       return;
4229 
4230    /* Track origins. */
4231    offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4232    if (offset == -1)
4233       return;
4234 
4235    switch (size) {
4236    case 1:
4237       d32 = MC_(helperc_b_load1)( a );
4238       break;
4239    case 2:
4240       d32 = MC_(helperc_b_load2)( a );
4241       break;
4242    case 4:
4243       d32 = MC_(helperc_b_load4)( a );
4244       break;
4245    case 8:
4246       d32 = MC_(helperc_b_load8)( a );
4247       break;
4248    case 16:
4249       d32 = MC_(helperc_b_load16)( a );
4250       break;
4251    case 32:
4252       d32 = MC_(helperc_b_load32)( a );
4253       break;
4254    default:
4255       tl_assert(0);
4256    }
4257 
4258    VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
4259 }
4260 
mc_copy_reg_to_mem(CorePart part,ThreadId tid,PtrdiffT guest_state_offset,Addr a,SizeT size)4261 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4262                                  PtrdiffT guest_state_offset, Addr a,
4263                                  SizeT size )
4264 {
4265    SizeT i;
4266    UChar vbits8;
4267    Int offset;
4268    UInt d32;
4269 
4270    /* Slow loop. */
4271    for (i = 0; i < size; i++) {
4272       VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4273                                  guest_state_offset+i, 1 );
4274       set_vbits8( a+i, vbits8 );
4275    }
4276 
4277    if (MC_(clo_mc_level) != 3)
4278       return;
4279 
4280    /* Track origins. */
4281    offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4282    if (offset == -1)
4283       return;
4284 
4285    VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4286    switch (size) {
4287    case 1:
4288       MC_(helperc_b_store1)( a, d32 );
4289       break;
4290    case 2:
4291       MC_(helperc_b_store2)( a, d32 );
4292       break;
4293    case 4:
4294       MC_(helperc_b_store4)( a, d32 );
4295       break;
4296    case 8:
4297       MC_(helperc_b_store8)( a, d32 );
4298       break;
4299    case 16:
4300       MC_(helperc_b_store16)( a, d32 );
4301       break;
4302    case 32:
4303       MC_(helperc_b_store32)( a, d32 );
4304       break;
4305    default:
4306       tl_assert(0);
4307    }
4308 }
4309 
4310 
4311 /*------------------------------------------------------------*/
4312 /*--- Some static assertions                               ---*/
4313 /*------------------------------------------------------------*/
4314 
4315 /* The handwritten assembly helpers below have baked-in assumptions
4316    about various constant values.  These assertions attempt to make
4317    that a bit safer by checking those values and flagging changes that
4318    would make the assembly invalid.  Not perfect but it's better than
4319    nothing. */
4320 
4321 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4322 
4323 STATIC_ASSERT(VA_BITS8_DEFINED   == 0xAA);
4324 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4325 
4326 STATIC_ASSERT(V_BITS32_DEFINED   == 0x00000000);
4327 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4328 
4329 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4330 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4331 
4332 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4333 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4334 
4335 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4336 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4337 
4338 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4339 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4340 
4341 
4342 /*------------------------------------------------------------*/
4343 /*--- Functions called directly from generated code:       ---*/
4344 /*--- Load/store handlers.                                 ---*/
4345 /*------------------------------------------------------------*/
4346 
4347 /* Types:  LOADV32, LOADV16, LOADV8 are:
4348                UWord fn ( Addr a )
4349    so they return 32-bits on 32-bit machines and 64-bits on
4350    64-bit machines.  Addr has the same size as a host word.
4351 
4352    LOADV64 is always  ULong fn ( Addr a )
4353 
4354    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4355    are a UWord, and for STOREV64 they are a ULong.
4356 */
4357 
4358 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4359    naturally '_sz/8'-aligned, or it exceeds the range covered by the
4360    primary map.  This is all very tricky (and important!), so let's
4361    work through the maths by hand (below), *and* assert for these
4362    values at startup. */
4363 #define MASK(_szInBytes) \
4364    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4365 
4366 /* MASK only exists so as to define this macro. */
4367 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4368    ((_a) & MASK((_szInBits>>3)))
4369 
4370 /* On a 32-bit machine:
4371 
4372    N_PRIMARY_BITS          == 16, so
4373    N_PRIMARY_MAP           == 0x10000, so
4374    N_PRIMARY_MAP-1         == 0xFFFF, so
4375    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4376 
4377    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4378            = ~ ( 0xFFFF | 0xFFFF0000 )
4379            = ~ 0xFFFF'FFFF
4380            = 0
4381 
4382    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4383            = ~ ( 0xFFFE | 0xFFFF0000 )
4384            = ~ 0xFFFF'FFFE
4385            = 1
4386 
4387    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4388            = ~ ( 0xFFFC | 0xFFFF0000 )
4389            = ~ 0xFFFF'FFFC
4390            = 3
4391 
4392    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4393            = ~ ( 0xFFF8 | 0xFFFF0000 )
4394            = ~ 0xFFFF'FFF8
4395            = 7
4396 
4397    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4398    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
4399    the 1-byte alignment case, it is always a zero value, since MASK(1)
4400    is zero.  All as expected.
4401 
4402    On a 64-bit machine, it's more complex, since we're testing
4403    simultaneously for misalignment and for the address being at or
4404    above 64G:
4405 
4406    N_PRIMARY_BITS          == 20, so
4407    N_PRIMARY_MAP           == 0x100000, so
4408    N_PRIMARY_MAP-1         == 0xFFFFF, so
4409    (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4410 
4411    MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4412            = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4413            = ~ 0xF'FFFF'FFFF
4414            = 0xFFFF'FFF0'0000'0000
4415 
4416    MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4417            = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4418            = ~ 0xF'FFFF'FFFE
4419            = 0xFFFF'FFF0'0000'0001
4420 
4421    MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4422            = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4423            = ~ 0xF'FFFF'FFFC
4424            = 0xFFFF'FFF0'0000'0003
4425 
4426    MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4427            = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4428            = ~ 0xF'FFFF'FFF8
4429            = 0xFFFF'FFF0'0000'0007
4430 */
4431 
4432 /*------------------------------------------------------------*/
4433 /*--- LOADV256 and LOADV128                                ---*/
4434 /*------------------------------------------------------------*/
4435 
4436 static INLINE
mc_LOADV_128_or_256(ULong * res,Addr a,SizeT nBits,Bool isBigEndian)4437 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4438                            Addr a, SizeT nBits, Bool isBigEndian )
4439 {
4440    PROF_EVENT(MCPE_LOADV_128_OR_256);
4441 
4442 #ifndef PERF_FAST_LOADV
4443    mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4444    return;
4445 #else
4446    {
4447       UWord   sm_off16, vabits16, j;
4448       UWord   nBytes  = nBits / 8;
4449       UWord   nULongs = nBytes / 8;
4450       SecMap* sm;
4451 
4452       if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4453          PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4454          mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4455          return;
4456       }
4457 
4458       /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4459          suitably aligned, is mapped, and addressible. */
4460       for (j = 0; j < nULongs; j++) {
4461          sm       = get_secmap_for_reading_low(a + 8*j);
4462          sm_off16 = SM_OFF_16(a + 8*j);
4463          vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4464 
4465          // Convert V bits from compact memory form to expanded
4466          // register form.
4467          if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4468             res[j] = V_BITS64_DEFINED;
4469          } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4470             res[j] = V_BITS64_UNDEFINED;
4471          } else {
4472             /* Slow case: some block of 8 bytes are not all-defined or
4473                all-undefined. */
4474             PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4475             mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4476             return;
4477          }
4478       }
4479       return;
4480    }
4481 #endif
4482 }
4483 
MC_(helperc_LOADV256be)4484 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4485 {
4486    mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4487 }
MC_(helperc_LOADV256le)4488 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4489 {
4490    mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4491 }
4492 
MC_(helperc_LOADV128be)4493 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4494 {
4495    mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4496 }
MC_(helperc_LOADV128le)4497 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4498 {
4499    mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4500 }
4501 
4502 /*------------------------------------------------------------*/
4503 /*--- LOADV64                                              ---*/
4504 /*------------------------------------------------------------*/
4505 
4506 static INLINE
mc_LOADV64(Addr a,Bool isBigEndian)4507 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4508 {
4509    PROF_EVENT(MCPE_LOADV64);
4510 
4511 #ifndef PERF_FAST_LOADV
4512    return mc_LOADVn_slow( a, 64, isBigEndian );
4513 #else
4514    {
4515       UWord   sm_off16, vabits16;
4516       SecMap* sm;
4517 
4518       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4519          PROF_EVENT(MCPE_LOADV64_SLOW1);
4520          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4521       }
4522 
4523       sm       = get_secmap_for_reading_low(a);
4524       sm_off16 = SM_OFF_16(a);
4525       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4526 
4527       // Handle common case quickly: a is suitably aligned, is mapped, and
4528       // addressible.
4529       // Convert V bits from compact memory form to expanded register form.
4530       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4531          return V_BITS64_DEFINED;
4532       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4533          return V_BITS64_UNDEFINED;
4534       } else {
4535          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4536          PROF_EVENT(MCPE_LOADV64_SLOW2);
4537          return mc_LOADVn_slow( a, 64, isBigEndian );
4538       }
4539    }
4540 #endif
4541 }
4542 
4543 // Generic for all platforms
MC_(helperc_LOADV64be)4544 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4545 {
4546    return mc_LOADV64(a, True);
4547 }
4548 
4549 // Non-generic assembly for arm32-linux
4550 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4551     && defined(VGP_arm_linux)
4552 __asm__( /* Derived from the 32 bit assembly helper */
4553 ".text                                  \n"
4554 ".align 2                               \n"
4555 ".global vgMemCheck_helperc_LOADV64le   \n"
4556 ".type   vgMemCheck_helperc_LOADV64le, %function \n"
4557 "vgMemCheck_helperc_LOADV64le:          \n"
4558 "      tst    r0, #7                    \n"
4559 "      movw   r3, #:lower16:primary_map \n"
4560 "      bne    .LLV64LEc4                \n" // if misaligned
4561 "      lsr    r2, r0, #16               \n"
4562 "      movt   r3, #:upper16:primary_map \n"
4563 "      ldr    r2, [r3, r2, lsl #2]      \n"
4564 "      uxth   r1, r0                    \n" // r1 is 0-(16)-0 X-(13)-X 000
4565 "      movw   r3, #0xAAAA               \n"
4566 "      lsr    r1, r1, #2                \n" // r1 is 0-(16)-0 00 X-(13)-X 0
4567 "      ldrh   r1, [r2, r1]              \n"
4568 "      cmp    r1, r3                    \n" // 0xAAAA == VA_BITS16_DEFINED
4569 "      bne    .LLV64LEc0                \n" // if !all_defined
4570 "      mov    r1, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
4571 "      mov    r0, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
4572 "      bx     lr                        \n"
4573 ".LLV64LEc0:                            \n"
4574 "      movw   r3, #0x5555               \n"
4575 "      cmp    r1, r3                    \n" // 0x5555 == VA_BITS16_UNDEFINED
4576 "      bne    .LLV64LEc4                \n" // if !all_undefined
4577 "      mov    r1, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4578 "      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4579 "      bx     lr                        \n"
4580 ".LLV64LEc4:                            \n"
4581 "      push   {r4, lr}                  \n"
4582 "      mov    r2, #0                    \n"
4583 "      mov    r1, #64                   \n"
4584 "      bl     mc_LOADVn_slow            \n"
4585 "      pop    {r4, pc}                  \n"
4586 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
4587 ".previous\n"
4588 );
4589 
4590 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4591       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4592 __asm__(
4593 ".text\n"
4594 ".align 16\n"
4595 ".global vgMemCheck_helperc_LOADV64le\n"
4596 ".type   vgMemCheck_helperc_LOADV64le, @function\n"
4597 "vgMemCheck_helperc_LOADV64le:\n"
4598 "      test   $0x7,  %eax\n"
4599 "      jne    .LLV64LE2\n"          /* jump if not aligned */
4600 "      mov    %eax,  %ecx\n"
4601 "      movzwl %ax,   %edx\n"
4602 "      shr    $0x10, %ecx\n"
4603 "      mov    primary_map(,%ecx,4), %ecx\n"
4604 "      shr    $0x3,  %edx\n"
4605 "      movzwl (%ecx,%edx,2), %edx\n"
4606 "      cmp    $0xaaaa, %edx\n"
4607 "      jne    .LLV64LE1\n"          /* jump if not all defined */
4608 "      xor    %eax, %eax\n"         /* return 0 in edx:eax */
4609 "      xor    %edx, %edx\n"
4610 "      ret\n"
4611 ".LLV64LE1:\n"
4612 "      cmp    $0x5555, %edx\n"
4613 "      jne    .LLV64LE2\n"         /* jump if not all undefined */
4614 "      or     $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
4615 "      or     $0xffffffff, %edx\n"
4616 "      ret\n"
4617 ".LLV64LE2:\n"
4618 "      xor    %ecx,  %ecx\n"  /* tail call to mc_LOADVn_slow(a, 64, 0) */
4619 "      mov    $64,   %edx\n"
4620 "      jmp    mc_LOADVn_slow\n"
4621 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
4622 ".previous\n"
4623 );
4624 
4625 #else
4626 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
MC_(helperc_LOADV64le)4627 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4628 {
4629    return mc_LOADV64(a, False);
4630 }
4631 #endif
4632 
4633 /*------------------------------------------------------------*/
4634 /*--- STOREV64                                             ---*/
4635 /*------------------------------------------------------------*/
4636 
4637 static INLINE
mc_STOREV64(Addr a,ULong vbits64,Bool isBigEndian)4638 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4639 {
4640    PROF_EVENT(MCPE_STOREV64);
4641 
4642 #ifndef PERF_FAST_STOREV
4643    // XXX: this slow case seems to be marginally faster than the fast case!
4644    // Investigate further.
4645    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4646 #else
4647    {
4648       UWord   sm_off16, vabits16;
4649       SecMap* sm;
4650 
4651       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4652          PROF_EVENT(MCPE_STOREV64_SLOW1);
4653          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4654          return;
4655       }
4656 
4657       sm       = get_secmap_for_reading_low(a);
4658       sm_off16 = SM_OFF_16(a);
4659       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4660 
4661       // To understand the below cleverness, see the extensive comments
4662       // in MC_(helperc_STOREV8).
4663       if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4664          if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4665             return;
4666          }
4667          if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4668             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4669             return;
4670          }
4671          PROF_EVENT(MCPE_STOREV64_SLOW2);
4672          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4673          return;
4674       }
4675       if (V_BITS64_UNDEFINED == vbits64) {
4676          if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4677             return;
4678          }
4679          if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4680             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4681             return;
4682          }
4683          PROF_EVENT(MCPE_STOREV64_SLOW3);
4684          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4685          return;
4686       }
4687 
4688       PROF_EVENT(MCPE_STOREV64_SLOW4);
4689       mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4690    }
4691 #endif
4692 }
4693 
MC_(helperc_STOREV64be)4694 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4695 {
4696    mc_STOREV64(a, vbits64, True);
4697 }
MC_(helperc_STOREV64le)4698 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4699 {
4700    mc_STOREV64(a, vbits64, False);
4701 }
4702 
4703 /*------------------------------------------------------------*/
4704 /*--- LOADV32                                              ---*/
4705 /*------------------------------------------------------------*/
4706 
4707 static INLINE
mc_LOADV32(Addr a,Bool isBigEndian)4708 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4709 {
4710    PROF_EVENT(MCPE_LOADV32);
4711 
4712 #ifndef PERF_FAST_LOADV
4713    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4714 #else
4715    {
4716       UWord   sm_off, vabits8;
4717       SecMap* sm;
4718 
4719       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4720          PROF_EVENT(MCPE_LOADV32_SLOW1);
4721          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4722       }
4723 
4724       sm      = get_secmap_for_reading_low(a);
4725       sm_off  = SM_OFF(a);
4726       vabits8 = sm->vabits8[sm_off];
4727 
4728       // Handle common case quickly: a is suitably aligned, is mapped, and the
4729       // entire word32 it lives in is addressible.
4730       // Convert V bits from compact memory form to expanded register form.
4731       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4732       // Almost certainly not necessary, but be paranoid.
4733       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4734          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4735       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4736          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4737       } else {
4738          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4739          PROF_EVENT(MCPE_LOADV32_SLOW2);
4740          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4741       }
4742    }
4743 #endif
4744 }
4745 
4746 // Generic for all platforms
MC_(helperc_LOADV32be)4747 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4748 {
4749    return mc_LOADV32(a, True);
4750 }
4751 
4752 // Non-generic assembly for arm32-linux
4753 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4754     && defined(VGP_arm_linux)
4755 __asm__( /* Derived from NCode template */
4756 ".text                                  \n"
4757 ".align 2                               \n"
4758 ".global vgMemCheck_helperc_LOADV32le   \n"
4759 ".type   vgMemCheck_helperc_LOADV32le, %function \n"
4760 "vgMemCheck_helperc_LOADV32le:          \n"
4761 "      tst    r0, #3                    \n" // 1
4762 "      movw   r3, #:lower16:primary_map \n" // 1
4763 "      bne    .LLV32LEc4                \n" // 2  if misaligned
4764 "      lsr    r2, r0, #16               \n" // 3
4765 "      movt   r3, #:upper16:primary_map \n" // 3
4766 "      ldr    r2, [r3, r2, lsl #2]      \n" // 4
4767 "      uxth   r1, r0                    \n" // 4
4768 "      ldrb   r1, [r2, r1, lsr #2]      \n" // 5
4769 "      cmp    r1, #0xAA                 \n" // 6  0xAA == VA_BITS8_DEFINED
4770 "      bne    .LLV32LEc0                \n" // 7  if !all_defined
4771 "      mov    r0, #0x0                  \n" // 8  0x0 == V_BITS32_DEFINED
4772 "      bx     lr                        \n" // 9
4773 ".LLV32LEc0:                            \n"
4774 "      cmp    r1, #0x55                 \n" // 0x55 == VA_BITS8_UNDEFINED
4775 "      bne    .LLV32LEc4                \n" // if !all_undefined
4776 "      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4777 "      bx     lr                        \n"
4778 ".LLV32LEc4:                            \n"
4779 "      push   {r4, lr}                  \n"
4780 "      mov    r2, #0                    \n"
4781 "      mov    r1, #32                   \n"
4782 "      bl     mc_LOADVn_slow            \n"
4783 "      pop    {r4, pc}                  \n"
4784 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
4785 ".previous\n"
4786 );
4787 
4788 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4789       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4790 __asm__(
4791 ".text\n"
4792 ".align 16\n"
4793 ".global vgMemCheck_helperc_LOADV32le\n"
4794 ".type   vgMemCheck_helperc_LOADV32le, @function\n"
4795 "vgMemCheck_helperc_LOADV32le:\n"
4796 "      test   $0x3,  %eax\n"
4797 "      jnz    .LLV32LE2\n"         /* jump if misaligned */
4798 "      mov    %eax,  %edx\n"
4799 "      shr    $16,   %edx\n"
4800 "      mov    primary_map(,%edx,4), %ecx\n"
4801 "      movzwl %ax,   %edx\n"
4802 "      shr    $2,    %edx\n"
4803 "      movzbl (%ecx,%edx,1), %edx\n"
4804 "      cmp    $0xaa, %edx\n"       /* compare to VA_BITS8_DEFINED */
4805 "      jne    .LLV32LE1\n"         /* jump if not completely defined */
4806 "      xor    %eax,  %eax\n"       /* else return V_BITS32_DEFINED */
4807 "      ret\n"
4808 ".LLV32LE1:\n"
4809 "      cmp    $0x55, %edx\n"       /* compare to VA_BITS8_UNDEFINED */
4810 "      jne    .LLV32LE2\n"         /* jump if not completely undefined */
4811 "      or     $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
4812 "      ret\n"
4813 ".LLV32LE2:\n"
4814 "      xor    %ecx,  %ecx\n"       /* tail call mc_LOADVn_slow(a, 32, 0) */
4815 "      mov    $32,   %edx\n"
4816 "      jmp    mc_LOADVn_slow\n"
4817 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
4818 ".previous\n"
4819 );
4820 
4821 #else
4822 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
MC_(helperc_LOADV32le)4823 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4824 {
4825    return mc_LOADV32(a, False);
4826 }
4827 #endif
4828 
4829 /*------------------------------------------------------------*/
4830 /*--- STOREV32                                             ---*/
4831 /*------------------------------------------------------------*/
4832 
4833 static INLINE
mc_STOREV32(Addr a,UWord vbits32,Bool isBigEndian)4834 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4835 {
4836    PROF_EVENT(MCPE_STOREV32);
4837 
4838 #ifndef PERF_FAST_STOREV
4839    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4840 #else
4841    {
4842       UWord   sm_off, vabits8;
4843       SecMap* sm;
4844 
4845       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4846          PROF_EVENT(MCPE_STOREV32_SLOW1);
4847          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4848          return;
4849       }
4850 
4851       sm      = get_secmap_for_reading_low(a);
4852       sm_off  = SM_OFF(a);
4853       vabits8 = sm->vabits8[sm_off];
4854 
4855       // To understand the below cleverness, see the extensive comments
4856       // in MC_(helperc_STOREV8).
4857       if (LIKELY(V_BITS32_DEFINED == vbits32)) {
4858          if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
4859             return;
4860          }
4861          if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
4862             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4863             return;
4864          }
4865          PROF_EVENT(MCPE_STOREV32_SLOW2);
4866          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4867          return;
4868       }
4869       if (V_BITS32_UNDEFINED == vbits32) {
4870          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4871             return;
4872          }
4873          if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4874             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4875             return;
4876          }
4877          PROF_EVENT(MCPE_STOREV32_SLOW3);
4878          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4879          return;
4880       }
4881 
4882       PROF_EVENT(MCPE_STOREV32_SLOW4);
4883       mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4884    }
4885 #endif
4886 }
4887 
MC_(helperc_STOREV32be)4888 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4889 {
4890    mc_STOREV32(a, vbits32, True);
4891 }
MC_(helperc_STOREV32le)4892 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4893 {
4894    mc_STOREV32(a, vbits32, False);
4895 }
4896 
4897 /*------------------------------------------------------------*/
4898 /*--- LOADV16                                              ---*/
4899 /*------------------------------------------------------------*/
4900 
4901 static INLINE
mc_LOADV16(Addr a,Bool isBigEndian)4902 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4903 {
4904    PROF_EVENT(MCPE_LOADV16);
4905 
4906 #ifndef PERF_FAST_LOADV
4907    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4908 #else
4909    {
4910       UWord   sm_off, vabits8;
4911       SecMap* sm;
4912 
4913       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4914          PROF_EVENT(MCPE_LOADV16_SLOW1);
4915          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4916       }
4917 
4918       sm      = get_secmap_for_reading_low(a);
4919       sm_off  = SM_OFF(a);
4920       vabits8 = sm->vabits8[sm_off];
4921       // Handle common case quickly: a is suitably aligned, is mapped, and is
4922       // addressible.
4923       // Convert V bits from compact memory form to expanded register form
4924       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
4925       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4926       else {
4927          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4928          // the two sub-bytes.
4929          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4930          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
4931          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4932          else {
4933             /* Slow case: the two bytes are not all-defined or all-undefined. */
4934             PROF_EVENT(MCPE_LOADV16_SLOW2);
4935             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4936          }
4937       }
4938    }
4939 #endif
4940 }
4941 
4942 // Generic for all platforms
MC_(helperc_LOADV16be)4943 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4944 {
4945    return mc_LOADV16(a, True);
4946 }
4947 
4948 // Non-generic assembly for arm32-linux
4949 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4950     && defined(VGP_arm_linux)
4951 __asm__( /* Derived from NCode template */
4952 ".text                                  \n"
4953 ".align 2                               \n"
4954 ".global vgMemCheck_helperc_LOADV16le   \n"
4955 ".type   vgMemCheck_helperc_LOADV16le, %function \n"
4956 "vgMemCheck_helperc_LOADV16le:          \n" //
4957 "      tst    r0, #1                    \n" //
4958 "      bne    .LLV16LEc12               \n" // if misaligned
4959 "      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
4960 "      movw   r3, #:lower16:primary_map \n" //
4961 "      uxth   r1, r0                    \n" // r1 = sec-map-offB
4962 "      movt   r3, #:upper16:primary_map \n" //
4963 "      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
4964 "      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
4965 "      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
4966 "      bne    .LLV16LEc0                \n" // no, goto .LLV16LEc0
4967 ".LLV16LEh9:                            \n" //
4968 "      mov    r0, #0xFFFFFFFF           \n" //
4969 "      lsl    r0, r0, #16               \n" // V_BITS16_DEFINED | top16safe
4970 "      bx     lr                        \n" //
4971 ".LLV16LEc0:                            \n" //
4972 "      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
4973 "      bne    .LLV16LEc4                \n" //
4974 ".LLV16LEc2:                            \n" //
4975 "      mov    r0, #0xFFFFFFFF           \n" // V_BITS16_UNDEFINED | top16safe
4976 "      bx     lr                        \n" //
4977 ".LLV16LEc4:                            \n" //
4978        // r1 holds sec-map-VABITS8.  r0 holds the address and is 2-aligned.
4979        // Extract the relevant 4 bits and inspect.
4980 "      and    r2, r0, #2       \n" // addr & 2
4981 "      add    r2, r2, r2       \n" // 2 * (addr & 2)
4982 "      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 2))
4983 "      and    r1, r1, #15      \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
4984 
4985 "      cmp    r1, #0xA                  \n" // VA_BITS4_DEFINED
4986 "      beq    .LLV16LEh9                \n" //
4987 
4988 "      cmp    r1, #0x5                  \n" // VA_BITS4_UNDEFINED
4989 "      beq    .LLV16LEc2                \n" //
4990 
4991 ".LLV16LEc12:                           \n" //
4992 "      push   {r4, lr}                  \n" //
4993 "      mov    r2, #0                    \n" //
4994 "      mov    r1, #16                   \n" //
4995 "      bl     mc_LOADVn_slow            \n" //
4996 "      pop    {r4, pc}                  \n" //
4997 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
4998 ".previous\n"
4999 );
5000 
5001 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5002       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5003 __asm__(
5004 ".text\n"
5005 ".align 16\n"
5006 ".global vgMemCheck_helperc_LOADV16le\n"
5007 ".type   vgMemCheck_helperc_LOADV16le, @function\n"
5008 "vgMemCheck_helperc_LOADV16le:\n"
5009 "      test   $0x1,  %eax\n"
5010 "      jne    .LLV16LE5\n"          /* jump if not aligned */
5011 "      mov    %eax,  %edx\n"
5012 "      shr    $0x10, %edx\n"
5013 "      mov    primary_map(,%edx,4), %ecx\n"
5014 "      movzwl %ax,   %edx\n"
5015 "      shr    $0x2,  %edx\n"
5016 "      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5017 "      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED */
5018 "      jne    .LLV16LE2\n"          /* jump if not all 32bits defined */
5019 ".LLV16LE1:\n"
5020 "      mov    $0xffff0000,%eax\n"   /* V_BITS16_DEFINED | top16safe */
5021 "      ret\n"
5022 ".LLV16LE2:\n"
5023 "      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
5024 "      jne    .LLV16LE4\n"          /* jump if not all 32bits undefined */
5025 ".LLV16LE3:\n"
5026 "      or     $0xffffffff,%eax\n"   /* V_BITS16_UNDEFINED | top16safe */
5027 "      ret\n"
5028 ".LLV16LE4:\n"
5029 "      mov    %eax,  %ecx\n"
5030 "      and    $0x2,  %ecx\n"
5031 "      add    %ecx,  %ecx\n"
5032 "      sar    %cl,   %edx\n"
5033 "      and    $0xf,  %edx\n"
5034 "      cmp    $0xa,  %edx\n"
5035 "      je     .LLV16LE1\n"          /* jump if all 16bits are defined */
5036 "      cmp    $0x5,  %edx\n"
5037 "      je     .LLV16LE3\n"          /* jump if all 16bits are undefined */
5038 ".LLV16LE5:\n"
5039 "      xor    %ecx,  %ecx\n"        /* tail call mc_LOADVn_slow(a, 16, 0) */
5040 "      mov    $16,   %edx\n"
5041 "      jmp    mc_LOADVn_slow\n"
5042 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5043 ".previous\n"
5044 );
5045 
5046 #else
5047 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
MC_(helperc_LOADV16le)5048 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5049 {
5050    return mc_LOADV16(a, False);
5051 }
5052 #endif
5053 
5054 /*------------------------------------------------------------*/
5055 /*--- STOREV16                                             ---*/
5056 /*------------------------------------------------------------*/
5057 
5058 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5059 static INLINE
accessible_vabits4_in_vabits8(Addr a,UChar vabits8)5060 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5061 {
5062    UInt shift;
5063    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
5064    shift = (a & 2) << 1;               // shift by 0 or 4
5065    vabits8 >>= shift;                  // shift the four bits to the bottom
5066     // check 2 x vabits2 != VA_BITS2_NOACCESS
5067    return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5068       &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
5069 }
5070 
5071 static INLINE
mc_STOREV16(Addr a,UWord vbits16,Bool isBigEndian)5072 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5073 {
5074    PROF_EVENT(MCPE_STOREV16);
5075 
5076 #ifndef PERF_FAST_STOREV
5077    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5078 #else
5079    {
5080       UWord   sm_off, vabits8;
5081       SecMap* sm;
5082 
5083       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5084          PROF_EVENT(MCPE_STOREV16_SLOW1);
5085          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5086          return;
5087       }
5088 
5089       sm      = get_secmap_for_reading_low(a);
5090       sm_off  = SM_OFF(a);
5091       vabits8 = sm->vabits8[sm_off];
5092 
5093       // To understand the below cleverness, see the extensive comments
5094       // in MC_(helperc_STOREV8).
5095       if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5096          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5097             return;
5098          }
5099          if (!is_distinguished_sm(sm)
5100              && accessible_vabits4_in_vabits8(a, vabits8)) {
5101             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5102                                          &(sm->vabits8[sm_off]) );
5103             return;
5104          }
5105          PROF_EVENT(MCPE_STOREV16_SLOW2);
5106          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5107       }
5108       if (V_BITS16_UNDEFINED == vbits16) {
5109          if (vabits8 == VA_BITS8_UNDEFINED) {
5110             return;
5111          }
5112          if (!is_distinguished_sm(sm)
5113              && accessible_vabits4_in_vabits8(a, vabits8)) {
5114             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5115                                          &(sm->vabits8[sm_off]) );
5116             return;
5117          }
5118          PROF_EVENT(MCPE_STOREV16_SLOW3);
5119          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5120          return;
5121       }
5122 
5123       PROF_EVENT(MCPE_STOREV16_SLOW4);
5124       mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5125    }
5126 #endif
5127 }
5128 
5129 
MC_(helperc_STOREV16be)5130 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5131 {
5132    mc_STOREV16(a, vbits16, True);
5133 }
MC_(helperc_STOREV16le)5134 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5135 {
5136    mc_STOREV16(a, vbits16, False);
5137 }
5138 
5139 /*------------------------------------------------------------*/
5140 /*--- LOADV8                                               ---*/
5141 /*------------------------------------------------------------*/
5142 
5143 /* Note: endianness is irrelevant for size == 1 */
5144 
5145 // Non-generic assembly for arm32-linux
5146 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5147     && defined(VGP_arm_linux)
5148 __asm__( /* Derived from NCode template */
5149 ".text                                  \n"
5150 ".align 2                               \n"
5151 ".global vgMemCheck_helperc_LOADV8      \n"
5152 ".type   vgMemCheck_helperc_LOADV8, %function \n"
5153 "vgMemCheck_helperc_LOADV8:             \n" //
5154 "      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
5155 "      movw   r3, #:lower16:primary_map \n" //
5156 "      uxth   r1, r0                    \n" // r1 = sec-map-offB
5157 "      movt   r3, #:upper16:primary_map \n" //
5158 "      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
5159 "      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
5160 "      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
5161 "      bne    .LLV8c0                   \n" // no, goto .LLV8c0
5162 ".LLV8h9:                               \n" //
5163 "      mov    r0, #0xFFFFFF00           \n" // V_BITS8_DEFINED | top24safe
5164 "      bx     lr                        \n" //
5165 ".LLV8c0:                               \n" //
5166 "      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
5167 "      bne    .LLV8c4                   \n" //
5168 ".LLV8c2:                               \n" //
5169 "      mov    r0, #0xFFFFFFFF           \n" // V_BITS8_UNDEFINED | top24safe
5170 "      bx     lr                        \n" //
5171 ".LLV8c4:                               \n" //
5172        // r1 holds sec-map-VABITS8
5173        // r0 holds the address.  Extract the relevant 2 bits and inspect.
5174 "      and    r2, r0, #3       \n" // addr & 3
5175 "      add    r2, r2, r2       \n" // 2 * (addr & 3)
5176 "      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5177 "      and    r1, r1, #3       \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5178 
5179 "      cmp    r1, #2                    \n" // VA_BITS2_DEFINED
5180 "      beq    .LLV8h9                   \n" //
5181 
5182 "      cmp    r1, #1                    \n" // VA_BITS2_UNDEFINED
5183 "      beq    .LLV8c2                   \n" //
5184 
5185 "      push   {r4, lr}                  \n" //
5186 "      mov    r2, #0                    \n" //
5187 "      mov    r1, #8                    \n" //
5188 "      bl     mc_LOADVn_slow            \n" //
5189 "      pop    {r4, pc}                  \n" //
5190 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5191 ".previous\n"
5192 );
5193 
5194 /* Non-generic assembly for x86-linux */
5195 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5196       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5197 __asm__(
5198 ".text\n"
5199 ".align 16\n"
5200 ".global vgMemCheck_helperc_LOADV8\n"
5201 ".type   vgMemCheck_helperc_LOADV8, @function\n"
5202 "vgMemCheck_helperc_LOADV8:\n"
5203 "      mov    %eax,  %edx\n"
5204 "      shr    $0x10, %edx\n"
5205 "      mov    primary_map(,%edx,4), %ecx\n"
5206 "      movzwl %ax,   %edx\n"
5207 "      shr    $0x2,  %edx\n"
5208 "      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5209 "      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED? */
5210 "      jne    .LLV8LE2\n"           /* jump if not defined */
5211 ".LLV8LE1:\n"
5212 "      mov    $0xffffff00, %eax\n"  /* V_BITS8_DEFINED | top24safe */
5213 "      ret\n"
5214 ".LLV8LE2:\n"
5215 "      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
5216 "      jne    .LLV8LE4\n"           /* jump if not all 32bits are undefined */
5217 ".LLV8LE3:\n"
5218 "      or     $0xffffffff, %eax\n"  /* V_BITS8_UNDEFINED | top24safe */
5219 "      ret\n"
5220 ".LLV8LE4:\n"
5221 "      mov    %eax,  %ecx\n"
5222 "      and    $0x3,  %ecx\n"
5223 "      add    %ecx,  %ecx\n"
5224 "      sar    %cl,   %edx\n"
5225 "      and    $0x3,  %edx\n"
5226 "      cmp    $0x2,  %edx\n"
5227 "      je     .LLV8LE1\n"           /* jump if all 8bits are defined */
5228 "      cmp    $0x1,  %edx\n"
5229 "      je     .LLV8LE3\n"           /* jump if all 8bits are undefined */
5230 "      xor    %ecx,  %ecx\n"        /* tail call to mc_LOADVn_slow(a, 8, 0) */
5231 "      mov    $0x8,  %edx\n"
5232 "      jmp    mc_LOADVn_slow\n"
5233 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5234 ".previous\n"
5235 );
5236 
5237 #else
5238 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5239 VG_REGPARM(1)
MC_(helperc_LOADV8)5240 UWord MC_(helperc_LOADV8) ( Addr a )
5241 {
5242    PROF_EVENT(MCPE_LOADV8);
5243 
5244 #ifndef PERF_FAST_LOADV
5245    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5246 #else
5247    {
5248       UWord   sm_off, vabits8;
5249       SecMap* sm;
5250 
5251       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5252          PROF_EVENT(MCPE_LOADV8_SLOW1);
5253          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5254       }
5255 
5256       sm      = get_secmap_for_reading_low(a);
5257       sm_off  = SM_OFF(a);
5258       vabits8 = sm->vabits8[sm_off];
5259       // Convert V bits from compact memory form to expanded register form
5260       // Handle common case quickly: a is mapped, and the entire
5261       // word32 it lives in is addressible.
5262       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
5263       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5264       else {
5265          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5266          // the single byte.
5267          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5268          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
5269          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5270          else {
5271             /* Slow case: the byte is not all-defined or all-undefined. */
5272             PROF_EVENT(MCPE_LOADV8_SLOW2);
5273             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5274          }
5275       }
5276    }
5277 #endif
5278 }
5279 #endif
5280 
5281 /*------------------------------------------------------------*/
5282 /*--- STOREV8                                              ---*/
5283 /*------------------------------------------------------------*/
5284 
5285 VG_REGPARM(2)
MC_(helperc_STOREV8)5286 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5287 {
5288    PROF_EVENT(MCPE_STOREV8);
5289 
5290 #ifndef PERF_FAST_STOREV
5291    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5292 #else
5293    {
5294       UWord   sm_off, vabits8;
5295       SecMap* sm;
5296 
5297       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5298          PROF_EVENT(MCPE_STOREV8_SLOW1);
5299          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5300          return;
5301       }
5302 
5303       sm      = get_secmap_for_reading_low(a);
5304       sm_off  = SM_OFF(a);
5305       vabits8 = sm->vabits8[sm_off];
5306 
5307       // Clevernesses to speed up storing V bits.
5308       // The 64/32/16 bit cases also have similar clevernesses, but it
5309       // works a little differently to the code below.
5310       //
5311       // Cleverness 1:  sometimes we don't have to write the shadow memory at
5312       // all, if we can tell that what we want to write is the same as what is
5313       // already there. These cases are marked below as "defined on defined" and
5314       // "undefined on undefined".
5315       //
5316       // Cleverness 2:
5317       // We also avoid to call mc_STOREVn_slow if the V bits can directly
5318       // be written in the secondary map. V bits can be directly written
5319       // if 4 conditions are respected:
5320       //   * The address for which V bits are written is naturally aligned
5321       //        on 1 byte  for STOREV8 (this is always true)
5322       //        on 2 bytes for STOREV16
5323       //        on 4 bytes for STOREV32
5324       //        on 8 bytes for STOREV64.
5325       //   * V bits being written are either fully defined or fully undefined.
5326       //     (for partially defined V bits, V bits cannot be directly written,
5327       //      as the secondary vbits table must be maintained).
5328       //   * the secmap is not distinguished (distinguished maps cannot be
5329       //     modified).
5330       //   * the memory corresponding to the V bits being written is
5331       //     accessible (if one or more bytes are not accessible,
5332       //     we must call mc_STOREVn_slow in order to report accessibility
5333       //     errors).
5334       //     Note that for STOREV32 and STOREV64, it is too expensive
5335       //     to verify the accessibility of each byte for the benefit it
5336       //     brings. Instead, a quicker check is done by comparing to
5337       //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5338       //     but misses some opportunity of direct modifications.
5339       //     Checking each byte accessibility was measured for
5340       //     STOREV32+perf tests and was slowing down all perf tests.
5341       // The cases corresponding to cleverness 2 are marked below as
5342       // "direct mod".
5343       if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5344          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5345             return; // defined on defined
5346          }
5347          if (!is_distinguished_sm(sm)
5348              && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5349             // direct mod
5350             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5351                                          &(sm->vabits8[sm_off]) );
5352             return;
5353          }
5354          PROF_EVENT(MCPE_STOREV8_SLOW2);
5355          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5356          return;
5357       }
5358       if (V_BITS8_UNDEFINED == vbits8) {
5359          if (vabits8 == VA_BITS8_UNDEFINED) {
5360             return; // undefined on undefined
5361          }
5362          if (!is_distinguished_sm(sm)
5363              && (VA_BITS2_NOACCESS
5364                  != extract_vabits2_from_vabits8(a, vabits8))) {
5365             // direct mod
5366             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5367                                          &(sm->vabits8[sm_off]) );
5368             return;
5369          }
5370          PROF_EVENT(MCPE_STOREV8_SLOW3);
5371          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5372          return;
5373       }
5374 
5375       // Partially defined word
5376       PROF_EVENT(MCPE_STOREV8_SLOW4);
5377       mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5378    }
5379 #endif
5380 }
5381 
5382 
5383 /*------------------------------------------------------------*/
5384 /*--- Functions called directly from generated code:       ---*/
5385 /*--- Value-check failure handlers.                        ---*/
5386 /*------------------------------------------------------------*/
5387 
5388 /* Call these ones when an origin is available ... */
5389 VG_REGPARM(1)
MC_(helperc_value_check0_fail_w_o)5390 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5391    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5392 }
5393 
5394 VG_REGPARM(1)
MC_(helperc_value_check1_fail_w_o)5395 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5396    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5397 }
5398 
5399 VG_REGPARM(1)
MC_(helperc_value_check4_fail_w_o)5400 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5401    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5402 }
5403 
5404 VG_REGPARM(1)
MC_(helperc_value_check8_fail_w_o)5405 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5406    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5407 }
5408 
5409 VG_REGPARM(2)
MC_(helperc_value_checkN_fail_w_o)5410 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5411    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5412 }
5413 
5414 /* ... and these when an origin isn't available. */
5415 
5416 VG_REGPARM(0)
MC_(helperc_value_check0_fail_no_o)5417 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5418    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5419 }
5420 
5421 VG_REGPARM(0)
MC_(helperc_value_check1_fail_no_o)5422 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5423    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5424 }
5425 
5426 VG_REGPARM(0)
MC_(helperc_value_check4_fail_no_o)5427 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5428    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5429 }
5430 
5431 VG_REGPARM(0)
MC_(helperc_value_check8_fail_no_o)5432 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5433    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5434 }
5435 
5436 VG_REGPARM(1)
MC_(helperc_value_checkN_fail_no_o)5437 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5438    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5439 }
5440 
5441 
5442 /*------------------------------------------------------------*/
5443 /*--- Metadata get/set functions, for client requests.     ---*/
5444 /*------------------------------------------------------------*/
5445 
5446 // Nb: this expands the V+A bits out into register-form V bits, even though
5447 // they're in memory.  This is for backward compatibility, and because it's
5448 // probably what the user wants.
5449 
5450 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5451    error [no longer used], 3 == addressing error. */
5452 /* Nb: We used to issue various definedness/addressability errors from here,
5453    but we took them out because they ranged from not-very-helpful to
5454    downright annoying, and they complicated the error data structures. */
mc_get_or_set_vbits_for_client(Addr a,Addr vbits,SizeT szB,Bool setting,Bool is_client_request)5455 static Int mc_get_or_set_vbits_for_client (
5456    Addr a,
5457    Addr vbits,
5458    SizeT szB,
5459    Bool setting, /* True <=> set vbits,  False <=> get vbits */
5460    Bool is_client_request /* True <=> real user request
5461                              False <=> internal call from gdbserver */
5462 )
5463 {
5464    SizeT i;
5465    Bool  ok;
5466    UChar vbits8;
5467 
5468    /* Check that arrays are addressible before doing any getting/setting.
5469       vbits to be checked only for real user request. */
5470    for (i = 0; i < szB; i++) {
5471       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5472           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5473          return 3;
5474       }
5475    }
5476 
5477    /* Do the copy */
5478    if (setting) {
5479       /* setting */
5480       for (i = 0; i < szB; i++) {
5481          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5482          tl_assert(ok);
5483       }
5484    } else {
5485       /* getting */
5486       for (i = 0; i < szB; i++) {
5487          ok = get_vbits8(a + i, &vbits8);
5488          tl_assert(ok);
5489          ((UChar*)vbits)[i] = vbits8;
5490       }
5491       if (is_client_request)
5492         // The bytes in vbits[] have now been set, so mark them as such.
5493         MC_(make_mem_defined)(vbits, szB);
5494    }
5495 
5496    return 1;
5497 }
5498 
5499 
5500 /*------------------------------------------------------------*/
5501 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
5502 /*------------------------------------------------------------*/
5503 
5504 /* For the memory leak detector, say whether an entire 64k chunk of
5505    address space is possibly in use, or not.  If in doubt return
5506    True.
5507 */
MC_(is_within_valid_secondary)5508 Bool MC_(is_within_valid_secondary) ( Addr a )
5509 {
5510    SecMap* sm = maybe_get_secmap_for ( a );
5511    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5512       /* Definitely not in use. */
5513       return False;
5514    } else {
5515       return True;
5516    }
5517 }
5518 
5519 
5520 /* For the memory leak detector, say whether or not a given word
5521    address is to be regarded as valid. */
MC_(is_valid_aligned_word)5522 Bool MC_(is_valid_aligned_word) ( Addr a )
5523 {
5524    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5525    tl_assert(VG_IS_WORD_ALIGNED(a));
5526    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5527       return False;
5528    if (sizeof(UWord) == 8) {
5529       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5530          return False;
5531    }
5532    if (UNLIKELY(MC_(in_ignored_range)(a)))
5533       return False;
5534    else
5535       return True;
5536 }
5537 
5538 
5539 /*------------------------------------------------------------*/
5540 /*--- Initialisation                                       ---*/
5541 /*------------------------------------------------------------*/
5542 
init_shadow_memory(void)5543 static void init_shadow_memory ( void )
5544 {
5545    Int     i;
5546    SecMap* sm;
5547 
5548    tl_assert(V_BIT_UNDEFINED   == 1);
5549    tl_assert(V_BIT_DEFINED     == 0);
5550    tl_assert(V_BITS8_UNDEFINED == 0xFF);
5551    tl_assert(V_BITS8_DEFINED   == 0);
5552 
5553    /* Build the 3 distinguished secondaries */
5554    sm = &sm_distinguished[SM_DIST_NOACCESS];
5555    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5556 
5557    sm = &sm_distinguished[SM_DIST_UNDEFINED];
5558    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5559 
5560    sm = &sm_distinguished[SM_DIST_DEFINED];
5561    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5562 
5563    /* Set up the primary map. */
5564    /* These entries gradually get overwritten as the used address
5565       space expands. */
5566    for (i = 0; i < N_PRIMARY_MAP; i++)
5567       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5568 
5569    /* Auxiliary primary maps */
5570    init_auxmap_L1_L2();
5571 
5572    /* auxmap_size = auxmap_used = 0;
5573       no ... these are statically initialised */
5574 
5575    /* Secondary V bit table */
5576    secVBitTable = createSecVBitTable();
5577 }
5578 
5579 
5580 /*------------------------------------------------------------*/
5581 /*--- Sanity check machinery (permanently engaged)         ---*/
5582 /*------------------------------------------------------------*/
5583 
mc_cheap_sanity_check(void)5584 static Bool mc_cheap_sanity_check ( void )
5585 {
5586    n_sanity_cheap++;
5587    PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5588    /* Check for sane operating level */
5589    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5590       return False;
5591    /* nothing else useful we can rapidly check */
5592    return True;
5593 }
5594 
mc_expensive_sanity_check(void)5595 static Bool mc_expensive_sanity_check ( void )
5596 {
5597    Int     i;
5598    Word    n_secmaps_found;
5599    SecMap* sm;
5600    const HChar*  errmsg;
5601    Bool    bad = False;
5602 
5603    if (0) VG_(printf)("expensive sanity check\n");
5604    if (0) return True;
5605 
5606    n_sanity_expensive++;
5607    PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5608 
5609    /* Check for sane operating level */
5610    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5611       return False;
5612 
5613    /* Check that the 3 distinguished SMs are still as they should be. */
5614 
5615    /* Check noaccess DSM. */
5616    sm = &sm_distinguished[SM_DIST_NOACCESS];
5617    for (i = 0; i < SM_CHUNKS; i++)
5618       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5619          bad = True;
5620 
5621    /* Check undefined DSM. */
5622    sm = &sm_distinguished[SM_DIST_UNDEFINED];
5623    for (i = 0; i < SM_CHUNKS; i++)
5624       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5625          bad = True;
5626 
5627    /* Check defined DSM. */
5628    sm = &sm_distinguished[SM_DIST_DEFINED];
5629    for (i = 0; i < SM_CHUNKS; i++)
5630       if (sm->vabits8[i] != VA_BITS8_DEFINED)
5631          bad = True;
5632 
5633    if (bad) {
5634       VG_(printf)("memcheck expensive sanity: "
5635                   "distinguished_secondaries have changed\n");
5636       return False;
5637    }
5638 
5639    /* If we're not checking for undefined value errors, the secondary V bit
5640     * table should be empty. */
5641    if (MC_(clo_mc_level) == 1) {
5642       if (0 != VG_(OSetGen_Size)(secVBitTable))
5643          return False;
5644    }
5645 
5646    /* check the auxiliary maps, very thoroughly */
5647    n_secmaps_found = 0;
5648    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5649    if (errmsg) {
5650       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5651       return False;
5652    }
5653 
5654    /* n_secmaps_found is now the number referred to by the auxiliary
5655       primary map.  Now add on the ones referred to by the main
5656       primary map. */
5657    for (i = 0; i < N_PRIMARY_MAP; i++) {
5658       if (primary_map[i] == NULL) {
5659          bad = True;
5660       } else {
5661          if (!is_distinguished_sm(primary_map[i]))
5662             n_secmaps_found++;
5663       }
5664    }
5665 
5666    /* check that the number of secmaps issued matches the number that
5667       are reachable (iow, no secmap leaks) */
5668    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5669       bad = True;
5670 
5671    if (bad) {
5672       VG_(printf)("memcheck expensive sanity: "
5673                   "apparent secmap leakage\n");
5674       return False;
5675    }
5676 
5677    if (bad) {
5678       VG_(printf)("memcheck expensive sanity: "
5679                   "auxmap covers wrong address space\n");
5680       return False;
5681    }
5682 
5683    /* there is only one pointer to each secmap (expensive) */
5684 
5685    return True;
5686 }
5687 
5688 /*------------------------------------------------------------*/
5689 /*--- Command line args                                    ---*/
5690 /*------------------------------------------------------------*/
5691 
5692 /* 31 Aug 2015: Vectorised code is now so widespread that
5693    --partial-loads-ok needs to be enabled by default on all platforms.
5694    Not doing so causes lots of false errors. */
5695 Bool          MC_(clo_partial_loads_ok)       = True;
5696 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
5697 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
5698 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
5699 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
5700 UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
5701 UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
5702 UInt          MC_(clo_leak_check_heuristics)  =   H2S(LchStdString)
5703                                                 | H2S( LchLength64)
5704                                                 | H2S( LchNewArray)
5705                                                 | H2S( LchMultipleInheritance);
5706 Bool          MC_(clo_workaround_gcc296_bugs) = False;
5707 Int           MC_(clo_malloc_fill)            = -1;
5708 Int           MC_(clo_free_fill)              = -1;
5709 KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_and_free;
5710 Int           MC_(clo_mc_level)               = 2;
5711 Bool          MC_(clo_show_mismatched_frees)  = True;
5712 Bool          MC_(clo_expensive_definedness_checks) = False;
5713 
5714 static const HChar * MC_(parse_leak_heuristics_tokens) =
5715    "-,stdstring,length64,newarray,multipleinheritance";
5716 /* The first heuristic value (LchNone) has no keyword, as this is
5717    a fake heuristic used to collect the blocks found without any
5718    heuristic. */
5719 
mc_process_cmd_line_options(const HChar * arg)5720 static Bool mc_process_cmd_line_options(const HChar* arg)
5721 {
5722    const HChar* tmp_str;
5723    Int   tmp_show;
5724 
5725    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5726 
5727    /* Set MC_(clo_mc_level):
5728          1 = A bit tracking only
5729          2 = A and V bit tracking, but no V bit origins
5730          3 = A and V bit tracking, and V bit origins
5731 
5732       Do this by inspecting --undef-value-errors= and
5733       --track-origins=.  Reject the case --undef-value-errors=no
5734       --track-origins=yes as meaningless.
5735    */
5736    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
5737       if (MC_(clo_mc_level) == 3) {
5738          goto bad_level;
5739       } else {
5740          MC_(clo_mc_level) = 1;
5741          return True;
5742       }
5743    }
5744    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
5745       if (MC_(clo_mc_level) == 1)
5746          MC_(clo_mc_level) = 2;
5747       return True;
5748    }
5749    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
5750       if (MC_(clo_mc_level) == 3)
5751          MC_(clo_mc_level) = 2;
5752       return True;
5753    }
5754    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
5755       if (MC_(clo_mc_level) == 1) {
5756          goto bad_level;
5757       } else {
5758          MC_(clo_mc_level) = 3;
5759          return True;
5760       }
5761    }
5762 
5763         if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
5764    else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
5765                        MC_(parse_leak_kinds_tokens),
5766                        MC_(clo_error_for_leak_kinds)) {}
5767    else if VG_USET_CLO(arg, "--show-leak-kinds",
5768                        MC_(parse_leak_kinds_tokens),
5769                        MC_(clo_show_leak_kinds)) {}
5770    else if VG_USET_CLO(arg, "--leak-check-heuristics",
5771                        MC_(parse_leak_heuristics_tokens),
5772                        MC_(clo_leak_check_heuristics)) {}
5773    else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
5774       if (tmp_show) {
5775          MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
5776       } else {
5777          MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
5778       }
5779    }
5780    else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
5781       if (tmp_show) {
5782          MC_(clo_show_leak_kinds) |= R2S(Possible);
5783       } else {
5784          MC_(clo_show_leak_kinds) &= ~R2S(Possible);
5785       }
5786    }
5787    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
5788                                             MC_(clo_workaround_gcc296_bugs)) {}
5789 
5790    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
5791                                                0, 10*1000*1000*1000LL) {}
5792 
5793    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
5794                        MC_(clo_freelist_big_blocks),
5795                        0, 10*1000*1000*1000LL) {}
5796 
5797    else if VG_XACT_CLO(arg, "--leak-check=no",
5798                             MC_(clo_leak_check), LC_Off) {}
5799    else if VG_XACT_CLO(arg, "--leak-check=summary",
5800                             MC_(clo_leak_check), LC_Summary) {}
5801    else if VG_XACT_CLO(arg, "--leak-check=yes",
5802                             MC_(clo_leak_check), LC_Full) {}
5803    else if VG_XACT_CLO(arg, "--leak-check=full",
5804                             MC_(clo_leak_check), LC_Full) {}
5805 
5806    else if VG_XACT_CLO(arg, "--leak-resolution=low",
5807                             MC_(clo_leak_resolution), Vg_LowRes) {}
5808    else if VG_XACT_CLO(arg, "--leak-resolution=med",
5809                             MC_(clo_leak_resolution), Vg_MedRes) {}
5810    else if VG_XACT_CLO(arg, "--leak-resolution=high",
5811                             MC_(clo_leak_resolution), Vg_HighRes) {}
5812 
5813    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
5814       Bool ok = parse_ignore_ranges(tmp_str);
5815       if (!ok) {
5816          VG_(message)(Vg_DebugMsg,
5817             "ERROR: --ignore-ranges: "
5818             "invalid syntax, or end <= start in range\n");
5819          return False;
5820       }
5821       if (gIgnoredAddressRanges) {
5822          UInt i;
5823          for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
5824             UWord val     = IAR_INVALID;
5825             UWord key_min = ~(UWord)0;
5826             UWord key_max = (UWord)0;
5827             VG_(indexRangeMap)( &key_min, &key_max, &val,
5828                                 gIgnoredAddressRanges, i );
5829             tl_assert(key_min <= key_max);
5830             UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
5831             if (key_max - key_min > limit && val == IAR_CommandLine) {
5832                VG_(message)(Vg_DebugMsg,
5833                   "ERROR: --ignore-ranges: suspiciously large range:\n");
5834                VG_(message)(Vg_DebugMsg,
5835                    "       0x%lx-0x%lx (size %lu)\n", key_min, key_max,
5836                    key_max - key_min + 1);
5837                return False;
5838             }
5839          }
5840       }
5841    }
5842 
5843    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
5844    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
5845 
5846    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
5847                        MC_(clo_keep_stacktraces), KS_alloc) {}
5848    else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
5849                        MC_(clo_keep_stacktraces), KS_free) {}
5850    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
5851                        MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
5852    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
5853                        MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
5854    else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
5855                        MC_(clo_keep_stacktraces), KS_none) {}
5856 
5857    else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
5858                        MC_(clo_show_mismatched_frees)) {}
5859    else if VG_BOOL_CLO(arg, "--expensive-definedness-checks",
5860                        MC_(clo_expensive_definedness_checks)) {}
5861 
5862    else
5863       return VG_(replacement_malloc_process_cmd_line_option)(arg);
5864 
5865    return True;
5866 
5867 
5868   bad_level:
5869    VG_(fmsg_bad_option)(arg,
5870       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
5871 }
5872 
mc_print_usage(void)5873 static void mc_print_usage(void)
5874 {
5875    VG_(printf)(
5876 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
5877 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
5878 "    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
5879 "                                            [definite,possible]\n"
5880 "    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
5881 "                                            [definite,possible]\n"
5882 "        where kind is one of:\n"
5883 "          definite indirect possible reachable all none\n"
5884 "    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
5885 "        improving leak search false positive [all]\n"
5886 "        where heur is one of:\n"
5887 "          stdstring length64 newarray multipleinheritance all none\n"
5888 "    --show-reachable=yes             same as --show-leak-kinds=all\n"
5889 "    --show-reachable=no --show-possibly-lost=yes\n"
5890 "                                     same as --show-leak-kinds=definite,possible\n"
5891 "    --show-reachable=no --show-possibly-lost=no\n"
5892 "                                     same as --show-leak-kinds=definite\n"
5893 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
5894 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
5895 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [yes]\n"
5896 "    --expensive-definedness-checks=no|yes\n"
5897 "                                     Use extra-precise definedness tracking [no]\n"
5898 "    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
5899 "    --freelist-big-blocks=<number>   releases first blocks with size>= [1000000]\n"
5900 "    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
5901 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
5902 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
5903 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
5904 "    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
5905 "        stack trace(s) to keep for malloc'd/free'd areas       [alloc-and-free]\n"
5906 "    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
5907    );
5908 }
5909 
mc_print_debug_usage(void)5910 static void mc_print_debug_usage(void)
5911 {
5912    VG_(printf)(
5913 "    (none)\n"
5914    );
5915 }
5916 
5917 
5918 /*------------------------------------------------------------*/
5919 /*--- Client blocks                                        ---*/
5920 /*------------------------------------------------------------*/
5921 
5922 /* Client block management:
5923 
5924    This is managed as an expanding array of client block descriptors.
5925    Indices of live descriptors are issued to the client, so it can ask
5926    to free them later.  Therefore we cannot slide live entries down
5927    over dead ones.  Instead we must use free/inuse flags and scan for
5928    an empty slot at allocation time.  This in turn means allocation is
5929    relatively expensive, so we hope this does not happen too often.
5930 
5931    An unused block has start == size == 0
5932 */
5933 
5934 /* type CGenBlock is defined in mc_include.h */
5935 
5936 /* This subsystem is self-initialising. */
5937 static UWord      cgb_size = 0;
5938 static UWord      cgb_used = 0;
5939 static CGenBlock* cgbs     = NULL;
5940 
5941 /* Stats for this subsystem. */
5942 static ULong cgb_used_MAX = 0;   /* Max in use. */
5943 static ULong cgb_allocs   = 0;   /* Number of allocs. */
5944 static ULong cgb_discards = 0;   /* Number of discards. */
5945 static ULong cgb_search   = 0;   /* Number of searches. */
5946 
5947 
5948 /* Get access to the client block array. */
MC_(get_ClientBlock_array)5949 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5950                                  /*OUT*/UWord* nBlocks )
5951 {
5952    *blocks  = cgbs;
5953    *nBlocks = cgb_used;
5954 }
5955 
5956 
5957 static
alloc_client_block(void)5958 Int alloc_client_block ( void )
5959 {
5960    UWord      i, sz_new;
5961    CGenBlock* cgbs_new;
5962 
5963    cgb_allocs++;
5964 
5965    for (i = 0; i < cgb_used; i++) {
5966       cgb_search++;
5967       if (cgbs[i].start == 0 && cgbs[i].size == 0)
5968          return i;
5969    }
5970 
5971    /* Not found.  Try to allocate one at the end. */
5972    if (cgb_used < cgb_size) {
5973       cgb_used++;
5974       return cgb_used-1;
5975    }
5976 
5977    /* Ok, we have to allocate a new one. */
5978    tl_assert(cgb_used == cgb_size);
5979    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5980 
5981    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5982    for (i = 0; i < cgb_used; i++)
5983       cgbs_new[i] = cgbs[i];
5984 
5985    if (cgbs != NULL)
5986       VG_(free)( cgbs );
5987    cgbs = cgbs_new;
5988 
5989    cgb_size = sz_new;
5990    cgb_used++;
5991    if (cgb_used > cgb_used_MAX)
5992       cgb_used_MAX = cgb_used;
5993    return cgb_used-1;
5994 }
5995 
5996 
show_client_block_stats(void)5997 static void show_client_block_stats ( void )
5998 {
5999    VG_(message)(Vg_DebugMsg,
6000       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6001       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6002    );
6003 }
print_monitor_help(void)6004 static void print_monitor_help ( void )
6005 {
6006    VG_(gdb_printf)
6007       (
6008 "\n"
6009 "memcheck monitor commands:\n"
6010 "  xb <addr> [<len>]\n"
6011 "        prints validity bits for <len> (or 1) bytes at <addr>\n"
6012 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6013 "        Then prints the bytes values below the corresponding validity bits\n"
6014 "        in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6015 "        Example: xb 0x8049c78 10\n"
6016 "  get_vbits <addr> [<len>]\n"
6017 "        Similar to xb, but only prints the validity bytes by group of 4.\n"
6018 "  make_memory [noaccess|undefined\n"
6019 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
6020 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6021 "  check_memory [addressable|defined] <addr> [<len>]\n"
6022 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6023 "            and outputs a description of <addr>\n"
6024 "  leak_check [full*|summary]\n"
6025 "                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6026 "                [heuristics heur1,heur2,...]\n"
6027 "                [increased*|changed|any]\n"
6028 "                [unlimited*|limited <max_loss_records_output>]\n"
6029 "            * = defaults\n"
6030 "       where kind is one of:\n"
6031 "         definite indirect possible reachable all none\n"
6032 "       where heur is one of:\n"
6033 "         stdstring length64 newarray multipleinheritance all none*\n"
6034 "       Examples: leak_check\n"
6035 "                 leak_check summary any\n"
6036 "                 leak_check full kinds indirect,possible\n"
6037 "                 leak_check full reachable any limited 100\n"
6038 "  block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6039 "                [unlimited*|limited <max_blocks>]\n"
6040 "                [heuristics heur1,heur2,...]\n"
6041 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
6042 "        (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6043 "        With heuristics, only shows the blocks found via heur1,heur2,...\n"
6044 "            * = defaults\n"
6045 "  who_points_at <addr> [<len>]\n"
6046 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
6047 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6048 "         with len > 1, will also show \"interior pointers\")\n"
6049 "\n");
6050 }
6051 
6052 /* Print szB bytes at address, with a format similar to the gdb command
6053    x /<szB>xb address.
6054    res[i] == 1 indicates the corresponding byte is addressable. */
gdb_xb(Addr address,SizeT szB,Int res[])6055 static void gdb_xb (Addr address, SizeT szB, Int res[])
6056 {
6057    UInt i;
6058 
6059    for (i = 0; i < szB; i++) {
6060       UInt bnr = i % 8;
6061       if (bnr == 0) {
6062          if (i != 0)
6063             VG_(printf) ("\n"); // Terminate previous line
6064          VG_(printf) ("%p:", (void*)(address+i));
6065       }
6066       if (res[i] == 1)
6067          VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6068       else
6069          VG_(printf) ("\t0x??");
6070    }
6071    VG_(printf) ("\n"); // Terminate previous line
6072 }
6073 
6074 
6075 /* Returns the address of the next non space character,
6076    or address of the string terminator. */
next_non_space(HChar * s)6077 static HChar* next_non_space (HChar *s)
6078 {
6079    while (*s && *s == ' ')
6080       s++;
6081    return s;
6082 }
6083 
6084 /* Parse an integer slice, i.e. a single integer or a range of integer.
6085    Syntax is:
6086        <integer>[..<integer> ]
6087    (spaces are allowed before and/or after ..).
6088    Return True if range correctly parsed, False otherwise. */
VG_(parse_slice)6089 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6090                               UInt *from, UInt *to)
6091 {
6092    HChar* wl;
6093    HChar *endptr;
6094    endptr = NULL;////
6095    wl = VG_(strtok_r) (s, " ", saveptr);
6096 
6097    /* slice must start with an integer. */
6098    if (wl == NULL) {
6099       VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6100       return False;
6101    }
6102    *from = VG_(strtoull10) (wl, &endptr);
6103    if (endptr == wl) {
6104       VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6105       return False;
6106    }
6107 
6108    if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6109       /* wl token is an integer terminating the string
6110          or else next token does not start with .
6111          In both cases, the slice is a single integer. */
6112       *to = *from;
6113       return True;
6114    }
6115 
6116    if (*endptr == '\0') {
6117       // iii ..    => get the next token
6118       wl =  VG_(strtok_r) (NULL, " .", saveptr);
6119    } else {
6120       // It must be iii..
6121       if (*endptr != '.' && *(endptr+1) != '.') {
6122          VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6123          return False;
6124       }
6125       if ( *(endptr+2) == ' ') {
6126          // It must be iii.. jjj  => get the next token
6127          wl =  VG_(strtok_r) (NULL, " .", saveptr);
6128       } else {
6129          // It must be iii..jjj
6130          wl = endptr+2;
6131       }
6132    }
6133 
6134    *to = VG_(strtoull10) (wl, &endptr);
6135    if (*endptr != '\0') {
6136       VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6137       return False;
6138    }
6139 
6140    if (*from > *to) {
6141       VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6142                        "in slice <from>..<to>\n");
6143       return False;
6144    }
6145 
6146    return True;
6147 }
6148 
6149 /* return True if request recognised, False otherwise */
handle_gdb_monitor_command(ThreadId tid,HChar * req)6150 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6151 {
6152    HChar* wcmd;
6153    HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
6154    HChar *ssaveptr;
6155 
6156    VG_(strcpy) (s, req);
6157 
6158    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6159    /* NB: if possible, avoid introducing a new command below which
6160       starts with the same first letter(s) as an already existing
6161       command. This ensures a shorter abbreviation for the user. */
6162    switch (VG_(keyword_id)
6163            ("help get_vbits leak_check make_memory check_memory "
6164             "block_list who_points_at xb",
6165             wcmd, kwd_report_duplicated_matches)) {
6166    case -2: /* multiple matches */
6167       return True;
6168    case -1: /* not found */
6169       return False;
6170    case  0: /* help */
6171       print_monitor_help();
6172       return True;
6173    case  1: { /* get_vbits */
6174       Addr address;
6175       SizeT szB = 1;
6176       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6177          UChar vbits;
6178          Int i;
6179          Int unaddressable = 0;
6180          for (i = 0; i < szB; i++) {
6181             Int res = mc_get_or_set_vbits_for_client
6182                (address+i, (Addr) &vbits, 1,
6183                 False, /* get them */
6184                 False  /* is client request */ );
6185             /* we are before the first character on next line, print a \n. */
6186             if ((i % 32) == 0 && i != 0)
6187                VG_(printf) ("\n");
6188             /* we are before the next block of 4 starts, print a space. */
6189             else if ((i % 4) == 0 && i != 0)
6190                VG_(printf) (" ");
6191             if (res == 1) {
6192                VG_(printf) ("%02x", vbits);
6193             } else {
6194                tl_assert(3 == res);
6195                unaddressable++;
6196                VG_(printf) ("__");
6197             }
6198          }
6199          VG_(printf) ("\n");
6200          if (unaddressable) {
6201             VG_(printf)
6202                ("Address %p len %lu has %d bytes unaddressable\n",
6203                 (void *)address, szB, unaddressable);
6204          }
6205       }
6206       return True;
6207    }
6208    case  2: { /* leak_check */
6209       Int err = 0;
6210       LeakCheckParams lcp;
6211       HChar* kw;
6212 
6213       lcp.mode               = LC_Full;
6214       lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
6215       lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6216       lcp.heuristics         = 0;
6217       lcp.deltamode          = LCD_Increased;
6218       lcp.max_loss_records_output = 999999999;
6219       lcp.requested_by_monitor_command = True;
6220 
6221       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6222            kw != NULL;
6223            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6224          switch (VG_(keyword_id)
6225                  ("full summary "
6226                   "kinds reachable possibleleak definiteleak "
6227                   "heuristics "
6228                   "increased changed any "
6229                   "unlimited limited ",
6230                   kw, kwd_report_all)) {
6231          case -2: err++; break;
6232          case -1: err++; break;
6233          case  0: /* full */
6234             lcp.mode = LC_Full; break;
6235          case  1: /* summary */
6236             lcp.mode = LC_Summary; break;
6237          case  2: { /* kinds */
6238             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6239             if (wcmd == NULL
6240                 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6241                                         True/*allow_all*/,
6242                                         wcmd,
6243                                         &lcp.show_leak_kinds)) {
6244                VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6245                err++;
6246             }
6247             break;
6248          }
6249          case  3: /* reachable */
6250             lcp.show_leak_kinds = MC_(all_Reachedness)();
6251             break;
6252          case  4: /* possibleleak */
6253             lcp.show_leak_kinds
6254                = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6255             break;
6256          case  5: /* definiteleak */
6257             lcp.show_leak_kinds = R2S(Unreached);
6258             break;
6259          case  6: { /* heuristics */
6260             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6261             if (wcmd == NULL
6262                 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6263                                         True,/*allow_all*/
6264                                         wcmd,
6265                                         &lcp.heuristics)) {
6266                VG_(gdb_printf) ("missing or malformed heuristics set\n");
6267                err++;
6268             }
6269             break;
6270          }
6271          case  7: /* increased */
6272             lcp.deltamode = LCD_Increased; break;
6273          case  8: /* changed */
6274             lcp.deltamode = LCD_Changed; break;
6275          case  9: /* any */
6276             lcp.deltamode = LCD_Any; break;
6277          case 10: /* unlimited */
6278             lcp.max_loss_records_output = 999999999; break;
6279          case 11: { /* limited */
6280             Int int_value;
6281             const HChar* endptr;
6282 
6283             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6284             if (wcmd == NULL) {
6285                int_value = 0;
6286                endptr = "empty"; /* to report an error below */
6287             } else {
6288                HChar *the_end;
6289                int_value = VG_(strtoll10) (wcmd, &the_end);
6290                endptr = the_end;
6291             }
6292             if (*endptr != '\0')
6293                VG_(gdb_printf) ("missing or malformed integer value\n");
6294             else if (int_value > 0)
6295                lcp.max_loss_records_output = (UInt) int_value;
6296             else
6297                VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6298                                 " got %d\n", int_value);
6299             break;
6300          }
6301          default:
6302             tl_assert (0);
6303          }
6304       }
6305       if (!err)
6306          MC_(detect_memory_leaks)(tid, &lcp);
6307       return True;
6308    }
6309 
6310    case  3: { /* make_memory */
6311       Addr address;
6312       SizeT szB = 1;
6313       Int kwdid = VG_(keyword_id)
6314          ("noaccess undefined defined Definedifaddressable",
6315           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6316       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6317          return True;
6318       switch (kwdid) {
6319       case -2: break;
6320       case -1: break;
6321       case  0: MC_(make_mem_noaccess) (address, szB); break;
6322       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6323                                                     MC_OKIND_USER ); break;
6324       case  2: MC_(make_mem_defined) ( address, szB ); break;
6325       case  3: make_mem_defined_if_addressable ( address, szB ); break;;
6326       default: tl_assert(0);
6327       }
6328       return True;
6329    }
6330 
6331    case  4: { /* check_memory */
6332       Addr address;
6333       SizeT szB = 1;
6334       Addr bad_addr;
6335       UInt okind;
6336       const HChar* src;
6337       UInt otag;
6338       UInt ecu;
6339       ExeContext* origin_ec;
6340       MC_ReadResult res;
6341 
6342       Int kwdid = VG_(keyword_id)
6343          ("addressable defined",
6344           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6345       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6346          return True;
6347       switch (kwdid) {
6348       case -2: break;
6349       case -1: break;
6350       case  0: /* addressable */
6351          if (is_mem_addressable ( address, szB, &bad_addr ))
6352             VG_(printf) ("Address %p len %lu addressable\n",
6353                              (void *)address, szB);
6354          else
6355             VG_(printf)
6356                ("Address %p len %lu not addressable:\nbad address %p\n",
6357                 (void *)address, szB, (void *) bad_addr);
6358          MC_(pp_describe_addr) (address);
6359          break;
6360       case  1: /* defined */
6361          res = is_mem_defined ( address, szB, &bad_addr, &otag );
6362          if (MC_AddrErr == res)
6363             VG_(printf)
6364                ("Address %p len %lu not addressable:\nbad address %p\n",
6365                 (void *)address, szB, (void *) bad_addr);
6366          else if (MC_ValueErr == res) {
6367             okind = otag & 3;
6368             switch (okind) {
6369             case MC_OKIND_STACK:
6370                src = " was created by a stack allocation"; break;
6371             case MC_OKIND_HEAP:
6372                src = " was created by a heap allocation"; break;
6373             case MC_OKIND_USER:
6374                src = " was created by a client request"; break;
6375             case MC_OKIND_UNKNOWN:
6376                src = ""; break;
6377             default: tl_assert(0);
6378             }
6379             VG_(printf)
6380                ("Address %p len %lu not defined:\n"
6381                 "Uninitialised value at %p%s\n",
6382                 (void *)address, szB, (void *) bad_addr, src);
6383             ecu = otag & ~3;
6384             if (VG_(is_plausible_ECU)(ecu)) {
6385                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6386                VG_(pp_ExeContext)( origin_ec );
6387             }
6388          }
6389          else
6390             VG_(printf) ("Address %p len %lu defined\n",
6391                          (void *)address, szB);
6392          MC_(pp_describe_addr) (address);
6393          break;
6394       default: tl_assert(0);
6395       }
6396       return True;
6397    }
6398 
6399    case  5: { /* block_list */
6400       HChar* wl;
6401       HChar *the_end;
6402       UInt lr_nr_from = 0;
6403       UInt lr_nr_to = 0;
6404 
6405       if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6406          UInt limit_blocks = 999999999;
6407          Int int_value;
6408          UInt heuristics = 0;
6409 
6410          for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6411               wl != NULL;
6412               wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6413             switch (VG_(keyword_id) ("unlimited limited heuristics ",
6414                                      wl,  kwd_report_all)) {
6415             case -2: return True;
6416             case -1: return True;
6417             case  0: /* unlimited */
6418                limit_blocks = 999999999; break;
6419             case  1: /* limited */
6420                wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6421                if (wcmd == NULL) {
6422                   VG_(gdb_printf) ("missing integer value\n");
6423                   return True;
6424                }
6425                int_value = VG_(strtoll10) (wcmd, &the_end);
6426                if (*the_end != '\0') {
6427                   VG_(gdb_printf) ("malformed integer value\n");
6428                   return True;
6429                }
6430                if (int_value <= 0) {
6431                   VG_(gdb_printf) ("max_blocks must be >= 1,"
6432                                    " got %d\n", int_value);
6433                   return True;
6434                }
6435                limit_blocks = (UInt) int_value;
6436                break;
6437             case  2: /* heuristics */
6438                wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6439                if (wcmd == NULL
6440                    || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6441                                            True,/*allow_all*/
6442                                            wcmd,
6443                                            &heuristics)) {
6444                   VG_(gdb_printf) ("missing or malformed heuristics set\n");
6445                   return True;
6446                }
6447                break;
6448             default:
6449                tl_assert (0);
6450             }
6451          }
6452          /* substract 1 from lr_nr_from/lr_nr_to  as what is shown to the user
6453             is 1 more than the index in lr_array. */
6454          if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6455                                                          lr_nr_to-1,
6456                                                          limit_blocks,
6457                                                          heuristics))
6458             VG_(gdb_printf) ("invalid loss record nr\n");
6459       }
6460       return True;
6461    }
6462 
6463    case  6: { /* who_points_at */
6464       Addr address;
6465       SizeT szB = 1;
6466 
6467       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6468          return True;
6469       if (address == (Addr) 0) {
6470          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6471          return True;
6472       }
6473       MC_(who_points_at) (address, szB);
6474       return True;
6475    }
6476 
6477    case  7: { /* xb */
6478       Addr address;
6479       SizeT szB = 1;
6480       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6481          UChar vbits[8];
6482          Int res[8];
6483          Int i;
6484          Int unaddressable = 0;
6485          for (i = 0; i < szB; i++) {
6486             Int bnr = i % 8;
6487             res[bnr] = mc_get_or_set_vbits_for_client
6488                (address+i, (Addr) &vbits[bnr], 1,
6489                 False, /* get them */
6490                 False  /* is client request */ );
6491             /* We going to print the first vabits of a new line.
6492                Terminate the previous line if needed: prints a line with the
6493                address and the data. */
6494             if (bnr == 0) {
6495                if (i != 0) {
6496                   VG_(printf) ("\n");
6497                   gdb_xb (address + i - 8, 8, res);
6498                }
6499                VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6500             }
6501             if (res[bnr] == 1) {
6502                VG_(printf) ("\t  %02x", vbits[bnr]);
6503             } else {
6504                tl_assert(3 == res[bnr]);
6505                unaddressable++;
6506                VG_(printf) ("\t  __");
6507             }
6508          }
6509          VG_(printf) ("\n");
6510          if (szB % 8 == 0 && szB > 0)
6511             gdb_xb (address + szB - 8, 8, res);
6512          else
6513             gdb_xb (address + szB - szB % 8, szB % 8, res);
6514          if (unaddressable) {
6515             VG_(printf)
6516                ("Address %p len %lu has %d bytes unaddressable\n",
6517                 (void *)address, szB, unaddressable);
6518          }
6519       }
6520       return True;
6521    }
6522 
6523    default:
6524       tl_assert(0);
6525       return False;
6526    }
6527 }
6528 
6529 /*------------------------------------------------------------*/
6530 /*--- Client requests                                      ---*/
6531 /*------------------------------------------------------------*/
6532 
mc_handle_client_request(ThreadId tid,UWord * arg,UWord * ret)6533 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6534 {
6535    Int   i;
6536    Addr  bad_addr;
6537 
6538    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6539        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6540        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6541        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
6542        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
6543        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
6544        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
6545        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
6546        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
6547        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
6548        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
6549        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
6550        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
6551        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6552        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
6553       return False;
6554 
6555    switch (arg[0]) {
6556       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
6557          Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
6558          if (!ok)
6559             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
6560          *ret = ok ? (UWord)NULL : bad_addr;
6561          break;
6562       }
6563 
6564       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
6565          Bool errorV    = False;
6566          Addr bad_addrV = 0;
6567          UInt otagV     = 0;
6568          Bool errorA    = False;
6569          Addr bad_addrA = 0;
6570          is_mem_defined_comprehensive(
6571             arg[1], arg[2],
6572             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
6573          );
6574          if (errorV) {
6575             MC_(record_user_error) ( tid, bad_addrV,
6576                                      /*isAddrErr*/False, otagV );
6577          }
6578          if (errorA) {
6579             MC_(record_user_error) ( tid, bad_addrA,
6580                                      /*isAddrErr*/True, 0 );
6581          }
6582          /* Return the lower of the two erring addresses, if any. */
6583          *ret = 0;
6584          if (errorV && !errorA) {
6585             *ret = bad_addrV;
6586          }
6587          if (!errorV && errorA) {
6588             *ret = bad_addrA;
6589          }
6590          if (errorV && errorA) {
6591             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
6592          }
6593          break;
6594       }
6595 
6596       case VG_USERREQ__DO_LEAK_CHECK: {
6597          LeakCheckParams lcp;
6598 
6599          if (arg[1] == 0)
6600             lcp.mode = LC_Full;
6601          else if (arg[1] == 1)
6602             lcp.mode = LC_Summary;
6603          else {
6604             VG_(message)(Vg_UserMsg,
6605                          "Warning: unknown memcheck leak search mode\n");
6606             lcp.mode = LC_Full;
6607          }
6608 
6609          lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6610          lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6611          lcp.heuristics = MC_(clo_leak_check_heuristics);
6612 
6613          if (arg[2] == 0)
6614             lcp.deltamode = LCD_Any;
6615          else if (arg[2] == 1)
6616             lcp.deltamode = LCD_Increased;
6617          else if (arg[2] == 2)
6618             lcp.deltamode = LCD_Changed;
6619          else {
6620             VG_(message)
6621                (Vg_UserMsg,
6622                 "Warning: unknown memcheck leak search deltamode\n");
6623             lcp.deltamode = LCD_Any;
6624          }
6625          lcp.max_loss_records_output = 999999999;
6626          lcp.requested_by_monitor_command = False;
6627 
6628          MC_(detect_memory_leaks)(tid, &lcp);
6629          *ret = 0; /* return value is meaningless */
6630          break;
6631       }
6632 
6633       case VG_USERREQ__MAKE_MEM_NOACCESS:
6634          MC_(make_mem_noaccess) ( arg[1], arg[2] );
6635          *ret = -1;
6636          break;
6637 
6638       case VG_USERREQ__MAKE_MEM_UNDEFINED:
6639          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
6640                                               MC_OKIND_USER );
6641          *ret = -1;
6642          break;
6643 
6644       case VG_USERREQ__MAKE_MEM_DEFINED:
6645          MC_(make_mem_defined) ( arg[1], arg[2] );
6646          *ret = -1;
6647          break;
6648 
6649       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
6650          make_mem_defined_if_addressable ( arg[1], arg[2] );
6651          *ret = -1;
6652          break;
6653 
6654       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
6655          if (arg[1] != 0 && arg[2] != 0) {
6656             i = alloc_client_block();
6657             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
6658             cgbs[i].start = arg[1];
6659             cgbs[i].size  = arg[2];
6660             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
6661             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
6662             *ret = i;
6663          } else
6664             *ret = -1;
6665          break;
6666 
6667       case VG_USERREQ__DISCARD: /* discard */
6668          if (cgbs == NULL
6669              || arg[2] >= cgb_used ||
6670              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
6671             *ret = 1;
6672          } else {
6673             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
6674             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
6675             VG_(free)(cgbs[arg[2]].desc);
6676             cgb_discards++;
6677             *ret = 0;
6678          }
6679          break;
6680 
6681       case VG_USERREQ__GET_VBITS:
6682          *ret = mc_get_or_set_vbits_for_client
6683                    ( arg[1], arg[2], arg[3],
6684                      False /* get them */,
6685                      True /* is client request */ );
6686          break;
6687 
6688       case VG_USERREQ__SET_VBITS:
6689          *ret = mc_get_or_set_vbits_for_client
6690                    ( arg[1], arg[2], arg[3],
6691                      True /* set them */,
6692                      True /* is client request */ );
6693          break;
6694 
6695       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
6696          UWord** argp = (UWord**)arg;
6697          // MC_(bytes_leaked) et al were set by the last leak check (or zero
6698          // if no prior leak checks performed).
6699          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
6700          *argp[2] = MC_(bytes_dubious);
6701          *argp[3] = MC_(bytes_reachable);
6702          *argp[4] = MC_(bytes_suppressed);
6703          // there is no argp[5]
6704          //*argp[5] = MC_(bytes_indirect);
6705          // XXX need to make *argp[1-4] defined;  currently done in the
6706          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
6707          *ret = 0;
6708          return True;
6709       }
6710       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
6711          UWord** argp = (UWord**)arg;
6712          // MC_(blocks_leaked) et al were set by the last leak check (or zero
6713          // if no prior leak checks performed).
6714          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
6715          *argp[2] = MC_(blocks_dubious);
6716          *argp[3] = MC_(blocks_reachable);
6717          *argp[4] = MC_(blocks_suppressed);
6718          // there is no argp[5]
6719          //*argp[5] = MC_(blocks_indirect);
6720          // XXX need to make *argp[1-4] defined;  currently done in the
6721          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
6722          *ret = 0;
6723          return True;
6724       }
6725       case VG_USERREQ__MALLOCLIKE_BLOCK: {
6726          Addr p         = (Addr)arg[1];
6727          SizeT sizeB    =       arg[2];
6728          UInt rzB       =       arg[3];
6729          Bool is_zeroed = (Bool)arg[4];
6730 
6731          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
6732                           MC_AllocCustom, MC_(malloc_list) );
6733          if (rzB > 0) {
6734             MC_(make_mem_noaccess) ( p - rzB, rzB);
6735             MC_(make_mem_noaccess) ( p + sizeB, rzB);
6736          }
6737          return True;
6738       }
6739       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
6740          Addr p         = (Addr)arg[1];
6741          SizeT oldSizeB =       arg[2];
6742          SizeT newSizeB =       arg[3];
6743          UInt rzB       =       arg[4];
6744 
6745          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
6746          return True;
6747       }
6748       case VG_USERREQ__FREELIKE_BLOCK: {
6749          Addr p         = (Addr)arg[1];
6750          UInt rzB       =       arg[2];
6751 
6752          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
6753          return True;
6754       }
6755 
6756       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
6757          HChar* s  = (HChar*)arg[1];
6758          Addr  dst = (Addr) arg[2];
6759          Addr  src = (Addr) arg[3];
6760          SizeT len = (SizeT)arg[4];
6761          MC_(record_overlap_error)(tid, s, src, dst, len);
6762          return True;
6763       }
6764 
6765       case VG_USERREQ__CREATE_MEMPOOL: {
6766          Addr pool      = (Addr)arg[1];
6767          UInt rzB       =       arg[2];
6768          Bool is_zeroed = (Bool)arg[3];
6769 
6770          MC_(create_mempool) ( pool, rzB, is_zeroed );
6771          return True;
6772       }
6773 
6774       case VG_USERREQ__DESTROY_MEMPOOL: {
6775          Addr pool      = (Addr)arg[1];
6776 
6777          MC_(destroy_mempool) ( pool );
6778          return True;
6779       }
6780 
6781       case VG_USERREQ__MEMPOOL_ALLOC: {
6782          Addr pool      = (Addr)arg[1];
6783          Addr addr      = (Addr)arg[2];
6784          UInt size      =       arg[3];
6785 
6786          MC_(mempool_alloc) ( tid, pool, addr, size );
6787          return True;
6788       }
6789 
6790       case VG_USERREQ__MEMPOOL_FREE: {
6791          Addr pool      = (Addr)arg[1];
6792          Addr addr      = (Addr)arg[2];
6793 
6794          MC_(mempool_free) ( pool, addr );
6795          return True;
6796       }
6797 
6798       case VG_USERREQ__MEMPOOL_TRIM: {
6799          Addr pool      = (Addr)arg[1];
6800          Addr addr      = (Addr)arg[2];
6801          UInt size      =       arg[3];
6802 
6803          MC_(mempool_trim) ( pool, addr, size );
6804          return True;
6805       }
6806 
6807       case VG_USERREQ__MOVE_MEMPOOL: {
6808          Addr poolA     = (Addr)arg[1];
6809          Addr poolB     = (Addr)arg[2];
6810 
6811          MC_(move_mempool) ( poolA, poolB );
6812          return True;
6813       }
6814 
6815       case VG_USERREQ__MEMPOOL_CHANGE: {
6816          Addr pool      = (Addr)arg[1];
6817          Addr addrA     = (Addr)arg[2];
6818          Addr addrB     = (Addr)arg[3];
6819          UInt size      =       arg[4];
6820 
6821          MC_(mempool_change) ( pool, addrA, addrB, size );
6822          return True;
6823       }
6824 
6825       case VG_USERREQ__MEMPOOL_EXISTS: {
6826          Addr pool      = (Addr)arg[1];
6827 
6828          *ret = (UWord) MC_(mempool_exists) ( pool );
6829 	 return True;
6830       }
6831 
6832       case VG_USERREQ__GDB_MONITOR_COMMAND: {
6833          Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
6834          if (handled)
6835             *ret = 1;
6836          else
6837             *ret = 0;
6838          return handled;
6839       }
6840 
6841       case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
6842       case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
6843          Bool addRange
6844             = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
6845          Bool ok
6846             = modify_ignore_ranges(addRange, arg[1], arg[2]);
6847          *ret = ok ? 1 : 0;
6848          return True;
6849       }
6850 
6851       default:
6852          VG_(message)(
6853             Vg_UserMsg,
6854             "Warning: unknown memcheck client request code %llx\n",
6855             (ULong)arg[0]
6856          );
6857          return False;
6858    }
6859    return True;
6860 }
6861 
6862 
6863 /*------------------------------------------------------------*/
6864 /*--- Crude profiling machinery.                           ---*/
6865 /*------------------------------------------------------------*/
6866 
6867 // We track a number of interesting events (using PROF_EVENT)
6868 // if MC_PROFILE_MEMORY is defined.
6869 
6870 #ifdef MC_PROFILE_MEMORY
6871 
6872 ULong  MC_(event_ctr)[MCPE_LAST];
6873 
6874 /* Event counter names. Use the name of the function that increases the
6875    event counter. Drop any MC_() and mc_ prefices. */
6876 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
6877    [MCPE_LOADVN_SLOW] = "LOADVn_slow",
6878    [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
6879    [MCPE_STOREVN_SLOW] = "STOREVn_slow",
6880    [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
6881    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
6882    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
6883         "make_aligned_word32_undefined_slow",
6884    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
6885    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
6886         "make_aligned_word64_undefined_slow",
6887    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
6888    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
6889          "make_aligned_word32_noaccess_slow",
6890    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
6891    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
6892         "make_aligned_word64_noaccess_slow",
6893    [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
6894    [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
6895    [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
6896    [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
6897    [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
6898    [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
6899    [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
6900    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
6901    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
6902    [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
6903    [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
6904    [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
6905    [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
6906    [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
6907    [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
6908    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
6909    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
6910         "is_mem_defined_comprehensive(loop)",
6911    [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
6912    [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
6913    [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
6914    [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
6915    [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
6916    [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
6917         "set_address_range_perms(single-secmap)",
6918    [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
6919         "set_address_range_perms(startof-secmap)",
6920    [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
6921    "set_address_range_perms(multiple-secmaps)",
6922    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
6923         "set_address_range_perms(dist-sm1)",
6924    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
6925         "set_address_range_perms(dist-sm2)",
6926    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
6927         "set_address_range_perms(dist-sm1-quick)",
6928    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
6929         "set_address_range_perms(dist-sm2-quick)",
6930    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
6931    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
6932    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
6933    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
6934    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
6935    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
6936    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
6937         "set_address_range_perms(loop64K-free-dist-sm)",
6938    [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
6939    [MCPE_LOADV_128_OR_256]       = "LOADV_128_or_256",
6940    [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
6941    [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
6942    [MCPE_LOADV64]        = "LOADV64",
6943    [MCPE_LOADV64_SLOW1]  = "LOADV64-slow1",
6944    [MCPE_LOADV64_SLOW2]  = "LOADV64-slow2",
6945    [MCPE_STOREV64]       = "STOREV64",
6946    [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
6947    [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
6948    [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
6949    [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
6950    [MCPE_LOADV32]        = "LOADV32",
6951    [MCPE_LOADV32_SLOW1]  = "LOADV32-slow1",
6952    [MCPE_LOADV32_SLOW2]  = "LOADV32-slow2",
6953    [MCPE_STOREV32]       = "STOREV32",
6954    [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
6955    [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
6956    [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
6957    [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
6958    [MCPE_LOADV16]        = "LOADV16",
6959    [MCPE_LOADV16_SLOW1]  = "LOADV16-slow1",
6960    [MCPE_LOADV16_SLOW2]  = "LOADV16-slow2",
6961    [MCPE_STOREV16]       = "STOREV16",
6962    [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
6963    [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
6964    [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
6965    [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
6966    [MCPE_LOADV8]         = "LOADV8",
6967    [MCPE_LOADV8_SLOW1]   = "LOADV8-slow1",
6968    [MCPE_LOADV8_SLOW2]   = "LOADV8-slow2",
6969    [MCPE_STOREV8]        = "STOREV8",
6970    [MCPE_STOREV8_SLOW1]  = "STOREV8-slow1",
6971    [MCPE_STOREV8_SLOW2]  = "STOREV8-slow2",
6972    [MCPE_STOREV8_SLOW3]  = "STOREV8-slow3",
6973    [MCPE_STOREV8_SLOW4]  = "STOREV8-slow4",
6974    [MCPE_NEW_MEM_STACK_4]   = "new_mem_stack_4",
6975    [MCPE_NEW_MEM_STACK_8]   = "new_mem_stack_8",
6976    [MCPE_NEW_MEM_STACK_12]  = "new_mem_stack_12",
6977    [MCPE_NEW_MEM_STACK_16]  = "new_mem_stack_16",
6978    [MCPE_NEW_MEM_STACK_32]  = "new_mem_stack_32",
6979    [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
6980    [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
6981    [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
6982    [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
6983    [MCPE_DIE_MEM_STACK_4]   = "die_mem_stack_4",
6984    [MCPE_DIE_MEM_STACK_8]   = "die_mem_stack_8",
6985    [MCPE_DIE_MEM_STACK_12]  = "die_mem_stack_12",
6986    [MCPE_DIE_MEM_STACK_16]  = "die_mem_stack_16",
6987    [MCPE_DIE_MEM_STACK_32]  = "die_mem_stack_32",
6988    [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
6989    [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
6990    [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
6991    [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
6992    [MCPE_NEW_MEM_STACK]     = "new_mem_stack",
6993    [MCPE_DIE_MEM_STACK]     = "die_mem_stack",
6994 };
6995 
init_prof_mem(void)6996 static void init_prof_mem ( void )
6997 {
6998    Int i, name_count = 0;
6999 
7000    for (i = 0; i < MCPE_LAST; i++) {
7001       MC_(event_ctr)[i] = 0;
7002       if (MC_(event_ctr_name)[i] != NULL)
7003          ++name_count;
7004    }
7005 
7006    /* Make sure every profiling event has a name */
7007    tl_assert(name_count == MCPE_LAST);
7008 }
7009 
done_prof_mem(void)7010 static void done_prof_mem ( void )
7011 {
7012    Int  i, n;
7013    Bool spaced = False;
7014    for (i = n = 0; i < MCPE_LAST; i++) {
7015       if (!spaced && (n % 10) == 0) {
7016          VG_(printf)("\n");
7017          spaced = True;
7018       }
7019       if (MC_(event_ctr)[i] > 0) {
7020          spaced = False;
7021          ++n;
7022          VG_(printf)( "prof mem event %3d: %11llu   %s\n",
7023                       i, MC_(event_ctr)[i],
7024                       MC_(event_ctr_name)[i]);
7025       }
7026    }
7027 }
7028 
7029 #else
7030 
init_prof_mem(void)7031 static void init_prof_mem ( void ) { }
done_prof_mem(void)7032 static void done_prof_mem ( void ) { }
7033 
7034 #endif
7035 
7036 
7037 /*------------------------------------------------------------*/
7038 /*--- Origin tracking stuff                                ---*/
7039 /*------------------------------------------------------------*/
7040 
7041 /*--------------------------------------------*/
7042 /*--- Origin tracking: load handlers       ---*/
7043 /*--------------------------------------------*/
7044 
merge_origins(UInt or1,UInt or2)7045 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7046    return or1 > or2 ? or1 : or2;
7047 }
7048 
MC_(helperc_b_load1)7049 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7050    OCacheLine* line;
7051    UChar descr;
7052    UWord lineoff = oc_line_offset(a);
7053    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7054 
7055    if (OC_ENABLE_ASSERTIONS) {
7056       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7057    }
7058 
7059    line = find_OCacheLine( a );
7060 
7061    descr = line->descr[lineoff];
7062    if (OC_ENABLE_ASSERTIONS) {
7063       tl_assert(descr < 0x10);
7064    }
7065 
7066    if (LIKELY(0 == (descr & (1 << byteoff))))  {
7067       return 0;
7068    } else {
7069       return line->w32[lineoff];
7070    }
7071 }
7072 
MC_(helperc_b_load2)7073 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7074    OCacheLine* line;
7075    UChar descr;
7076    UWord lineoff, byteoff;
7077 
7078    if (UNLIKELY(a & 1)) {
7079       /* Handle misaligned case, slowly. */
7080       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
7081       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
7082       return merge_origins(oLo, oHi);
7083    }
7084 
7085    lineoff = oc_line_offset(a);
7086    byteoff = a & 3; /* 0 or 2 */
7087 
7088    if (OC_ENABLE_ASSERTIONS) {
7089       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7090    }
7091    line = find_OCacheLine( a );
7092 
7093    descr = line->descr[lineoff];
7094    if (OC_ENABLE_ASSERTIONS) {
7095       tl_assert(descr < 0x10);
7096    }
7097 
7098    if (LIKELY(0 == (descr & (3 << byteoff)))) {
7099       return 0;
7100    } else {
7101       return line->w32[lineoff];
7102    }
7103 }
7104 
MC_(helperc_b_load4)7105 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7106    OCacheLine* line;
7107    UChar descr;
7108    UWord lineoff;
7109 
7110    if (UNLIKELY(a & 3)) {
7111       /* Handle misaligned case, slowly. */
7112       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
7113       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
7114       return merge_origins(oLo, oHi);
7115    }
7116 
7117    lineoff = oc_line_offset(a);
7118    if (OC_ENABLE_ASSERTIONS) {
7119       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7120    }
7121 
7122    line = find_OCacheLine( a );
7123 
7124    descr = line->descr[lineoff];
7125    if (OC_ENABLE_ASSERTIONS) {
7126       tl_assert(descr < 0x10);
7127    }
7128 
7129    if (LIKELY(0 == descr)) {
7130       return 0;
7131    } else {
7132       return line->w32[lineoff];
7133    }
7134 }
7135 
MC_(helperc_b_load8)7136 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7137    OCacheLine* line;
7138    UChar descrLo, descrHi, descr;
7139    UWord lineoff;
7140 
7141    if (UNLIKELY(a & 7)) {
7142       /* Handle misaligned case, slowly. */
7143       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
7144       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
7145       return merge_origins(oLo, oHi);
7146    }
7147 
7148    lineoff = oc_line_offset(a);
7149    if (OC_ENABLE_ASSERTIONS) {
7150       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7151    }
7152 
7153    line = find_OCacheLine( a );
7154 
7155    descrLo = line->descr[lineoff + 0];
7156    descrHi = line->descr[lineoff + 1];
7157    descr   = descrLo | descrHi;
7158    if (OC_ENABLE_ASSERTIONS) {
7159       tl_assert(descr < 0x10);
7160    }
7161 
7162    if (LIKELY(0 == descr)) {
7163       return 0; /* both 32-bit chunks are defined */
7164    } else {
7165       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
7166       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
7167       return merge_origins(oLo, oHi);
7168    }
7169 }
7170 
MC_(helperc_b_load16)7171 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7172    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
7173    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
7174    UInt oBoth = merge_origins(oLo, oHi);
7175    return (UWord)oBoth;
7176 }
7177 
MC_(helperc_b_load32)7178 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7179    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
7180    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
7181    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
7182    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
7183    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
7184                               merge_origins(oQ2, oQ3));
7185    return (UWord)oAll;
7186 }
7187 
7188 
7189 /*--------------------------------------------*/
7190 /*--- Origin tracking: store handlers      ---*/
7191 /*--------------------------------------------*/
7192 
MC_(helperc_b_store1)7193 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7194    OCacheLine* line;
7195    UWord lineoff = oc_line_offset(a);
7196    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7197 
7198    if (OC_ENABLE_ASSERTIONS) {
7199       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7200    }
7201 
7202    line = find_OCacheLine( a );
7203 
7204    if (d32 == 0) {
7205       line->descr[lineoff] &= ~(1 << byteoff);
7206    } else {
7207       line->descr[lineoff] |= (1 << byteoff);
7208       line->w32[lineoff] = d32;
7209    }
7210 }
7211 
MC_(helperc_b_store2)7212 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7213    OCacheLine* line;
7214    UWord lineoff, byteoff;
7215 
7216    if (UNLIKELY(a & 1)) {
7217       /* Handle misaligned case, slowly. */
7218       MC_(helperc_b_store1)( a + 0, d32 );
7219       MC_(helperc_b_store1)( a + 1, d32 );
7220       return;
7221    }
7222 
7223    lineoff = oc_line_offset(a);
7224    byteoff = a & 3; /* 0 or 2 */
7225 
7226    if (OC_ENABLE_ASSERTIONS) {
7227       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7228    }
7229 
7230    line = find_OCacheLine( a );
7231 
7232    if (d32 == 0) {
7233       line->descr[lineoff] &= ~(3 << byteoff);
7234    } else {
7235       line->descr[lineoff] |= (3 << byteoff);
7236       line->w32[lineoff] = d32;
7237    }
7238 }
7239 
MC_(helperc_b_store4)7240 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7241    OCacheLine* line;
7242    UWord lineoff;
7243 
7244    if (UNLIKELY(a & 3)) {
7245       /* Handle misaligned case, slowly. */
7246       MC_(helperc_b_store2)( a + 0, d32 );
7247       MC_(helperc_b_store2)( a + 2, d32 );
7248       return;
7249    }
7250 
7251    lineoff = oc_line_offset(a);
7252    if (OC_ENABLE_ASSERTIONS) {
7253       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7254    }
7255 
7256    line = find_OCacheLine( a );
7257 
7258    if (d32 == 0) {
7259       line->descr[lineoff] = 0;
7260    } else {
7261       line->descr[lineoff] = 0xF;
7262       line->w32[lineoff] = d32;
7263    }
7264 }
7265 
MC_(helperc_b_store8)7266 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7267    OCacheLine* line;
7268    UWord lineoff;
7269 
7270    if (UNLIKELY(a & 7)) {
7271       /* Handle misaligned case, slowly. */
7272       MC_(helperc_b_store4)( a + 0, d32 );
7273       MC_(helperc_b_store4)( a + 4, d32 );
7274       return;
7275    }
7276 
7277    lineoff = oc_line_offset(a);
7278    if (OC_ENABLE_ASSERTIONS) {
7279       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7280    }
7281 
7282    line = find_OCacheLine( a );
7283 
7284    if (d32 == 0) {
7285       line->descr[lineoff + 0] = 0;
7286       line->descr[lineoff + 1] = 0;
7287    } else {
7288       line->descr[lineoff + 0] = 0xF;
7289       line->descr[lineoff + 1] = 0xF;
7290       line->w32[lineoff + 0] = d32;
7291       line->w32[lineoff + 1] = d32;
7292    }
7293 }
7294 
MC_(helperc_b_store16)7295 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7296    MC_(helperc_b_store8)( a + 0, d32 );
7297    MC_(helperc_b_store8)( a + 8, d32 );
7298 }
7299 
MC_(helperc_b_store32)7300 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7301    MC_(helperc_b_store8)( a +  0, d32 );
7302    MC_(helperc_b_store8)( a +  8, d32 );
7303    MC_(helperc_b_store8)( a + 16, d32 );
7304    MC_(helperc_b_store8)( a + 24, d32 );
7305 }
7306 
7307 
7308 /*--------------------------------------------*/
7309 /*--- Origin tracking: sarp handlers       ---*/
7310 /*--------------------------------------------*/
7311 
7312 __attribute__((noinline))
ocache_sarp_Set_Origins(Addr a,UWord len,UInt otag)7313 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
7314    if ((a & 1) && len >= 1) {
7315       MC_(helperc_b_store1)( a, otag );
7316       a++;
7317       len--;
7318    }
7319    if ((a & 2) && len >= 2) {
7320       MC_(helperc_b_store2)( a, otag );
7321       a += 2;
7322       len -= 2;
7323    }
7324    if (len >= 4)
7325       tl_assert(0 == (a & 3));
7326    while (len >= 4) {
7327       MC_(helperc_b_store4)( a, otag );
7328       a += 4;
7329       len -= 4;
7330    }
7331    if (len >= 2) {
7332       MC_(helperc_b_store2)( a, otag );
7333       a += 2;
7334       len -= 2;
7335    }
7336    if (len >= 1) {
7337       MC_(helperc_b_store1)( a, otag );
7338       //a++;
7339       len--;
7340    }
7341    tl_assert(len == 0);
7342 }
7343 
7344 __attribute__((noinline))
ocache_sarp_Clear_Origins(Addr a,UWord len)7345 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
7346    if ((a & 1) && len >= 1) {
7347       MC_(helperc_b_store1)( a, 0 );
7348       a++;
7349       len--;
7350    }
7351    if ((a & 2) && len >= 2) {
7352       MC_(helperc_b_store2)( a, 0 );
7353       a += 2;
7354       len -= 2;
7355    }
7356    if (len >= 4)
7357       tl_assert(0 == (a & 3));
7358    while (len >= 4) {
7359       MC_(helperc_b_store4)( a, 0 );
7360       a += 4;
7361       len -= 4;
7362    }
7363    if (len >= 2) {
7364       MC_(helperc_b_store2)( a, 0 );
7365       a += 2;
7366       len -= 2;
7367    }
7368    if (len >= 1) {
7369       MC_(helperc_b_store1)( a, 0 );
7370       //a++;
7371       len--;
7372    }
7373    tl_assert(len == 0);
7374 }
7375 
7376 
7377 /*------------------------------------------------------------*/
7378 /*--- Setup and finalisation                               ---*/
7379 /*------------------------------------------------------------*/
7380 
mc_post_clo_init(void)7381 static void mc_post_clo_init ( void )
7382 {
7383    /* If we've been asked to emit XML, mash around various other
7384       options so as to constrain the output somewhat. */
7385    if (VG_(clo_xml)) {
7386       /* Extract as much info as possible from the leak checker. */
7387       MC_(clo_leak_check) = LC_Full;
7388    }
7389 
7390    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
7391       VG_(message)(Vg_UserMsg,
7392                    "Warning: --freelist-big-blocks value %lld has no effect\n"
7393                    "as it is >= to --freelist-vol value %lld\n",
7394                    MC_(clo_freelist_big_blocks),
7395                    MC_(clo_freelist_vol));
7396 
7397    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
7398 
7399    if (MC_(clo_mc_level) == 3) {
7400       /* We're doing origin tracking. */
7401 #     ifdef PERF_FAST_STACK
7402       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
7403       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
7404       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
7405       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
7406       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
7407       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
7408       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
7409       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
7410       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
7411 #     endif
7412       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
7413       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
7414    } else {
7415       /* Not doing origin tracking */
7416 #     ifdef PERF_FAST_STACK
7417       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
7418       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
7419       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
7420       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
7421       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
7422       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
7423       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
7424       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
7425       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
7426 #     endif
7427       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
7428       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
7429    }
7430 
7431    // We assume that brk()/sbrk() does not initialise new memory.  Is this
7432    // accurate?  John Reiser says:
7433    //
7434    //   0) sbrk() can *decrease* process address space.  No zero fill is done
7435    //   for a decrease, not even the fragment on the high end of the last page
7436    //   that is beyond the new highest address.  For maximum safety and
7437    //   portability, then the bytes in the last page that reside above [the
7438    //   new] sbrk(0) should be considered to be uninitialized, but in practice
7439    //   it is exceedingly likely that they will retain their previous
7440    //   contents.
7441    //
7442    //   1) If an increase is large enough to require new whole pages, then
7443    //   those new whole pages (like all new pages) are zero-filled by the
7444    //   operating system.  So if sbrk(0) already is page aligned, then
7445    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7446    //
7447    //   2) Any increase that lies within an existing allocated page is not
7448    //   changed.  So if (x = sbrk(0)) is not page aligned, then
7449    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7450    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7451    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7452    //   of them come along for the ride because the operating system deals
7453    //   only in whole pages.  Again, for maximum safety and portability, then
7454    //   anything that lives above [the new] sbrk(0) should be considered
7455    //   uninitialized, but in practice will retain previous contents [zero in
7456    //   this case.]"
7457    //
7458    // In short:
7459    //
7460    //   A key property of sbrk/brk is that new whole pages that are supplied
7461    //   by the operating system *do* get initialized to zero.
7462    //
7463    // As for the portability of all this:
7464    //
7465    //   sbrk and brk are not POSIX.  However, any system that is a derivative
7466    //   of *nix has sbrk and brk because there are too many softwares (such as
7467    //   the Bourne shell) which rely on the traditional memory map (.text,
7468    //   .data+.bss, stack) and the existence of sbrk/brk.
7469    //
7470    // So we should arguably observe all this.  However:
7471    // - The current inaccuracy has caused maybe one complaint in seven years(?)
7472    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7473    //   doubt most programmers know the above information.
7474    // So I'm not terribly unhappy with marking it as undefined. --njn.
7475    //
7476    // [More:  I think most of what John said only applies to sbrk().  It seems
7477    // that brk() always deals in whole pages.  And since this event deals
7478    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7479    // just mark all memory it allocates as defined.]
7480    //
7481 #  if !defined(VGO_solaris)
7482    if (MC_(clo_mc_level) == 3)
7483       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
7484    else
7485       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
7486 #  else
7487    // On Solaris, brk memory has to be marked as defined, otherwise we get
7488    // many false positives.
7489    VG_(track_new_mem_brk)         ( make_mem_defined_w_tid );
7490 #  endif
7491 
7492    /* This origin tracking cache is huge (~100M), so only initialise
7493       if we need it. */
7494    if (MC_(clo_mc_level) >= 3) {
7495       init_OCache();
7496       tl_assert(ocacheL1 != NULL);
7497       tl_assert(ocacheL2 != NULL);
7498    } else {
7499       tl_assert(ocacheL1 == NULL);
7500       tl_assert(ocacheL2 == NULL);
7501    }
7502 
7503    MC_(chunk_poolalloc) = VG_(newPA)
7504       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
7505        1000,
7506        VG_(malloc),
7507        "mc.cMC.1 (MC_Chunk pools)",
7508        VG_(free));
7509 
7510    /* Do not check definedness of guest state if --undef-value-errors=no */
7511    if (MC_(clo_mc_level) >= 2)
7512       VG_(track_pre_reg_read) ( mc_pre_reg_read );
7513 }
7514 
print_SM_info(const HChar * type,Int n_SMs)7515 static void print_SM_info(const HChar* type, Int n_SMs)
7516 {
7517    VG_(message)(Vg_DebugMsg,
7518       " memcheck: SMs: %s = %d (%luk, %luM)\n",
7519       type,
7520       n_SMs,
7521       n_SMs * sizeof(SecMap) / 1024UL,
7522       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
7523 }
7524 
mc_print_stats(void)7525 static void mc_print_stats (void)
7526 {
7527    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
7528 
7529    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
7530                 VG_(free_queue_volume), VG_(free_queue_length));
7531    VG_(message)(Vg_DebugMsg,
7532       " memcheck: sanity checks: %d cheap, %d expensive\n",
7533       n_sanity_cheap, n_sanity_expensive );
7534    VG_(message)(Vg_DebugMsg,
7535       " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
7536       n_auxmap_L2_nodes,
7537       n_auxmap_L2_nodes * 64,
7538       n_auxmap_L2_nodes / 16 );
7539    VG_(message)(Vg_DebugMsg,
7540       " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
7541       n_auxmap_L1_searches, n_auxmap_L1_cmps,
7542       (10ULL * n_auxmap_L1_cmps)
7543          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
7544    );
7545    VG_(message)(Vg_DebugMsg,
7546       " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
7547       n_auxmap_L2_searches, n_auxmap_L2_nodes
7548    );
7549 
7550    print_SM_info("n_issued     ", n_issued_SMs);
7551    print_SM_info("n_deissued   ", n_deissued_SMs);
7552    print_SM_info("max_noaccess ", max_noaccess_SMs);
7553    print_SM_info("max_undefined", max_undefined_SMs);
7554    print_SM_info("max_defined  ", max_defined_SMs);
7555    print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
7556 
7557    // Three DSMs, plus the non-DSM ones
7558    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
7559    // The 3*sizeof(Word) bytes is the AVL node metadata size.
7560    // The VG_ROUNDUP is because the OSet pool allocator will/must align
7561    // the elements on pointer size.
7562    // Note that the pool allocator has some additional small overhead
7563    // which is not counted in the below.
7564    // Hardwiring this logic sucks, but I don't see how else to do it.
7565    max_secVBit_szB = max_secVBit_nodes *
7566          (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
7567    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
7568 
7569    VG_(message)(Vg_DebugMsg,
7570       " memcheck: max sec V bit nodes:    %d (%luk, %luM)\n",
7571       max_secVBit_nodes, max_secVBit_szB / 1024,
7572                          max_secVBit_szB / (1024 * 1024));
7573    VG_(message)(Vg_DebugMsg,
7574       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
7575       sec_vbits_new_nodes + sec_vbits_updates,
7576       sec_vbits_new_nodes, sec_vbits_updates );
7577    VG_(message)(Vg_DebugMsg,
7578       " memcheck: max shadow mem size:   %luk, %luM\n",
7579       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
7580 
7581    if (MC_(clo_mc_level) >= 3) {
7582       VG_(message)(Vg_DebugMsg,
7583                    " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
7584                    stats_ocacheL1_find,
7585                    stats_ocacheL1_misses,
7586                    stats_ocacheL1_lossage );
7587       VG_(message)(Vg_DebugMsg,
7588                    " ocacheL1: %'12lu at 0   %'12lu at 1\n",
7589                    stats_ocacheL1_find - stats_ocacheL1_misses
7590                       - stats_ocacheL1_found_at_1
7591                       - stats_ocacheL1_found_at_N,
7592                    stats_ocacheL1_found_at_1 );
7593       VG_(message)(Vg_DebugMsg,
7594                    " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
7595                    stats_ocacheL1_found_at_N,
7596                    stats_ocacheL1_movefwds );
7597       VG_(message)(Vg_DebugMsg,
7598                    " ocacheL1: %'12lu sizeB  %'12d useful\n",
7599                    (SizeT)sizeof(OCache),
7600                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
7601       VG_(message)(Vg_DebugMsg,
7602                    " ocacheL2: %'12lu refs   %'12lu misses\n",
7603                    stats__ocacheL2_refs,
7604                    stats__ocacheL2_misses );
7605       VG_(message)(Vg_DebugMsg,
7606                    " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
7607                    stats__ocacheL2_n_nodes_max,
7608                    stats__ocacheL2_n_nodes );
7609       VG_(message)(Vg_DebugMsg,
7610                    " niacache: %'12lu refs   %'12lu misses\n",
7611                    stats__nia_cache_queries, stats__nia_cache_misses);
7612    } else {
7613       tl_assert(ocacheL1 == NULL);
7614       tl_assert(ocacheL2 == NULL);
7615    }
7616 }
7617 
7618 
mc_fini(Int exitcode)7619 static void mc_fini ( Int exitcode )
7620 {
7621    MC_(print_malloc_stats)();
7622 
7623    if (MC_(clo_leak_check) != LC_Off) {
7624       LeakCheckParams lcp;
7625       lcp.mode = MC_(clo_leak_check);
7626       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7627       lcp.heuristics = MC_(clo_leak_check_heuristics);
7628       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7629       lcp.deltamode = LCD_Any;
7630       lcp.max_loss_records_output = 999999999;
7631       lcp.requested_by_monitor_command = False;
7632       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
7633    } else {
7634       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7635          VG_(umsg)(
7636             "For a detailed leak analysis, rerun with: --leak-check=full\n"
7637             "\n"
7638          );
7639       }
7640    }
7641 
7642    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7643       VG_(message)(Vg_UserMsg,
7644                    "For counts of detected and suppressed errors, rerun with: -v\n");
7645    }
7646 
7647    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
7648        && MC_(clo_mc_level) == 2) {
7649       VG_(message)(Vg_UserMsg,
7650                    "Use --track-origins=yes to see where "
7651                    "uninitialised values come from\n");
7652    }
7653 
7654    /* Print a warning if any client-request generated ignore-ranges
7655       still exist.  It would be reasonable to expect that a properly
7656       written program would remove any such ranges before exiting, and
7657       since they are a bit on the dangerous side, let's comment.  By
7658       contrast ranges which are specified on the command line normally
7659       pertain to hardware mapped into the address space, and so we
7660       can't expect the client to have got rid of them. */
7661    if (gIgnoredAddressRanges) {
7662       UInt i, nBad = 0;
7663       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
7664          UWord val     = IAR_INVALID;
7665          UWord key_min = ~(UWord)0;
7666          UWord key_max = (UWord)0;
7667          VG_(indexRangeMap)( &key_min, &key_max, &val,
7668                              gIgnoredAddressRanges, i );
7669          if (val != IAR_ClientReq)
7670            continue;
7671          /* Print the offending range.  Also, if it is the first,
7672             print a banner before it. */
7673          nBad++;
7674          if (nBad == 1) {
7675             VG_(umsg)(
7676               "WARNING: exiting program has the following client-requested\n"
7677               "WARNING: address error disablement range(s) still in force,\n"
7678               "WARNING: "
7679                  "possibly as a result of some mistake in the use of the\n"
7680               "WARNING: "
7681                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
7682             );
7683          }
7684          VG_(umsg)("   [%u]  0x%016lx-0x%016lx  %s\n",
7685                    i, key_min, key_max, showIARKind(val));
7686       }
7687    }
7688 
7689    done_prof_mem();
7690 
7691    if (VG_(clo_stats))
7692       mc_print_stats();
7693 
7694    if (0) {
7695       VG_(message)(Vg_DebugMsg,
7696         "------ Valgrind's client block stats follow ---------------\n" );
7697       show_client_block_stats();
7698    }
7699 }
7700 
7701 /* mark the given addr/len unaddressable for watchpoint implementation
7702    The PointKind will be handled at access time */
mc_mark_unaddressable_for_watchpoint(PointKind kind,Bool insert,Addr addr,SizeT len)7703 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
7704                                                   Addr addr, SizeT len)
7705 {
7706    /* GDBTD this is somewhat fishy. We might rather have to save the previous
7707       accessibility and definedness in gdbserver so as to allow restoring it
7708       properly. Currently, we assume that the user only watches things
7709       which are properly addressable and defined */
7710    if (insert)
7711       MC_(make_mem_noaccess) (addr, len);
7712    else
7713       MC_(make_mem_defined)  (addr, len);
7714    return True;
7715 }
7716 
mc_pre_clo_init(void)7717 static void mc_pre_clo_init(void)
7718 {
7719    VG_(details_name)            ("Memcheck");
7720    VG_(details_version)         (NULL);
7721    VG_(details_description)     ("a memory error detector");
7722    VG_(details_copyright_author)(
7723       "Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.");
7724    VG_(details_bug_reports_to)  (VG_BUGS_TO);
7725    VG_(details_avg_translation_sizeB) ( 640 );
7726 
7727    VG_(basic_tool_funcs)          (mc_post_clo_init,
7728                                    MC_(instrument),
7729                                    mc_fini);
7730 
7731    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
7732 
7733 
7734    VG_(needs_core_errors)         ();
7735    VG_(needs_tool_errors)         (MC_(eq_Error),
7736                                    MC_(before_pp_Error),
7737                                    MC_(pp_Error),
7738                                    True,/*show TIDs for errors*/
7739                                    MC_(update_Error_extra),
7740                                    MC_(is_recognised_suppression),
7741                                    MC_(read_extra_suppression_info),
7742                                    MC_(error_matches_suppression),
7743                                    MC_(get_error_name),
7744                                    MC_(get_extra_suppression_info),
7745                                    MC_(print_extra_suppression_use),
7746                                    MC_(update_extra_suppression_use));
7747    VG_(needs_libc_freeres)        ();
7748    VG_(needs_command_line_options)(mc_process_cmd_line_options,
7749                                    mc_print_usage,
7750                                    mc_print_debug_usage);
7751    VG_(needs_client_requests)     (mc_handle_client_request);
7752    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
7753                                    mc_expensive_sanity_check);
7754    VG_(needs_print_stats)         (mc_print_stats);
7755    VG_(needs_info_location)       (MC_(pp_describe_addr));
7756    VG_(needs_malloc_replacement)  (MC_(malloc),
7757                                    MC_(__builtin_new),
7758                                    MC_(__builtin_vec_new),
7759                                    MC_(memalign),
7760                                    MC_(calloc),
7761                                    MC_(free),
7762                                    MC_(__builtin_delete),
7763                                    MC_(__builtin_vec_delete),
7764                                    MC_(realloc),
7765                                    MC_(malloc_usable_size),
7766                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
7767    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
7768 
7769    VG_(needs_xml_output)          ();
7770 
7771    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
7772 
7773    // Handling of mmap and mprotect isn't simple (well, it is simple,
7774    // but the justification isn't.)  See comments above, just prior to
7775    // mc_new_mem_mmap.
7776    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
7777    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
7778 
7779    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
7780 
7781    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
7782    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
7783    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
7784 
7785    /* Defer the specification of the new_mem_stack functions to the
7786       post_clo_init function, since we need to first parse the command
7787       line before deciding which set to use. */
7788 
7789 #  ifdef PERF_FAST_STACK
7790    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
7791    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
7792    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
7793    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
7794    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
7795    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
7796    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
7797    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
7798    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
7799 #  endif
7800    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
7801 
7802    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
7803 
7804    VG_(track_pre_mem_read)        ( check_mem_is_defined );
7805    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
7806    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
7807    VG_(track_post_mem_write)      ( mc_post_mem_write );
7808 
7809    VG_(track_post_reg_write)                  ( mc_post_reg_write );
7810    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
7811 
7812    if (MC_(clo_mc_level) >= 2) {
7813       VG_(track_copy_mem_to_reg)  ( mc_copy_mem_to_reg );
7814       VG_(track_copy_reg_to_mem)  ( mc_copy_reg_to_mem );
7815    }
7816 
7817    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
7818 
7819    init_shadow_memory();
7820    // MC_(chunk_poolalloc) must be allocated in post_clo_init
7821    tl_assert(MC_(chunk_poolalloc) == NULL);
7822    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
7823    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
7824    init_prof_mem();
7825 
7826    tl_assert( mc_expensive_sanity_check() );
7827 
7828    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
7829    tl_assert(sizeof(UWord) == sizeof(Addr));
7830    // Call me paranoid.  I don't care.
7831    tl_assert(sizeof(void*) == sizeof(Addr));
7832 
7833    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
7834    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
7835 
7836    /* This is small.  Always initialise it. */
7837    init_nia_to_ecu_cache();
7838 
7839    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
7840       if we need to, since the command line args haven't been
7841       processed yet.  Hence defer it to mc_post_clo_init. */
7842    tl_assert(ocacheL1 == NULL);
7843    tl_assert(ocacheL2 == NULL);
7844 
7845    /* Check some important stuff.  See extensive comments above
7846       re UNALIGNED_OR_HIGH for background. */
7847 #  if VG_WORDSIZE == 4
7848    tl_assert(sizeof(void*) == 4);
7849    tl_assert(sizeof(Addr)  == 4);
7850    tl_assert(sizeof(UWord) == 4);
7851    tl_assert(sizeof(Word)  == 4);
7852    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
7853    tl_assert(MASK(1) == 0UL);
7854    tl_assert(MASK(2) == 1UL);
7855    tl_assert(MASK(4) == 3UL);
7856    tl_assert(MASK(8) == 7UL);
7857 #  else
7858    tl_assert(VG_WORDSIZE == 8);
7859    tl_assert(sizeof(void*) == 8);
7860    tl_assert(sizeof(Addr)  == 8);
7861    tl_assert(sizeof(UWord) == 8);
7862    tl_assert(sizeof(Word)  == 8);
7863    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
7864    tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
7865    tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
7866    tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
7867    tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
7868 #  endif
7869 }
7870 
7871 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
7872 
7873 /*--------------------------------------------------------------------*/
7874 /*--- end                                                mc_main.c ---*/
7875 /*--------------------------------------------------------------------*/
7876