1 
2 /*--------------------------------------------------------------------*/
3 /*--- LibHB: a library for implementing and checking               ---*/
4 /*--- the happens-before relationship in concurrent programs.      ---*/
5 /*---                                                 libhb_main.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of LibHB, a library for implementing and checking
10    the happens-before relationship in concurrent programs.
11 
12    Copyright (C) 2008-2015 OpenWorks Ltd
13       info@open-works.co.uk
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #include "pub_tool_basics.h"
34 #include "pub_tool_poolalloc.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcbase.h"
37 #include "pub_tool_libcprint.h"
38 #include "pub_tool_mallocfree.h"
39 #include "pub_tool_wordfm.h"
40 #include "pub_tool_hashtable.h"
41 #include "pub_tool_xarray.h"
42 #include "pub_tool_oset.h"
43 #include "pub_tool_threadstate.h"
44 #include "pub_tool_aspacemgr.h"
45 #include "pub_tool_stacktrace.h"
46 #include "pub_tool_execontext.h"
47 #include "pub_tool_errormgr.h"
48 #include "pub_tool_options.h"        // VG_(clo_stats)
49 #include "hg_basics.h"
50 #include "hg_wordset.h"
51 #include "hg_lock_n_thread.h"
52 #include "hg_errors.h"
53 
54 #include "libhb.h"
55 
56 
57 /////////////////////////////////////////////////////////////////
58 /////////////////////////////////////////////////////////////////
59 //                                                             //
60 // Debugging #defines                                          //
61 //                                                             //
62 /////////////////////////////////////////////////////////////////
63 /////////////////////////////////////////////////////////////////
64 
/* CHECK_MSM: when nonzero, the sanity of shadow values in the core
   memory state machine is checked.  Disabled by default; change the
   0 to 1 and rebuild to enable. */
#define CHECK_MSM 0
72 
73 
/* CHECK_CEM: when nonzero, sanity (reference counts, etc) is checked
   in the conflicting access machinery.  Disabled by default; change
   the 0 to 1 and rebuild to enable. */
#define CHECK_CEM 0
81 
82 
/* CHECK_ZSM: when nonzero, sanity checks are done in the compressed
   shadow memory machinery, particularly in its caching innards
   (CacheLine stuff).  There is no almost-zero-cost way to make these
   selectable at run time, so change the 0 to 1 and rebuild if you
   want them. */
#define CHECK_ZSM 0

#if CHECK_ZSM
   /* With checking enabled, 'inline' is turned into noinline.
      Probably want to ditch -fomit-frame-pointer too. */
#  define inline __attribute__((noinline))
#endif
94 
95 
96 /////////////////////////////////////////////////////////////////
97 /////////////////////////////////////////////////////////////////
98 //                                                             //
99 // data decls: VtsID                                           //
100 //                                                             //
101 /////////////////////////////////////////////////////////////////
102 /////////////////////////////////////////////////////////////////
103 
104 /* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
105    bits, since they have to be packed into the lowest 30 bits of an
106    SVal. */
107 typedef  UInt  VtsID;
108 #define VtsID_INVALID 0xFFFFFFFF
109 
110 
111 
112 /////////////////////////////////////////////////////////////////
113 /////////////////////////////////////////////////////////////////
114 //                                                             //
115 // data decls: SVal                                            //
116 //                                                             //
117 /////////////////////////////////////////////////////////////////
118 /////////////////////////////////////////////////////////////////
119 
120 typedef  ULong  SVal;
121 
122 /* This value has special significance to the implementation, and callers
123    may not store it in the shadow memory. */
124 #define SVal_INVALID (3ULL << 62)
125 
126 /* This is the default value for shadow memory.  Initially the shadow
127    memory contains no accessible areas and so all reads produce this
128    value.  TODO: make this caller-defineable. */
129 #define SVal_NOACCESS (2ULL << 62)
130 
131 
132 
133 /////////////////////////////////////////////////////////////////
134 /////////////////////////////////////////////////////////////////
135 //                                                             //
136 // data decls: ScalarTS                                        //
137 //                                                             //
138 /////////////////////////////////////////////////////////////////
139 /////////////////////////////////////////////////////////////////
140 
141 /* Scalar Timestamp.  We have to store a lot of these, so there is
142    some effort to make them as small as possible.  Logically they are
143    a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
144    We pack it into 64 bits by representing the Thr* using a ThrID, a
145    small integer (18 bits), and a 46 bit integer for the timestamp
146    number.  The 46/18 split is arbitrary, but has the effect that
147    Helgrind can only handle programs that create 2^18 or fewer threads
148    over their entire lifetime, and have no more than 2^46 timestamp
149    ticks (synchronisation operations on the same thread).
150 
   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
153    cycles to process, then the minimum time to process that many ticks
154    at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
155    but VTS ticks, which isn't realistic.
156 
157    NB1: SCALARTS_N_THRBITS must be 27 or lower.  The obvious limit is
158    32 since a ThrID is a UInt.  27 comes from the fact that
159    'Thr_n_RCEC', which records information about old accesses, packs
160    in tsw not only a ThrID but also minimum 4+1 other bits (access size
161    and writeness) in a UInt, hence limiting size to 32-(4+1) == 27.
162 
163    NB2: thrid values are issued upwards from 1024, and values less
164    than that aren't valid.  This isn't per se necessary (any order
165    will do, so long as they are unique), but it does help ensure they
166    are less likely to get confused with the various other kinds of
167    small-integer thread ids drifting around (eg, TId).
168    So, SCALARTS_N_THRBITS must be 11 or more.
169    See also NB5.
170 
171    NB3: this probably also relies on the fact that Thr's are never
172    deallocated -- they exist forever.  Hence the 1-1 mapping from
173    Thr's to thrid values (set up in Thr__new) persists forever.
174 
175    NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
177    ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
178    making the memory use for this go sky-high.  With
179    SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
180    like an OK tradeoff.  If more than 256k threads need to be
181    supported, we could change SCALARTS_N_THRBITS to 20, which would
182    facilitate supporting 1 million threads at the cost of 8MB storage
183    for temp_max_sized_VTS.
184 
185    NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
186    ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
187    must never be a valid ThrID.  Given NB2 that's OK.
188 */
189 #define SCALARTS_N_THRBITS 18  /* valid range: 11 to 27 inclusive,
190                                   See NB1 and NB2 above. */
191 
192 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
193 typedef
194    struct {
195       ThrID thrid : SCALARTS_N_THRBITS;
196       ULong tym   : SCALARTS_N_TYMBITS;
197    }
198    ScalarTS;
199 
200 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
201 
202 
203 
204 /////////////////////////////////////////////////////////////////
205 /////////////////////////////////////////////////////////////////
206 //                                                             //
207 // data decls: Filter                                          //
208 //                                                             //
209 /////////////////////////////////////////////////////////////////
210 /////////////////////////////////////////////////////////////////
211 
/* Filter geometry, expressed as log2 values. */
// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

/* Derived geometry: bytes covered by one line, and number of lines. */
#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

/* The tag of an address is the address with the within-line offset
   bits cleared. */
#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

/* The line number of an address is taken from the
   FI_NUM_LINES_LOG2 bits just above the within-line offset bits. */
#define FI_GET_LINENO(_a) \
   ( ((_a) >> FI_LINE_SZB_LOG2) & (Addr)(FI_NUM_LINES-1) )
224 
225 
226 /* In the lines, each 8 bytes are treated individually, and are mapped
227    to a UShort.  Regardless of endianness of the underlying machine,
228    bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
229    the highest address.
230 
231    Of each bit pair, the higher numbered bit is set if a R has been
232    seen, so the actual layout is:
233 
234    15 14             ...  01 00
235 
236    R  W  for addr+7  ...  R  W  for addr+0
237 
238    So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
239 */
240 
241 /* tags are separated from lines.  tags are Addrs and are
242    the base address of the line. */
243 typedef
244    struct {
245       UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
246    }
247    FiLine;
248 
249 typedef
250    struct {
251       Addr   tags[FI_NUM_LINES];
252       FiLine lines[FI_NUM_LINES];
253    }
254    Filter;
255 
256 
257 
258 /////////////////////////////////////////////////////////////////
259 /////////////////////////////////////////////////////////////////
260 //                                                             //
261 // data decls: Thr, ULong_n_EC                                 //
262 //                                                             //
263 /////////////////////////////////////////////////////////////////
264 /////////////////////////////////////////////////////////////////
265 
266 // Records stacks for H1 history mechanism (DRD-style)
267 typedef
268    struct { ULong ull; ExeContext* ec; }
269    ULong_n_EC;
270 
271 
272 /* How many of the above records to collect for each thread?  Older
273    ones are dumped when we run out of space.  62.5k requires 1MB per
274    thread, since each ULong_n_EC record is 16 bytes long.  When more
275    than N_KWs_N_STACKs_PER_THREAD are present, the older half are
276    deleted to make space.  Hence in the worst case we will be able to
277    produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
278    Kw transitions (segments in this thread).  For the current setting
279    that gives a guaranteed stack for at least the last 31.25k
280    segments. */
281 #define N_KWs_N_STACKs_PER_THREAD 62500
282 
283 
284 struct _Thr {
285    /* Current VTSs for this thread.  They change as we go along.  viR
286       is the VTS to be used for reads, viW for writes.  Usually they
287       are the same, but can differ when we deal with reader-writer
288       locks.  It is always the case that
289          VtsID__cmpLEQ(viW,viR) == True
290       that is, viW must be the same, or lagging behind, viR. */
291    VtsID viR;
292    VtsID viW;
293 
294    /* Is initially False, and is set to True after the thread really
295       has done a low-level exit.  When True, we expect to never see
296       any more memory references done by this thread. */
297    Bool llexit_done;
298 
299    /* Is initially False, and is set to True after the thread has been
300       joined with (reaped by some other thread).  After this point, we
301       do not expect to see any uses of .viR or .viW, so it is safe to
302       set them to VtsID_INVALID. */
303    Bool joinedwith_done;
304 
305    /* A small integer giving a unique identity to this Thr.  See
306       comments on the definition of ScalarTS for details. */
307    ThrID thrid : SCALARTS_N_THRBITS;
308 
309    /* A filter that removes references for which we believe that
310       msmcread/msmcwrite will not change the state, nor report a
311       race. */
312    Filter* filter;
313 
314    /* A pointer back to the top level Thread structure.  There is a
315       1-1 mapping between Thread and Thr structures -- each Thr points
316       at its corresponding Thread, and vice versa.  Really, Thr and
317       Thread should be merged into a single structure. */
318    Thread* hgthread;
319 
320    /* The ULongs (scalar Kws) in this accumulate in strictly
321       increasing order, without duplicates.  This is important because
322       we need to be able to find a given scalar Kw in this array
323       later, by binary search. */
324    XArray* /* ULong_n_EC */ local_Kws_n_stacks;
325 };
326 
327 
328 
329 /////////////////////////////////////////////////////////////////
330 /////////////////////////////////////////////////////////////////
331 //                                                             //
332 // data decls: SO                                              //
333 //                                                             //
334 /////////////////////////////////////////////////////////////////
335 /////////////////////////////////////////////////////////////////
336 
337 // (UInt) `echo "Synchronisation object" | md5sum`
338 #define SO_MAGIC 0x56b3c5b0U
339 
340 struct _SO {
341    struct _SO* admin_prev;
342    struct _SO* admin_next;
343    VtsID viR; /* r-clock of sender */
344    VtsID viW; /* w-clock of sender */
345    UInt  magic;
346 };
347 
348 
349 
350 /////////////////////////////////////////////////////////////////
351 /////////////////////////////////////////////////////////////////
352 //                                                             //
353 // Forward declarations                                        //
354 //                                                             //
355 /////////////////////////////////////////////////////////////////
356 /////////////////////////////////////////////////////////////////
357 
358 /* fwds for
359    Globals needed by other parts of the library.  These are set
360    once at startup and then never changed. */
361 static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
362 static ExeContext* (*main_get_EC)( Thr* ) = NULL;
363 
364 /* misc fn and data fwdses */
365 static void VtsID__rcinc ( VtsID ii );
366 static void VtsID__rcdec ( VtsID ii );
367 
368 static inline Bool SVal__isC ( SVal s );
369 static inline VtsID SVal__unC_Rmin ( SVal s );
370 static inline VtsID SVal__unC_Wmin ( SVal s );
371 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
372 static inline void SVal__rcinc ( SVal s );
373 static inline void SVal__rcdec ( SVal s );
374 /* SVal in LineZ are used to store various pointers. */
375 static inline void *SVal2Ptr (SVal s);
376 static inline SVal Ptr2SVal (void* ptr);
377 
378 /* A double linked list of all the SO's. */
379 SO* admin_SO;
380 
381 
382 
383 /////////////////////////////////////////////////////////////////
384 /////////////////////////////////////////////////////////////////
385 //                                                             //
386 // SECTION BEGIN compressed shadow memory                      //
387 //                                                             //
388 /////////////////////////////////////////////////////////////////
389 /////////////////////////////////////////////////////////////////
390 
391 #ifndef __HB_ZSM_H
392 #define __HB_ZSM_H
393 
394 /* Initialise the library.  Once initialised, it will (or may) call
395    SVal__rcinc and SVal__rcdec in response to all the calls below, in order to
396    allow the user to do reference counting on the SVals stored herein.
397    It is important to understand, however, that due to internal
398    caching, the reference counts are in general inaccurate, and can be
399    both above or below the true reference count for an item.  In
400    particular, the library may indicate that the reference count for
401    an item is zero, when in fact it is not.
402 
403    To make the reference counting exact and therefore non-pointless,
404    call zsm_flush_cache.  Immediately after it returns, the reference
405    counts for all items, as deduced by the caller by observing calls
406    to SVal__rcinc and SVal__rcdec, will be correct, and so any items with a
407    zero reference count may be freed (or at least considered to be
408    unreferenced by this library).
409 */
410 static void zsm_init ( void );
411 
412 static void zsm_sset_range  ( Addr, SizeT, SVal );
413 static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
414 static void zsm_scopy_range ( Addr, Addr, SizeT );
415 static void zsm_flush_cache ( void );
416 
417 #endif /* ! __HB_ZSM_H */
418 
419 
/* Round a up to the next multiple of N.  N must be a power of 2.
   Both arguments are fully parenthesised, so compound expressions
   (eg ROUNDUP(1 << 4, 8)) are handled correctly.  Note that N is
   evaluated twice, so it must not have side effects. */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N) - 1))
/* Round a down to the previous multiple of N.  N must be a power of
   2.  Arguments are fully parenthesised, as for ROUNDUP. */
#define ROUNDDN(a, N)   ((a) & ~((N) - 1))
424 
425 /* True if a belongs in range [start, start + szB[
426    (i.e. start + szB is excluded). */
address_in_range(Addr a,Addr start,SizeT szB)427 static inline Bool address_in_range (Addr a, Addr start,  SizeT szB)
428 {
429    /* Checking start <= a && a < start + szB.
430       As start and a are unsigned addresses, the condition can
431       be simplified. */
432    if (CHECK_ZSM)
433       tl_assert ((a - start < szB)
434                  == (start <= a
435                      &&       a < start + szB));
436    return a - start < szB;
437 }
438 
439 /* ------ CacheLine ------ */
440 
/* log2 of the number of bytes of address range covered by one
   cache line. */
#define N_LINE_BITS      6 /* must be >= 3 */
/* Bytes of address range covered by one cache line. */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
/* Number of 8-byte groups (trees) in one cache line. */
#define N_LINE_TREES     (N_LINE_ARANGE / 8)
444 
445 typedef
446    struct {
447       UShort descrs[N_LINE_TREES];
448       SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
449    }
450    CacheLine;
451 
/* Single-bit flags, one per bit of a 16-bit word (presumably the
   bits of a CacheLine.descrs[] entry -- confirm against users). */
#define TREE_DESCR_16_0 0x0001   /* bit 0  */
#define TREE_DESCR_32_0 0x0002   /* bit 1  */
#define TREE_DESCR_16_1 0x0004   /* bit 2  */
#define TREE_DESCR_64   0x0008   /* bit 3  */
#define TREE_DESCR_16_2 0x0010   /* bit 4  */
#define TREE_DESCR_32_1 0x0020   /* bit 5  */
#define TREE_DESCR_16_3 0x0040   /* bit 6  */
#define TREE_DESCR_8_0  0x0080   /* bit 7  */
#define TREE_DESCR_8_1  0x0100   /* bit 8  */
#define TREE_DESCR_8_2  0x0200   /* bit 9  */
#define TREE_DESCR_8_3  0x0400   /* bit 10 */
#define TREE_DESCR_8_4  0x0800   /* bit 11 */
#define TREE_DESCR_8_5  0x1000   /* bit 12 */
#define TREE_DESCR_8_6  0x2000   /* bit 13 */
#define TREE_DESCR_8_7  0x4000   /* bit 14 */
#define TREE_DESCR_DTY  0x8000   /* bit 15 */
468 
469 typedef
470    struct {
471       SVal  dict[4]; /* can represent up to 4 diff values in the line */
472       UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
473                                       dict indexes */
474       /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
475          LineF to use, and dict[2..] are also SVal_INVALID. */
476    }
477    LineZ; /* compressed rep for a cache line */
478 
479 /* LineZ.dict[1] is used to store various pointers:
480    * In the first lineZ of a free SecMap, it points to the next free SecMap.
481    * In a lineZ for which we need to use a lineF, it points to the lineF. */
482 
483 
484 typedef
485    struct {
486       SVal w64s[N_LINE_ARANGE];
487    }
488    LineF; /* full rep for a cache line */
489 
490 /* We use a pool allocator for LineF, as LineF is relatively small,
491    and we will often alloc/release such lines. */
492 static PoolAlloc* LineF_pool_allocator;
493 
494 /* SVal in a lineZ are used to store various pointers.
495    Below are conversion functions to support that. */
LineF_Ptr(LineZ * lineZ)496 static inline LineF *LineF_Ptr (LineZ *lineZ)
497 {
498    tl_assert(lineZ->dict[0] == SVal_INVALID);
499    return SVal2Ptr (lineZ->dict[1]);
500 }
501 
502 /* Shadow memory.
503    Primary map is a WordFM Addr SecMap*.
504    SecMaps cover some page-size-ish section of address space and hold
505      a compressed representation.
506    CacheLine-sized chunks of SecMaps are copied into a Cache, being
507    decompressed when moved into the cache and recompressed on the
508    way out.  Because of this, the cache must operate as a writeback
509    cache, not a writethrough one.
510 
511    Each SecMap must hold a power-of-2 number of CacheLines.  Hence
512    N_SECMAP_BITS must >= N_LINE_BITS.
513 */
/* log2 of the number of bytes of address range covered by one
   SecMap. */
#define N_SECMAP_BITS   13
/* Bytes of address range covered by one SecMap. */
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

/* Number of CacheLines held by a SecMap. */
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
519 
520 /* The data in the SecMap is held in the array of LineZs.  Each LineZ
521    either carries the required data directly, in a compressed
522    representation, or it holds (in .dict[1]) a pointer to a LineF
523    that holds the full representation.
524 
525    As each in-use LineF is referred to by exactly one LineZ,
526    the number of .linesZ[] that refer to a lineF should equal
527    the number of used lineF.
528 
529    RC obligations: the RCs presented to the user include exactly
530    the values in:
531    * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
532    * F reps that are in use
533 
534    Hence the following actions at the following transitions are required:
535 
536    F rep: alloc'd       -> freed                -- rcdec_LineF
537    F rep:               -> alloc'd              -- rcinc_LineF
538    Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
539    Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
540 */
541 
542 typedef
543    struct {
544       UInt   magic;
545       LineZ  linesZ[N_SECMAP_ZLINES];
546    }
547    SecMap;
548 
549 #define SecMap_MAGIC   0x571e58cbU
550 
551 // (UInt) `echo "Free SecMap" | md5sum`
552 #define SecMap_free_MAGIC 0x5a977f30U
553 
554 __attribute__((unused))
is_sane_SecMap(SecMap * sm)555 static inline Bool is_sane_SecMap ( SecMap* sm ) {
556    return sm != NULL && sm->magic == SecMap_MAGIC;
557 }
558 
559 /* ------ Cache ------ */
560 
561 #define N_WAY_BITS 16
562 #define N_WAY_NENT (1 << N_WAY_BITS)
563 
564 /* Each tag is the address of the associated CacheLine, rounded down
565    to a CacheLine address boundary.  A CacheLine size must be a power
566    of 2 and must be 8 or more.  Hence an easy way to initialise the
567    cache so it is empty is to set all the tag values to any value % 8
568    != 0, eg 1.  This means all queries in the cache initially miss.
569    It does however require us to detect and not writeback, any line
570    with a bogus tag. */
571 typedef
572    struct {
573       CacheLine lyns0[N_WAY_NENT];
574       Addr      tags0[N_WAY_NENT];
575    }
576    Cache;
577 
is_valid_scache_tag(Addr tag)578 static inline Bool is_valid_scache_tag ( Addr tag ) {
579    /* a valid tag should be naturally aligned to the start of
580       a CacheLine. */
581    return 0 == (tag & (N_LINE_ARANGE - 1));
582 }
583 
584 
585 /* --------- Primary data structures --------- */
586 
587 /* Shadow memory primary map */
588 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
589 static Cache   cache_shmem;
590 
591 
592 static UWord stats__secmaps_search       = 0; // # SM finds
593 static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
594 static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
595 static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
596 static UWord stats__secmaps_scanGC       = 0; // # nr of scan GC done.
597 static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
598 static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
599 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
600 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
601 static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
602 static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
603 static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
604 static UWord stats__cache_F_fetches      = 0; // # F lines fetched
605 static UWord stats__cache_F_wbacks       = 0; // # F lines written back
606 static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
607 static UWord stats__cache_totrefs        = 0; // # total accesses
608 static UWord stats__cache_totmisses      = 0; // # misses
609 static ULong stats__cache_make_New_arange = 0; // total arange made New
610 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
611 static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
612 static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
613 static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
614 static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
615 static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
616 static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
617 static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
618 static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
619 static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
620 static UWord stats__cline_sread08s       = 0; // # calls to s_m_set8
621 static UWord stats__cline_swrite08s      = 0; // # calls to s_m_get8
622 static UWord stats__cline_swrite16s      = 0; // # calls to s_m_get8
623 static UWord stats__cline_swrite32s      = 0; // # calls to s_m_get8
624 static UWord stats__cline_swrite64s      = 0; // # calls to s_m_get8
625 static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
626 static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
627 static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
628 static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
629 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
630 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
631 static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
632 static UWord stats__vts__tick            = 0; // # calls to VTS__tick
633 static UWord stats__vts__join            = 0; // # calls to VTS__join
634 static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
635 static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
636 static UWord stats__vts_tab_GC           = 0; // # nr of vts_tab GC
637 static UWord stats__vts_pruning          = 0; // # nr of vts pruning
638 
639 // # calls to VTS__cmp_structural w/ slow case
640 static UWord stats__vts__cmp_structural_slow = 0;
641 
642 // # calls to VTS__indexAt_SLOW
643 static UWord stats__vts__indexat_slow = 0;
644 
645 // # calls to vts_set__find__or__clone_and_add
646 static UWord stats__vts_set__focaa    = 0;
647 
648 // # calls to vts_set__find__or__clone_and_add that lead to an
649 // allocation
650 static UWord stats__vts_set__focaa_a  = 0;
651 
652 
/* Returns a rounded down to a multiple of N_SECMAP_ARANGE, that is,
   the base address of the SecMap-sized range containing a. */
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   const Addr offset_mask = (Addr)(N_SECMAP_ARANGE - 1);
   return a & ~offset_mask;
}
shmem__get_SecMap_offset(Addr a)656 static inline UWord shmem__get_SecMap_offset ( Addr a ) {
657    return a & (N_SECMAP_ARANGE - 1);
658 }
659 
660 
661 /*----------------------------------------------------------------*/
662 /*--- map_shmem :: WordFM Addr SecMap                          ---*/
663 /*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
664 /*----------------------------------------------------------------*/
665 
666 /*--------------- SecMap allocation --------------- */
667 
668 static HChar* shmem__bigchunk_next = NULL;
669 static HChar* shmem__bigchunk_end1 = NULL;
670 
shmem__bigchunk_alloc(SizeT n)671 static void* shmem__bigchunk_alloc ( SizeT n )
672 {
673    const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
674    tl_assert(n > 0);
675    n = VG_ROUNDUP(n, 16);
676    tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
677    tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
678              <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
679    if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
680       if (0)
681       VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
682                   (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
683       shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
684       if (shmem__bigchunk_next == NULL)
685          VG_(out_of_memory_NORETURN)(
686             "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
687       shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
688    }
689    tl_assert(shmem__bigchunk_next);
690    tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
691    tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
692    shmem__bigchunk_next += n;
693    return shmem__bigchunk_next - n;
694 }
695 
696 /* SecMap changed to be fully SVal_NOACCESS are inserted in a list of
697    recycled SecMap. When a new SecMap is needed, a recycled SecMap
698    will be used in preference to allocating a new SecMap. */
699 /* We make a linked list of SecMap. The first LineZ is re-used to
700    implement the linked list. */
701 /* Returns the SecMap following sm in the free list.
702    NULL if sm is the last SecMap. sm must be on the free list. */
SecMap_freelist_next(SecMap * sm)703 static inline SecMap *SecMap_freelist_next ( SecMap* sm )
704 {
705    tl_assert (sm);
706    tl_assert (sm->magic == SecMap_free_MAGIC);
707    return SVal2Ptr (sm->linesZ[0].dict[1]);
708 }
/* Makes next the successor of sm in the free list.  sm must carry
   the free magic number, and so must next unless it is NULL (both
   asserted).  The link is stored in dict[1] of sm's first LineZ. */
static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
{
   LineZ* link;
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
   link = &sm->linesZ[0];
   link->dict[1] = Ptr2SVal (next);
}
716 
717 static SecMap *SecMap_freelist = NULL;
SecMap_freelist_length(void)718 static UWord SecMap_freelist_length(void)
719 {
720    SecMap *sm;
721    UWord n = 0;
722 
723    sm = SecMap_freelist;
724    while (sm) {
725      n++;
726      sm = SecMap_freelist_next (sm);
727    }
728    return n;
729 }
730 
/* Puts sm at the head of the free list, marking it with the free
   magic number. */
static void push_SecMap_on_freelist(SecMap* sm)
{
   SecMap* old_head = SecMap_freelist;

   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   /* The magic must be set first: set_SecMap_freelist_next asserts
      that sm already carries it. */
   sm->magic = SecMap_free_MAGIC;
   set_SecMap_freelist_next(sm, old_head);
   SecMap_freelist = sm;
}
738 /* Returns a free SecMap if there is one.
739    Otherwise, returns NULL. */
pop_SecMap_from_freelist(void)740 static SecMap *pop_SecMap_from_freelist(void)
741 {
742    SecMap *sm;
743 
744    sm = SecMap_freelist;
745    if (sm) {
746       tl_assert (sm->magic == SecMap_free_MAGIC);
747       SecMap_freelist = SecMap_freelist_next (sm);
748       if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
749    }
750    return sm;
751 }
752 
shmem__alloc_or_recycle_SecMap(void)753 static SecMap* shmem__alloc_or_recycle_SecMap ( void )
754 {
755    Word    i, j;
756    SecMap* sm = pop_SecMap_from_freelist();
757 
758    if (!sm) {
759       sm = shmem__bigchunk_alloc( sizeof(SecMap) );
760       stats__secmaps_allocd++;
761       stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
762       stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
763       stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
764    }
765    if (0) VG_(printf)("alloc_SecMap %p\n",sm);
766    tl_assert(sm);
767    sm->magic = SecMap_MAGIC;
768    for (i = 0; i < N_SECMAP_ZLINES; i++) {
769       sm->linesZ[i].dict[0] = SVal_NOACCESS;
770       sm->linesZ[i].dict[1] = SVal_INVALID;
771       sm->linesZ[i].dict[2] = SVal_INVALID;
772       sm->linesZ[i].dict[3] = SVal_INVALID;
773       for (j = 0; j < N_LINE_ARANGE/4; j++)
774          sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
775    }
776    return sm;
777 }
778 
779 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
780 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
781 
shmem__find_SecMap(Addr ga)782 static SecMap* shmem__find_SecMap ( Addr ga )
783 {
784    SecMap* sm    = NULL;
785    Addr    gaKey = shmem__round_to_SecMap_base(ga);
786    // Cache
787    stats__secmaps_search++;
788    if (LIKELY(gaKey == smCache[0].gaKey))
789       return smCache[0].sm;
790    if (LIKELY(gaKey == smCache[1].gaKey)) {
791       SMCacheEnt tmp = smCache[0];
792       smCache[0] = smCache[1];
793       smCache[1] = tmp;
794       return smCache[0].sm;
795    }
796    if (gaKey == smCache[2].gaKey) {
797       SMCacheEnt tmp = smCache[1];
798       smCache[1] = smCache[2];
799       smCache[2] = tmp;
800       return smCache[1].sm;
801    }
802    // end Cache
803    stats__secmaps_search_slow++;
804    if (VG_(lookupFM)( map_shmem,
805                       NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
806       tl_assert(sm != NULL);
807       smCache[2] = smCache[1];
808       smCache[1] = smCache[0];
809       smCache[0].gaKey = gaKey;
810       smCache[0].sm    = sm;
811    } else {
812       tl_assert(sm == NULL);
813    }
814    return sm;
815 }
816 
/* Scan all the SecMaps in map_shmem and count those that can be GC-ed.
   A SecMap can be GC-ed when each of its lines is either a LineZ whose
   dict[0] is SVal_NOACCESS and whose dict[1..3] are not 'C' SVals, or
   a LineF containing only SVal_NOACCESS.

   If 'really' is True, the GC is actually done: each such SecMap is
   removed from map_shmem, its LineFs are returned to the pool
   allocator, it is pushed on the SecMap free list, and
   next_SecMap_GC_at is recomputed.  If 'really' is False, the SecMaps
   are only counted.

   In both cases the smCache is invalidated.
   Returns the number of SecMaps found to be GC-able. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed = 0;

   /* First invalidate the smCache */
   /* NOTE(review): presumably 1 can never be a SecMap base address,
      so no lookup can match these keys -- confirm. */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   /* The three assignments above must cover the whole cache. */
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      UWord   n_linesF = 0;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool ok_to_GC = True;

      examined++;

      /* Deal with the LineZs and the possible LineF of a LineZ. */
      /* The scan of this SecMap stops at the first line that
         prevents GC. */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            /* Z representation. */
            ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
               && !SVal__isC (lineZ->dict[1])
               && !SVal__isC (lineZ->dict[2])
               && !SVal__isC (lineZ->dict[3]);
         } else {
            /* dict[0] == SVal_INVALID: the data is in a LineF. */
            LineF *lineF = LineF_Ptr(lineZ);
            n_linesF++;
            for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
               ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
         }
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
        SecMap *fm_sm;
        Addr fm_gaKey;
        /* We cannot remove a SecMap from map_shmem while iterating.
           So, stop iteration, remove from map_shmem, recreate the iteration
           on the next SecMap. */
        VG_(doneIterFM) ( map_shmem );
        /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
           We just need to free the lineF referenced by the linesZ. */
        if (n_linesF > 0) {
           /* n_linesF counts down so the loop can stop as soon as the
              last LineF has been freed. */
           for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
              LineZ* lineZ = &sm->linesZ[i];
              if (lineZ->dict[0] == SVal_INVALID) {
                 VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
                 n_linesF--;
              }
           }
        }
        if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
          tl_assert (0);
        stats__secmaps_in_map_shmem--;
        tl_assert (gaKey == fm_gaKey);
        tl_assert (sm == fm_sm);
        stats__secmaps_scanGCed++;
        push_SecMap_on_freelist (sm);
        /* Restart the iteration at the key just after the removed
           SecMap's address range. */
        VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%. We then allow to alloc 10%
         more before GCing. This avoids doing a lot of costly GC
         for the worst case : the 'growing phase' of an application
         that allocates a lot of memory.
         Worst case can be reproduced e.g. by
             perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         that allocates around 30Gb of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                  "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}
919 
shmem__find_or_alloc_SecMap(Addr ga)920 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
921 {
922    SecMap* sm = shmem__find_SecMap ( ga );
923    if (LIKELY(sm)) {
924       if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
925       return sm;
926    } else {
927       /* create a new one */
928       Addr gaKey = shmem__round_to_SecMap_base(ga);
929       sm = shmem__alloc_or_recycle_SecMap();
930       tl_assert(sm);
931       VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
932       stats__secmaps_in_map_shmem++;
933       if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
934       return sm;
935    }
936 }
937 
938 /* Returns the nr of linesF which are in use. Note: this is scanning
939    the secmap wordFM. So, this is to be used for statistics only. */
940 __attribute__((noinline))
shmem__SecMap_used_linesF(void)941 static UWord shmem__SecMap_used_linesF(void)
942 {
943    UWord secmapW = 0;
944    Addr  gaKey;
945    UWord inUse = 0;
946 
947    VG_(initIterFM)( map_shmem );
948    while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
949       UWord   i;
950       SecMap* sm = (SecMap*)secmapW;
951       tl_assert(sm->magic == SecMap_MAGIC);
952 
953       for (i = 0; i < N_SECMAP_ZLINES; i++) {
954          LineZ* lineZ = &sm->linesZ[i];
955          if (lineZ->dict[0] == SVal_INVALID)
956             inUse++;
957       }
958    }
959    VG_(doneIterFM)( map_shmem );
960 
961    return inUse;
962 }
963 
964 /* ------------ LineF and LineZ related ------------ */
965 
/* Applies SVal__rcinc to each of the N_LINE_ARANGE values of lineF. */
static void rcinc_LineF ( LineF* lineF ) {
   UWord ix = 0;
   while (ix < N_LINE_ARANGE) {
      SVal__rcinc(lineF->w64s[ix]);
      ix++;
   }
}
971 
/* Applies SVal__rcdec to each of the N_LINE_ARANGE values of lineF. */
static void rcdec_LineF ( LineF* lineF ) {
   UWord ix = 0;
   while (ix < N_LINE_ARANGE) {
      SVal__rcdec(lineF->w64s[ix]);
      ix++;
   }
}
977 
/* Applies SVal__rcinc to the dictionary of lineZ.  dict[0] must be
   valid (asserted) and is always processed; dict[1..3] are processed
   only if they are not SVal_INVALID. */
static void rcinc_LineZ ( LineZ* lineZ ) {
   int slot;
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   for (slot = 1; slot <= 3; slot++) {
      if (lineZ->dict[slot] != SVal_INVALID)
         SVal__rcinc(lineZ->dict[slot]);
   }
}
985 
/* Applies SVal__rcdec to the dictionary of lineZ.  dict[0] must be
   valid (asserted) and is always processed; dict[1..3] are processed
   only if they are not SVal_INVALID. */
static void rcdec_LineZ ( LineZ* lineZ ) {
   int slot;
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   for (slot = 1; slot <= 3; slot++) {
      if (lineZ->dict[slot] != SVal_INVALID)
         SVal__rcdec(lineZ->dict[slot]);
   }
}
993 
994 inline
write_twobit_array(UChar * arr,UWord ix,UWord b2)995 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
996    Word bix, shft, mask, prep;
997    tl_assert(ix >= 0);
998    bix  = ix >> 2;
999    shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1000    mask = 3 << shft;
1001    prep = b2 << shft;
1002    arr[bix] = (arr[bix] & ~mask) | prep;
1003 }
1004 
1005 inline
read_twobit_array(UChar * arr,UWord ix)1006 static UWord read_twobit_array ( UChar* arr, UWord ix ) {
1007    Word bix, shft;
1008    tl_assert(ix >= 0);
1009    bix  = ix >> 2;
1010    shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1011    return (arr[bix] >> shft) & 3;
1012 }
1013 
1014 /* We cache one free lineF, to avoid pool allocator calls.
1015    Measurement on firefox has shown that this avoids more than 90%
1016    of the PA calls. */
1017 static LineF *free_lineF = NULL;
1018 
1019 /* Allocates a lineF for LineZ. Sets lineZ in a state indicating
1020    lineF has to be used. */
alloc_LineF_for_Z(LineZ * lineZ)1021 static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
1022 {
1023    LineF *lineF;
1024 
1025    tl_assert(lineZ->dict[0] == SVal_INVALID);
1026 
1027    if (LIKELY(free_lineF)) {
1028       lineF = free_lineF;
1029       free_lineF = NULL;
1030    } else {
1031       lineF = VG_(allocEltPA) ( LineF_pool_allocator );
1032    }
1033    lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1034    lineZ->dict[1] = Ptr2SVal (lineF);
1035 
1036    return lineF;
1037 }
1038 
1039 /* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
1040    back to its initial state SVal_NOACCESS (i.e. ready to be
1041    read or written just after SecMap allocation). */
clear_LineF_of_Z(LineZ * lineZ)1042 static inline void clear_LineF_of_Z (LineZ *lineZ)
1043 {
1044    LineF *lineF = LineF_Ptr(lineZ);
1045 
1046    rcdec_LineF(lineF);
1047    if (UNLIKELY(free_lineF)) {
1048       VG_(freeEltPA)( LineF_pool_allocator, lineF );
1049    } else {
1050       free_lineF = lineF;
1051    }
1052    lineZ->dict[0] = SVal_NOACCESS;
1053    lineZ->dict[1] = SVal_INVALID;
1054 }
1055 
1056 /* Given address 'tag', find either the Z or F line containing relevant
1057    data, so it can be read into the cache.
1058 */
find_ZF_for_reading(LineZ ** zp,LineF ** fp,Addr tag)1059 static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
1060                                   /*OUT*/LineF** fp, Addr tag ) {
1061    LineZ* lineZ;
1062    LineF* lineF;
1063    UWord   zix;
1064    SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
1065    UWord   smoff = shmem__get_SecMap_offset(tag);
1066    /* since smoff is derived from a valid tag, it should be
1067       cacheline-aligned. */
1068    tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1069    zix = smoff >> N_LINE_BITS;
1070    tl_assert(zix < N_SECMAP_ZLINES);
1071    lineZ = &sm->linesZ[zix];
1072    lineF = NULL;
1073    if (lineZ->dict[0] == SVal_INVALID) {
1074       lineF = LineF_Ptr (lineZ);
1075       lineZ = NULL;
1076    }
1077    *zp = lineZ;
1078    *fp = lineF;
1079 }
1080 
1081 /* Given address 'tag', return the relevant SecMap and the index of
1082    the LineZ within it, in the expectation that the line is to be
1083    overwritten.  Regardless of whether 'tag' is currently associated
1084    with a Z or F representation, to rcdec on the current
1085    representation, in recognition of the fact that the contents are
1086    just about to be overwritten. */
1087 static __attribute__((noinline))
find_Z_for_writing(SecMap ** smp,Word * zixp,Addr tag)1088 void find_Z_for_writing ( /*OUT*/SecMap** smp,
1089                           /*OUT*/Word* zixp,
1090                           Addr tag ) {
1091    LineZ* lineZ;
1092    UWord   zix;
1093    SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
1094    UWord   smoff = shmem__get_SecMap_offset(tag);
1095    /* since smoff is derived from a valid tag, it should be
1096       cacheline-aligned. */
1097    tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1098    zix = smoff >> N_LINE_BITS;
1099    tl_assert(zix < N_SECMAP_ZLINES);
1100    lineZ = &sm->linesZ[zix];
1101    /* re RCs, we are rcdec_LineZ/clear_LineF_of_Z this LineZ so that new data
1102       can be parked in it.  Hence have to rcdec it accordingly. */
1103    /* If lineZ has an associated lineF, free it up. */
1104    if (lineZ->dict[0] == SVal_INVALID)
1105       clear_LineF_of_Z(lineZ);
1106    else
1107       rcdec_LineZ(lineZ);
1108    *smp  = sm;
1109    *zixp = zix;
1110 }
1111 
1112 /* ------------ CacheLine and implicit-tree related ------------ */
1113 
1114 __attribute__((unused))
pp_CacheLine(CacheLine * cl)1115 static void pp_CacheLine ( CacheLine* cl ) {
1116    Word i;
1117    if (!cl) {
1118       VG_(printf)("%s","pp_CacheLine(NULL)\n");
1119       return;
1120    }
1121    for (i = 0; i < N_LINE_TREES; i++)
1122       VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
1123    for (i = 0; i < N_LINE_ARANGE; i++)
1124       VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
1125 }
1126 
/* Maps a tree descriptor to a byte of "valid bits", one bit per tree
   slot 0 .. 7 (bit i describes tree[i]).  Each case below pairs one
   accepted combination of TREE_DESCR_* flags with its byte; the
   comment next to the case gives the granularities it stands for,
   from byte 7 down to byte 0.

   Returns 0 for any descriptor that is not one of the listed
   combinations; every listed combination gives a non-zero byte, so 0
   can be used to detect an invalid descriptor. */
static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
   /* DESCR builds a descriptor value from 15 individual flag bits,
      in the bit positions checked by the assertions below. */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

   /* BYTE builds the result from 8 individual bits, bit 7 first. */
#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   /* They check that the bit layout assumed by DESCR agrees with the
      TREE_DESCR_* constants. */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
                   /* INVALID - any valid descr produces at least one
                      valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   /* Every path through the switch returns; this only guards against
      a future edit that lets control fall out of it. */
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}
1241 
1242 __attribute__((unused))
is_sane_Descr(UShort descr)1243 static Bool is_sane_Descr ( UShort descr ) {
1244    return descr_to_validbits(descr) != 0;
1245 }
1246 
/* Writes into dst a textual form of descr: the 15 TREE_DESCR_* flags
   as '0'/'1' digits, grouped "8x8-bit 16/32/16 64 16/32/16" with
   single spaces between groups (21 characters plus the terminating
   NUL).  dst must be large enough; no bound is checked here. */
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   /* 1 if the given flag is set in descr, else 0. */
#  define FLAG(_mask)  ((Int)((descr & (_mask)) ? 1 : 0))

   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                FLAG(TREE_DESCR_8_7),  FLAG(TREE_DESCR_8_6),
                FLAG(TREE_DESCR_8_5),  FLAG(TREE_DESCR_8_4),
                FLAG(TREE_DESCR_8_3),  FLAG(TREE_DESCR_8_2),
                FLAG(TREE_DESCR_8_1),  FLAG(TREE_DESCR_8_0),
                FLAG(TREE_DESCR_16_3), FLAG(TREE_DESCR_32_1),
                FLAG(TREE_DESCR_16_2),
                FLAG(TREE_DESCR_64),
                FLAG(TREE_DESCR_16_1), FLAG(TREE_DESCR_32_0),
                FLAG(TREE_DESCR_16_0)
   );

#  undef FLAG
}
/* Writes into dst the 8 bits of byte as '0'/'1' digits, most
   significant bit first (8 characters plus the terminating NUL).
   dst must be large enough; no bound is checked here. */
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                     (Int)((byte >> 7) & 1),
                     (Int)((byte >> 6) & 1),
                     (Int)((byte >> 5) & 1),
                     (Int)((byte >> 4) & 1),
                     (Int)((byte >> 3) & 1),
                     (Int)((byte >> 2) & 1),
                     (Int)((byte >> 1) & 1),
                     (Int)((byte >> 0) & 1)
   );
}
1279 
is_sane_Descr_and_Tree(UShort descr,SVal * tree)1280 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
1281    Word  i;
1282    UChar validbits = descr_to_validbits(descr);
1283    HChar buf[128], buf2[128];    // large enough
1284    if (validbits == 0)
1285       goto bad;
1286    for (i = 0; i < 8; i++) {
1287       if (validbits & (1<<i)) {
1288          if (tree[i] == SVal_INVALID)
1289             goto bad;
1290       } else {
1291          if (tree[i] != SVal_INVALID)
1292             goto bad;
1293       }
1294    }
1295    return True;
1296   bad:
1297    sprintf_Descr( buf, descr );
1298    sprintf_Byte( buf2, validbits );
1299    VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
1300    VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
1301    VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
1302    for (i = 0; i < 8; i++)
1303       VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
1304    VG_(printf)("%s","}\n");
1305    return 0;
1306 }
1307 
is_sane_CacheLine(CacheLine * cl)1308 static Bool is_sane_CacheLine ( CacheLine* cl )
1309 {
1310    Word tno, cloff;
1311 
1312    if (!cl) goto bad;
1313 
1314    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1315       UShort descr = cl->descrs[tno];
1316       SVal*  tree  = &cl->svals[cloff];
1317       if (!is_sane_Descr_and_Tree(descr, tree))
1318          goto bad;
1319    }
1320    tl_assert(cloff == N_LINE_ARANGE);
1321    return True;
1322   bad:
1323    pp_CacheLine(cl);
1324    return False;
1325 }
1326 
normalise_tree(SVal * tree)1327 static UShort normalise_tree ( /*MOD*/SVal* tree )
1328 {
1329    UShort descr;
1330    /* pre: incoming tree[0..7] does not have any invalid shvals, in
1331       particular no zeroes. */
1332    if (CHECK_ZSM
1333        && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
1334                    || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
1335                    || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
1336                    || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
1337       tl_assert(0);
1338 
1339    descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
1340            | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
1341            | TREE_DESCR_8_1 | TREE_DESCR_8_0;
1342    /* build 16-bit layer */
1343    if (tree[1] == tree[0]) {
1344       tree[1] = SVal_INVALID;
1345       descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
1346       descr |= TREE_DESCR_16_0;
1347    }
1348    if (tree[3] == tree[2]) {
1349       tree[3] = SVal_INVALID;
1350       descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
1351       descr |= TREE_DESCR_16_1;
1352    }
1353    if (tree[5] == tree[4]) {
1354       tree[5] = SVal_INVALID;
1355       descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
1356       descr |= TREE_DESCR_16_2;
1357    }
1358    if (tree[7] == tree[6]) {
1359       tree[7] = SVal_INVALID;
1360       descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
1361       descr |= TREE_DESCR_16_3;
1362    }
1363    /* build 32-bit layer */
1364    if (tree[2] == tree[0]
1365        && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
1366       tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
1367       descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
1368       descr |= TREE_DESCR_32_0;
1369    }
1370    if (tree[6] == tree[4]
1371        && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
1372       tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
1373       descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
1374       descr |= TREE_DESCR_32_1;
1375    }
1376    /* build 64-bit layer */
1377    if (tree[4] == tree[0]
1378        && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
1379       tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
1380       descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
1381       descr |= TREE_DESCR_64;
1382    }
1383    return descr;
1384 }
1385 
1386 /* This takes a cacheline where all the data is at the leaves
1387    (w8[..]) and builds a correctly normalised tree. */
normalise_CacheLine(CacheLine * cl)1388 static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1389 {
1390    Word tno, cloff;
1391    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1392       SVal* tree = &cl->svals[cloff];
1393       cl->descrs[tno] = normalise_tree( tree );
1394    }
1395    tl_assert(cloff == N_LINE_ARANGE);
1396    if (CHECK_ZSM)
1397       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1398    stats__cline_normalises++;
1399 }
1400 
1401 
1402 typedef struct { UChar count; SVal sval; } CountedSVal;
1403 
1404 static
sequentialise_CacheLine(CountedSVal * dst,Word * dstUsedP,Word nDst,CacheLine * src)1405 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1406                                /*OUT*/Word* dstUsedP,
1407                                Word nDst, CacheLine* src )
1408 {
1409    Word  tno, cloff, dstUsed;
1410 
1411    tl_assert(nDst == N_LINE_ARANGE);
1412    dstUsed = 0;
1413 
1414    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1415       UShort descr = src->descrs[tno];
1416       SVal*  tree  = &src->svals[cloff];
1417 
1418       /* sequentialise the tree described by (descr,tree). */
1419 #     define PUT(_n,_v)                                \
1420          do { dst[dstUsed  ].count = (_n);             \
1421               dst[dstUsed++].sval  = (_v);             \
1422          } while (0)
1423 
1424       /* byte 0 */
1425       if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
1426       if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1427       if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1428       if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
1429       /* byte 1 */
1430       if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
1431       /* byte 2 */
1432       if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1433       if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
1434       /* byte 3 */
1435       if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
1436       /* byte 4 */
1437       if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1438       if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1439       if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
1440       /* byte 5 */
1441       if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
1442       /* byte 6 */
1443       if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1444       if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
1445       /* byte 7 */
1446       if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);
1447 
1448 #     undef PUT
1449       /* END sequentialise the tree described by (descr,tree). */
1450 
1451    }
1452    tl_assert(cloff == N_LINE_ARANGE);
1453    tl_assert(dstUsed <= nDst);
1454 
1455    *dstUsedP = dstUsed;
1456 }
1457 
1458 /* Write the cacheline 'wix' to backing store.  Where it ends up
1459    is determined by its tag field. */
cacheline_wback(UWord wix)1460 static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1461 {
1462    Word        i, j, k, m;
1463    Addr        tag;
1464    SecMap*     sm;
1465    CacheLine*  cl;
1466    LineZ* lineZ;
1467    LineF* lineF;
1468    Word        zix, fix, csvalsUsed;
1469    CountedSVal csvals[N_LINE_ARANGE];
1470    SVal        sv;
1471 
1472    if (0)
1473    VG_(printf)("scache wback line %d\n", (Int)wix);
1474 
1475    tl_assert(wix >= 0 && wix < N_WAY_NENT);
1476 
1477    tag =  cache_shmem.tags0[wix];
1478    cl  = &cache_shmem.lyns0[wix];
1479 
1480    /* The cache line may have been invalidated; if so, ignore it. */
1481    if (!is_valid_scache_tag(tag))
1482       return;
1483 
1484    /* Where are we going to put it? */
1485    sm         = NULL;
1486    lineZ      = NULL;
1487    lineF      = NULL;
1488    zix = fix = -1;
1489 
1490    /* find the Z line to write in and rcdec it or the associated F
1491       line. */
1492    find_Z_for_writing( &sm, &zix, tag );
1493 
1494    tl_assert(sm);
1495    tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1496    lineZ = &sm->linesZ[zix];
1497 
1498    /* Generate the data to be stored */
1499    if (CHECK_ZSM)
1500       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1501 
1502    csvalsUsed = -1;
1503    sequentialise_CacheLine( csvals, &csvalsUsed,
1504                             N_LINE_ARANGE, cl );
1505    tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1506    if (0) VG_(printf)("%ld ", csvalsUsed);
1507 
1508    lineZ->dict[0] = lineZ->dict[1]
1509                   = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1510 
1511    /* i indexes actual shadow values, k is cursor in csvals */
1512    i = 0;
1513    for (k = 0; k < csvalsUsed; k++) {
1514 
1515       sv = csvals[k].sval;
1516       if (CHECK_ZSM)
1517          tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1518       /* do we already have it? */
1519       if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1520       if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1521       if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1522       if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1523       /* no.  look for a free slot. */
1524       if (CHECK_ZSM)
1525          tl_assert(sv != SVal_INVALID);
1526       if (lineZ->dict[0]
1527           == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1528       if (lineZ->dict[1]
1529           == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1530       if (lineZ->dict[2]
1531           == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1532       if (lineZ->dict[3]
1533           == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1534       break; /* we'll have to use the f rep */
1535      dict_ok:
1536       m = csvals[k].count;
1537       if (m == 8) {
1538          write_twobit_array( lineZ->ix2s, i+0, j );
1539          write_twobit_array( lineZ->ix2s, i+1, j );
1540          write_twobit_array( lineZ->ix2s, i+2, j );
1541          write_twobit_array( lineZ->ix2s, i+3, j );
1542          write_twobit_array( lineZ->ix2s, i+4, j );
1543          write_twobit_array( lineZ->ix2s, i+5, j );
1544          write_twobit_array( lineZ->ix2s, i+6, j );
1545          write_twobit_array( lineZ->ix2s, i+7, j );
1546          i += 8;
1547       }
1548       else if (m == 4) {
1549          write_twobit_array( lineZ->ix2s, i+0, j );
1550          write_twobit_array( lineZ->ix2s, i+1, j );
1551          write_twobit_array( lineZ->ix2s, i+2, j );
1552          write_twobit_array( lineZ->ix2s, i+3, j );
1553          i += 4;
1554       }
1555       else if (m == 1) {
1556          write_twobit_array( lineZ->ix2s, i+0, j );
1557          i += 1;
1558       }
1559       else if (m == 2) {
1560          write_twobit_array( lineZ->ix2s, i+0, j );
1561          write_twobit_array( lineZ->ix2s, i+1, j );
1562          i += 2;
1563       }
1564       else {
1565          tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1566       }
1567 
1568    }
1569 
1570    if (LIKELY(i == N_LINE_ARANGE)) {
1571       /* Construction of the compressed representation was
1572          successful. */
1573       rcinc_LineZ(lineZ);
1574       stats__cache_Z_wbacks++;
1575    } else {
1576       /* Cannot use the compressed(z) representation.  Use the full(f)
1577          rep instead. */
1578       tl_assert(i >= 0 && i < N_LINE_ARANGE);
1579       lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1580       lineF = alloc_LineF_for_Z (lineZ);
1581       i = 0;
1582       for (k = 0; k < csvalsUsed; k++) {
1583          if (CHECK_ZSM)
1584             tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1585          sv = csvals[k].sval;
1586          if (CHECK_ZSM)
1587             tl_assert(sv != SVal_INVALID);
1588          for (m = csvals[k].count; m > 0; m--) {
1589             lineF->w64s[i] = sv;
1590             i++;
1591          }
1592       }
1593       tl_assert(i == N_LINE_ARANGE);
1594       rcinc_LineF(lineF);
1595       stats__cache_F_wbacks++;
1596    }
1597 }
1598 
1599 /* Fetch the cacheline 'wix' from the backing store.  The tag
1600    associated with 'wix' is assumed to have already been filled in;
1601    hence that is used to determine where in the backing store to read
1602    from. */
cacheline_fetch(UWord wix)1603 static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1604 {
1605    Word       i;
1606    Addr       tag;
1607    CacheLine* cl;
1608    LineZ*     lineZ;
1609    LineF*     lineF;
1610 
1611    if (0)
1612    VG_(printf)("scache fetch line %d\n", (Int)wix);
1613 
1614    tl_assert(wix >= 0 && wix < N_WAY_NENT);
1615 
1616    tag =  cache_shmem.tags0[wix];
1617    cl  = &cache_shmem.lyns0[wix];
1618 
1619    /* reject nonsense requests */
1620    tl_assert(is_valid_scache_tag(tag));
1621 
1622    lineZ = NULL;
1623    lineF = NULL;
1624    find_ZF_for_reading( &lineZ, &lineF, tag );
1625    tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1626 
1627    /* expand the data into the bottom layer of the tree, then get
1628       cacheline_normalise to build the descriptor array. */
1629    if (lineF) {
1630       for (i = 0; i < N_LINE_ARANGE; i++) {
1631          cl->svals[i] = lineF->w64s[i];
1632       }
1633       stats__cache_F_fetches++;
1634    } else {
1635       for (i = 0; i < N_LINE_ARANGE; i++) {
1636          UWord ix = read_twobit_array( lineZ->ix2s, i );
1637          if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
1638          cl->svals[i] = lineZ->dict[ix];
1639          if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
1640       }
1641       stats__cache_Z_fetches++;
1642    }
1643    normalise_CacheLine( cl );
1644 }
1645 
1646 /* Invalid the cachelines corresponding to the given range, which
1647    must start and end on a cacheline boundary. */
shmem__invalidate_scache_range(Addr ga,SizeT szB)1648 static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1649 {
1650    Word wix;
1651 
1652    /* ga must be on a cacheline boundary. */
1653    tl_assert (is_valid_scache_tag (ga));
1654    /* szB must be a multiple of cacheline size. */
1655    tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
1656 
1657 
1658    Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1659    Word nwix = szB / N_LINE_ARANGE;
1660 
1661    if (nwix > N_WAY_NENT)
1662       nwix = N_WAY_NENT; // no need to check several times the same entry.
1663 
1664    for (wix = 0; wix < nwix; wix++) {
1665       if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1666          cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1667       ga_ix++;
1668       if (UNLIKELY(ga_ix == N_WAY_NENT))
1669          ga_ix = 0;
1670    }
1671 }
1672 
1673 
shmem__flush_and_invalidate_scache(void)1674 static void shmem__flush_and_invalidate_scache ( void ) {
1675    Word wix;
1676    Addr tag;
1677    if (0) VG_(printf)("%s","scache flush and invalidate\n");
1678    tl_assert(!is_valid_scache_tag(1));
1679    for (wix = 0; wix < N_WAY_NENT; wix++) {
1680       tag = cache_shmem.tags0[wix];
1681       if (tag == 1/*INVALID*/) {
1682          /* already invalid; nothing to do */
1683       } else {
1684          tl_assert(is_valid_scache_tag(tag));
1685          cacheline_wback( wix );
1686       }
1687       cache_shmem.tags0[wix] = 1/*INVALID*/;
1688    }
1689    stats__cache_flushes_invals++;
1690 }
1691 
1692 
aligned16(Addr a)1693 static inline Bool aligned16 ( Addr a ) {
1694    return 0 == (a & 1);
1695 }
aligned32(Addr a)1696 static inline Bool aligned32 ( Addr a ) {
1697    return 0 == (a & 3);
1698 }
aligned64(Addr a)1699 static inline Bool aligned64 ( Addr a ) {
1700    return 0 == (a & 7);
1701 }
get_cacheline_offset(Addr a)1702 static inline UWord get_cacheline_offset ( Addr a ) {
1703    return (UWord)(a & (N_LINE_ARANGE - 1));
1704 }
/* Apply ROUNDUP to 'a' with the cache line size N_LINE_ARANGE. */
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   Addr rounded = ROUNDUP(a, N_LINE_ARANGE);
   return rounded;
}
/* Apply ROUNDDN to 'a' with the cache line size N_LINE_ARANGE. */
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   Addr rounded = ROUNDDN(a, N_LINE_ARANGE);
   return rounded;
}
get_treeno(Addr a)1711 static inline UWord get_treeno ( Addr a ) {
1712    return get_cacheline_offset(a) >> 3;
1713 }
get_tree_offset(Addr a)1714 static inline UWord get_tree_offset ( Addr a ) {
1715    return a & 7;
1716 }
1717 
1718 static __attribute__((noinline))
1719        CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
get_cacheline(Addr a)1720 static inline CacheLine* get_cacheline ( Addr a )
1721 {
1722    /* tag is 'a' with the in-line offset masked out,
1723       eg a[31]..a[4] 0000 */
1724    Addr       tag = a & ~(N_LINE_ARANGE - 1);
1725    UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1726    stats__cache_totrefs++;
1727    if (LIKELY(tag == cache_shmem.tags0[wix])) {
1728       return &cache_shmem.lyns0[wix];
1729    } else {
1730       return get_cacheline_MISS( a );
1731    }
1732 }
1733 
1734 static __attribute__((noinline))
get_cacheline_MISS(Addr a)1735        CacheLine* get_cacheline_MISS ( Addr a )
1736 {
1737    /* tag is 'a' with the in-line offset masked out,
1738       eg a[31]..a[4] 0000 */
1739 
1740    CacheLine* cl;
1741    Addr*      tag_old_p;
1742    Addr       tag = a & ~(N_LINE_ARANGE - 1);
1743    UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1744 
1745    tl_assert(tag != cache_shmem.tags0[wix]);
1746 
1747    /* Dump the old line into the backing store. */
1748    stats__cache_totmisses++;
1749 
1750    cl        = &cache_shmem.lyns0[wix];
1751    tag_old_p = &cache_shmem.tags0[wix];
1752 
1753    if (is_valid_scache_tag( *tag_old_p )) {
1754       /* EXPENSIVE and REDUNDANT: callee does it */
1755       if (CHECK_ZSM)
1756          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1757       cacheline_wback( wix );
1758    }
1759    /* and reload the new one */
1760    *tag_old_p = tag;
1761    cacheline_fetch( wix );
1762    if (CHECK_ZSM)
1763       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1764    return cl;
1765 }
1766 
pulldown_to_32(SVal * tree,UWord toff,UShort descr)1767 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1768    stats__cline_64to32pulldown++;
1769    switch (toff) {
1770       case 0: case 4:
1771          tl_assert(descr & TREE_DESCR_64);
1772          tree[4] = tree[0];
1773          descr &= ~TREE_DESCR_64;
1774          descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1775          break;
1776       default:
1777          tl_assert(0);
1778    }
1779    return descr;
1780 }
1781 
pulldown_to_16(SVal * tree,UWord toff,UShort descr)1782 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1783    stats__cline_32to16pulldown++;
1784    switch (toff) {
1785       case 0: case 2:
1786          if (!(descr & TREE_DESCR_32_0)) {
1787             descr = pulldown_to_32(tree, 0, descr);
1788          }
1789          tl_assert(descr & TREE_DESCR_32_0);
1790          tree[2] = tree[0];
1791          descr &= ~TREE_DESCR_32_0;
1792          descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1793          break;
1794       case 4: case 6:
1795          if (!(descr & TREE_DESCR_32_1)) {
1796             descr = pulldown_to_32(tree, 4, descr);
1797          }
1798          tl_assert(descr & TREE_DESCR_32_1);
1799          tree[6] = tree[4];
1800          descr &= ~TREE_DESCR_32_1;
1801          descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1802          break;
1803       default:
1804          tl_assert(0);
1805    }
1806    return descr;
1807 }
1808 
pulldown_to_8(SVal * tree,UWord toff,UShort descr)1809 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1810    stats__cline_16to8pulldown++;
1811    switch (toff) {
1812       case 0: case 1:
1813          if (!(descr & TREE_DESCR_16_0)) {
1814             descr = pulldown_to_16(tree, 0, descr);
1815          }
1816          tl_assert(descr & TREE_DESCR_16_0);
1817          tree[1] = tree[0];
1818          descr &= ~TREE_DESCR_16_0;
1819          descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1820          break;
1821       case 2: case 3:
1822          if (!(descr & TREE_DESCR_16_1)) {
1823             descr = pulldown_to_16(tree, 2, descr);
1824          }
1825          tl_assert(descr & TREE_DESCR_16_1);
1826          tree[3] = tree[2];
1827          descr &= ~TREE_DESCR_16_1;
1828          descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1829          break;
1830       case 4: case 5:
1831          if (!(descr & TREE_DESCR_16_2)) {
1832             descr = pulldown_to_16(tree, 4, descr);
1833          }
1834          tl_assert(descr & TREE_DESCR_16_2);
1835          tree[5] = tree[4];
1836          descr &= ~TREE_DESCR_16_2;
1837          descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1838          break;
1839       case 6: case 7:
1840          if (!(descr & TREE_DESCR_16_3)) {
1841             descr = pulldown_to_16(tree, 6, descr);
1842          }
1843          tl_assert(descr & TREE_DESCR_16_3);
1844          tree[7] = tree[6];
1845          descr &= ~TREE_DESCR_16_3;
1846          descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1847          break;
1848       default:
1849          tl_assert(0);
1850    }
1851    return descr;
1852 }
1853 
1854 
pullup_descr_to_16(UShort descr,UWord toff)1855 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1856    UShort mask;
1857    switch (toff) {
1858       case 0:
1859          mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1860          tl_assert( (descr & mask) == mask );
1861          descr &= ~mask;
1862          descr |= TREE_DESCR_16_0;
1863          break;
1864       case 2:
1865          mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1866          tl_assert( (descr & mask) == mask );
1867          descr &= ~mask;
1868          descr |= TREE_DESCR_16_1;
1869          break;
1870       case 4:
1871          mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1872          tl_assert( (descr & mask) == mask );
1873          descr &= ~mask;
1874          descr |= TREE_DESCR_16_2;
1875          break;
1876       case 6:
1877          mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1878          tl_assert( (descr & mask) == mask );
1879          descr &= ~mask;
1880          descr |= TREE_DESCR_16_3;
1881          break;
1882       default:
1883          tl_assert(0);
1884    }
1885    return descr;
1886 }
1887 
pullup_descr_to_32(UShort descr,UWord toff)1888 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1889    UShort mask;
1890    switch (toff) {
1891       case 0:
1892          if (!(descr & TREE_DESCR_16_0))
1893             descr = pullup_descr_to_16(descr, 0);
1894          if (!(descr & TREE_DESCR_16_1))
1895             descr = pullup_descr_to_16(descr, 2);
1896          mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1897          tl_assert( (descr & mask) == mask );
1898          descr &= ~mask;
1899          descr |= TREE_DESCR_32_0;
1900          break;
1901       case 4:
1902          if (!(descr & TREE_DESCR_16_2))
1903             descr = pullup_descr_to_16(descr, 4);
1904          if (!(descr & TREE_DESCR_16_3))
1905             descr = pullup_descr_to_16(descr, 6);
1906          mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1907          tl_assert( (descr & mask) == mask );
1908          descr &= ~mask;
1909          descr |= TREE_DESCR_32_1;
1910          break;
1911       default:
1912          tl_assert(0);
1913    }
1914    return descr;
1915 }
1916 
valid_value_is_above_me_32(UShort descr,UWord toff)1917 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1918    switch (toff) {
1919       case 0: case 4:
1920          return 0 != (descr & TREE_DESCR_64);
1921       default:
1922          tl_assert(0);
1923    }
1924 }
1925 
valid_value_is_below_me_16(UShort descr,UWord toff)1926 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1927    switch (toff) {
1928       case 0:
1929          return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1930       case 2:
1931          return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1932       case 4:
1933          return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1934       case 6:
1935          return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1936       default:
1937          tl_assert(0);
1938    }
1939 }
1940 
1941 /* ------------ Cache management ------------ */
1942 
/* Flush the shadow-memory cache.  This simply delegates to
   shmem__flush_and_invalidate_scache. */
static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}
1947 
1948 
zsm_init(void)1949 static void zsm_init ( void )
1950 {
1951    tl_assert( sizeof(UWord) == sizeof(Addr) );
1952 
1953    tl_assert(map_shmem == NULL);
1954    map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1955                            HG_(free),
1956                            NULL/*unboxed UWord cmp*/);
1957    /* Invalidate all cache entries. */
1958    tl_assert(!is_valid_scache_tag(1));
1959    for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1960       cache_shmem.tags0[wix] = 1/*INVALID*/;
1961    }
1962 
1963    LineF_pool_allocator = VG_(newPA) (
1964                              sizeof(LineF),
1965                              /* Nr elements/pool to fill a core arena block
1966                                 taking some arena overhead into account. */
1967                              (4 * 1024 * 1024 - 200)/sizeof(LineF),
1968                              HG_(zalloc),
1969                              "libhb.LineF_storage.pool",
1970                              HG_(free)
1971                           );
1972 
1973    /* a SecMap must contain an integral number of CacheLines */
1974    tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1975    /* also ... a CacheLine holds an integral number of trees */
1976    tl_assert(0 == (N_LINE_ARANGE % 8));
1977 }
1978 
1979 /////////////////////////////////////////////////////////////////
1980 /////////////////////////////////////////////////////////////////
1981 //                                                             //
1982 // SECTION END compressed shadow memory                        //
1983 //                                                             //
1984 /////////////////////////////////////////////////////////////////
1985 /////////////////////////////////////////////////////////////////
1986 
1987 
1988 
1989 /////////////////////////////////////////////////////////////////
1990 /////////////////////////////////////////////////////////////////
1991 //                                                             //
1992 // SECTION BEGIN vts primitives                                //
1993 //                                                             //
1994 /////////////////////////////////////////////////////////////////
1995 /////////////////////////////////////////////////////////////////
1996 
1997 
1998 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1999    being compact stand-ins for Thr*'s.  Use these functions to map
2000    between them. */
2001 static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
2002 static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */
2003 
2004 __attribute__((noreturn))
scalarts_limitations_fail_NORETURN(Bool due_to_nThrs)2005 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
2006 {
2007    if (due_to_nThrs) {
2008       const HChar* s =
2009          "\n"
2010          "Helgrind: cannot continue, run aborted: too many threads.\n"
2011          "Sorry.  Helgrind can only handle programs that create\n"
2012          "%'llu or fewer threads over their entire lifetime.\n"
2013          "\n";
2014       VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
2015    } else {
2016       const HChar* s =
2017          "\n"
2018          "Helgrind: cannot continue, run aborted: too many\n"
2019          "synchronisation events.  Sorry. Helgrind can only handle\n"
2020          "programs which perform %'llu or fewer\n"
2021          "inter-thread synchronisation events (locks, unlocks, etc).\n"
2022          "\n";
2023       VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
2024    }
2025    VG_(exit)(1);
2026    /*NOTREACHED*/
2027    tl_assert(0); /*wtf?!*/
2028 }
2029 
2030 
2031 /* The dead thread (ThrID, actually) tables.  A thread may only be
2032    listed here if we have been notified thereof by libhb_async_exit.
2033    New entries are added at the end.  The order isn't important, but
2034    the ThrID values must be unique.
   verydead_thread_table_not_pruned lists the identity of the threads
   that died since the previous round of pruning.
   Once pruning is done, these ThrIDs are added to verydead_thread_table.
2038    We don't actually need to keep the set of threads that have ever died --
2039    only the threads that have died since the previous round of
2040    pruning.  But it's useful for sanity check purposes to keep the
2041    entire set, so we do. */
2042 static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
2043 static XArray* /* of ThrID */ verydead_thread_table = NULL;
2044 
2045 /* Arbitrary total ordering on ThrIDs. */
cmp__ThrID(const void * v1,const void * v2)2046 static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2047    ThrID id1 = *(const ThrID*)v1;
2048    ThrID id2 = *(const ThrID*)v2;
2049    if (id1 < id2) return -1;
2050    if (id1 > id2) return 1;
2051    return 0;
2052 }
2053 
verydead_thread_tables_init(void)2054 static void verydead_thread_tables_init ( void )
2055 {
2056    tl_assert(!verydead_thread_table);
2057    tl_assert(!verydead_thread_table_not_pruned);
2058    verydead_thread_table
2059      = VG_(newXA)( HG_(zalloc),
2060                    "libhb.verydead_thread_table_init.1",
2061                    HG_(free), sizeof(ThrID) );
2062    VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
2063    verydead_thread_table_not_pruned
2064      = VG_(newXA)( HG_(zalloc),
2065                    "libhb.verydead_thread_table_init.2",
2066                    HG_(free), sizeof(ThrID) );
2067    VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
2068 }
2069 
/* Sort 'thrids' and assert that the result is strictly increasing,
   which also means it contains no duplicates. */
static void verydead_thread_table_sort_and_check (XArray* thrids)
{
   UWord k, count;
   ThrID prev, cur;

   VG_(sortXA)( thrids );

   count = VG_(sizeXA)( thrids );
   if (count == 0)
      return;

   /* Walk adjacent pairs; each element must exceed its predecessor. */
   prev = *(ThrID*)VG_(indexXA)( thrids, 0 );
   for (k = 1; k < count; k++) {
      cur = *(ThrID*)VG_(indexXA)( thrids, k );
      tl_assert(prev < cur);
      prev = cur;
   }
   /* Ok, so the dead thread table thrids has unique and in-order keys. */
}
2088 
2089 /* A VTS contains .ts, its vector clock, and also .id, a field to hold
2090    a backlink for the caller's convenience.  Since we have no idea
2091    what to set that to in the library, it always gets set to
2092    VtsID_INVALID. */
typedef
   struct {
      VtsID    id;      /* backlink slot for the caller's convenience */
      UInt     usedTS;  /* number of entries of ts[] currently in use;
                           never more than sizeTS */
      UInt     sizeTS;  /* capacity of ts[], as requested from VTS__new */
      ScalarTS ts[0];   /* trailing array of entries.  VTS__new allocates
                           one extra slot, ts[sizeTS], and stores a guard
                           pattern there which later operations check. */
   }
   VTS;
2101 
2102 /* Allocate a VTS capable of storing 'sizeTS' entries. */
2103 static VTS* VTS__new ( const HChar* who, UInt sizeTS );
2104 
2105 /* Make a clone of 'vts', sizing the new array to exactly match the
2106    number of ScalarTSs present. */
2107 static VTS* VTS__clone ( const HChar* who, VTS* vts );
2108 
/* Make a clone of 'vts' with the thrids in 'thridsToDel' removed.  The
   new array is sized exactly to hold the number of required elements.
   'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
   must be in strictly increasing order. */
2113 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
2114 
2115 /* Delete this VTS in its entirety. */
2116 static void VTS__delete ( VTS* vts );
2117 
2118 /* Create a new singleton VTS in 'out'.  Caller must have
2119    pre-allocated 'out' sufficiently big to hold the result in all
2120    possible cases. */
2121 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
2122 
2123 /* Create in 'out' a VTS which is the same as 'vts' except with
2124    vts[me]++, so to speak.  Caller must have pre-allocated 'out'
2125    sufficiently big to hold the result in all possible cases. */
2126 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
2127 
2128 /* Create in 'out' a VTS which is the join (max) of 'a' and
2129    'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2130    the result in all possible cases. */
2131 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
2132 
2133 /* Compute the partial ordering relation of the two args.  Although we
2134    could be completely general and return an enumeration value (EQ,
2135    LT, GT, UN), in fact we only need LEQ, and so we may as well
2136    hardwire that fact.
2137 
   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID indicates
2140    the discovered point for which they are not.  There may be more
2141    than one such point, but we only care about seeing one of them, not
2142    all of them.  This rather strange convention is used because
2143    sometimes we want to know the actual index at which they first
2144    differ. */
2145 static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
2146 
2147 /* Compute an arbitrary structural (total) ordering on the two args,
2148    based on their VCs, so they can be looked up in a table, tree, etc.
2149    Returns -1, 0 or 1. */
2150 static Word VTS__cmp_structural ( VTS* a, VTS* b );
2151 
2152 /* Debugging only.  Display the given VTS. */
2153 static void VTS__show ( const VTS* vts );
2154 
2155 /* Debugging only.  Return vts[index], so to speak. */
2156 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
2157 
2158 /* Notify the VTS machinery that a thread has been declared
2159    comprehensively dead: that is, it has done an async exit AND it has
2160    been joined with.  This should ensure that its local clocks (.viR
2161    and .viW) will never again change, and so all mentions of this
2162    thread from all VTSs in the system may be removed. */
2163 static void VTS__declare_thread_very_dead ( Thr* idx );
2164 
2165 /*--------------- to do with Vector Timestamps ---------------*/
2166 
is_sane_VTS(VTS * vts)2167 static Bool is_sane_VTS ( VTS* vts )
2168 {
2169    UWord     i, n;
2170    ScalarTS  *st1, *st2;
2171    if (!vts) return False;
2172    if (vts->usedTS > vts->sizeTS) return False;
2173    n = vts->usedTS;
2174    if (n == 1) {
2175       st1 = &vts->ts[0];
2176       if (st1->tym == 0)
2177          return False;
2178    }
2179    else
2180    if (n >= 2) {
2181       for (i = 0; i < n-1; i++) {
2182          st1 = &vts->ts[i];
2183          st2 = &vts->ts[i+1];
2184          if (st1->thrid >= st2->thrid)
2185             return False;
2186          if (st1->tym == 0 || st2->tym == 0)
2187             return False;
2188       }
2189    }
2190    return True;
2191 }
2192 
2193 
2194 /* Create a new, empty VTS.
2195 */
VTS__new(const HChar * who,UInt sizeTS)2196 static VTS* VTS__new ( const HChar* who, UInt sizeTS )
2197 {
2198    VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
2199    tl_assert(vts->usedTS == 0);
2200    vts->sizeTS = sizeTS;
2201    *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
2202    return vts;
2203 }
2204 
2205 /* Clone this VTS.
2206 */
VTS__clone(const HChar * who,VTS * vts)2207 static VTS* VTS__clone ( const HChar* who, VTS* vts )
2208 {
2209    tl_assert(vts);
2210    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2211    UInt nTS = vts->usedTS;
2212    VTS* clone = VTS__new(who, nTS);
2213    clone->id = vts->id;
2214    clone->sizeTS = nTS;
2215    clone->usedTS = nTS;
2216    UInt i;
2217    for (i = 0; i < nTS; i++) {
2218       clone->ts[i] = vts->ts[i];
2219    }
2220    tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2221    return clone;
2222 }
2223 
2224 
2225 /* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
2226    must be in strictly increasing order.  We could obviously do this
2227    much more efficiently (in linear time) if necessary.
2228 */
VTS__subtract(const HChar * who,VTS * vts,XArray * thridsToDel)2229 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
2230 {
2231    UInt i, j;
2232    tl_assert(vts);
2233    tl_assert(thridsToDel);
2234    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2235    UInt nTS = vts->usedTS;
2236    /* Figure out how many ScalarTSs will remain in the output. */
2237    UInt nReq = nTS;
2238    for (i = 0; i < nTS; i++) {
2239       ThrID thrid = vts->ts[i].thrid;
2240       if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2241          nReq--;
2242    }
2243    tl_assert(nReq <= nTS);
2244    /* Copy the ones that will remain. */
2245    VTS* res = VTS__new(who, nReq);
2246    j = 0;
2247    for (i = 0; i < nTS; i++) {
2248       ThrID thrid = vts->ts[i].thrid;
2249       if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2250          continue;
2251       res->ts[j++] = vts->ts[i];
2252    }
2253    tl_assert(j == nReq);
2254    tl_assert(j == res->sizeTS);
2255    res->usedTS = j;
2256    tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2257    return res;
2258 }
2259 
2260 
2261 /* Delete this VTS in its entirety.
2262 */
VTS__delete(VTS * vts)2263 static void VTS__delete ( VTS* vts )
2264 {
2265    tl_assert(vts);
2266    tl_assert(vts->usedTS <= vts->sizeTS);
2267    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2268    HG_(free)(vts);
2269 }
2270 
2271 
2272 /* Create a new singleton VTS.
2273 */
VTS__singleton(VTS * out,Thr * thr,ULong tym)2274 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2275 {
2276    tl_assert(thr);
2277    tl_assert(tym >= 1);
2278    tl_assert(out);
2279    tl_assert(out->usedTS == 0);
2280    tl_assert(out->sizeTS >= 1);
2281    UInt hi = out->usedTS++;
2282    out->ts[hi].thrid = Thr__to_ThrID(thr);
2283    out->ts[hi].tym   = tym;
2284 }
2285 
2286 
2287 /* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
2288    not modified.
2289 */
VTS__tick(VTS * out,Thr * me,VTS * vts)2290 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
2291 {
2292    UInt      i, n;
2293    ThrID     me_thrid;
2294    Bool      found = False;
2295 
2296    stats__vts__tick++;
2297 
2298    tl_assert(out);
2299    tl_assert(out->usedTS == 0);
2300    if (vts->usedTS >= ThrID_MAX_VALID)
2301       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2302    tl_assert(out->sizeTS >= 1 + vts->usedTS);
2303 
2304    tl_assert(me);
2305    me_thrid = Thr__to_ThrID(me);
2306    tl_assert(is_sane_VTS(vts));
2307    n = vts->usedTS;
2308 
2309    /* Copy all entries which precede 'me'. */
2310    for (i = 0; i < n; i++) {
2311       ScalarTS* here = &vts->ts[i];
2312       if (UNLIKELY(here->thrid >= me_thrid))
2313          break;
2314       UInt hi = out->usedTS++;
2315       out->ts[hi] = *here;
2316    }
2317 
2318    /* 'i' now indicates the next entry to copy, if any.
2319        There are 3 possibilities:
2320        (a) there is no next entry (we used them all up already):
2321            add (me_thrid,1) to the output, and quit
2322        (b) there is a next entry, and its thrid > me_thrid:
2323            add (me_thrid,1) to the output, then copy the remaining entries
2324        (c) there is a next entry, and its thrid == me_thrid:
2325            copy it to the output but increment its timestamp value.
2326            Then copy the remaining entries.  (c) is the common case.
2327    */
2328    tl_assert(i >= 0 && i <= n);
2329    if (i == n) { /* case (a) */
2330       UInt hi = out->usedTS++;
2331       out->ts[hi].thrid = me_thrid;
2332       out->ts[hi].tym   = 1;
2333    } else {
2334       /* cases (b) and (c) */
2335       ScalarTS* here = &vts->ts[i];
2336       if (me_thrid == here->thrid) { /* case (c) */
2337          if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
2338             /* We're hosed.  We have to stop. */
2339             scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2340          }
2341          UInt hi = out->usedTS++;
2342          out->ts[hi].thrid = here->thrid;
2343          out->ts[hi].tym   = here->tym + 1;
2344          i++;
2345          found = True;
2346       } else { /* case (b) */
2347          UInt hi = out->usedTS++;
2348          out->ts[hi].thrid = me_thrid;
2349          out->ts[hi].tym   = 1;
2350       }
2351       /* And copy any remaining entries. */
2352       for (/*keepgoing*/; i < n; i++) {
2353          ScalarTS* here2 = &vts->ts[i];
2354          UInt hi = out->usedTS++;
2355          out->ts[hi] = *here2;
2356       }
2357    }
2358 
2359    tl_assert(is_sane_VTS(out));
2360    tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2361    tl_assert(out->usedTS <= out->sizeTS);
2362 }
2363 
2364 
2365 /* Return a new VTS constructed as the join (max) of the 2 args.
2366    Neither arg is modified.
2367 */
VTS__join(VTS * out,VTS * a,VTS * b)2368 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
2369 {
2370    UInt     ia, ib, useda, usedb;
2371    ULong    tyma, tymb, tymMax;
2372    ThrID    thrid;
2373    UInt     ncommon = 0;
2374 
2375    stats__vts__join++;
2376 
2377    tl_assert(a);
2378    tl_assert(b);
2379    useda = a->usedTS;
2380    usedb = b->usedTS;
2381 
2382    tl_assert(out);
2383    tl_assert(out->usedTS == 0);
2384    /* overly conservative test, but doing better involves comparing
2385       the two VTSs, which we don't want to do at this point. */
2386    if (useda + usedb >= ThrID_MAX_VALID)
2387       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2388    tl_assert(out->sizeTS >= useda + usedb);
2389 
2390    ia = ib = 0;
2391 
2392    while (1) {
2393 
2394       /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2395          from a and b in order, where thrid is the next ThrID
2396          occurring in either a or b, and tyma/b are the relevant
2397          scalar timestamps, taking into account implicit zeroes. */
2398       tl_assert(ia >= 0 && ia <= useda);
2399       tl_assert(ib >= 0 && ib <= usedb);
2400 
2401       if        (ia == useda && ib == usedb) {
2402          /* both empty - done */
2403          break;
2404 
2405       } else if (ia == useda && ib != usedb) {
2406          /* a empty, use up b */
2407          ScalarTS* tmpb = &b->ts[ib];
2408          thrid = tmpb->thrid;
2409          tyma  = 0;
2410          tymb  = tmpb->tym;
2411          ib++;
2412 
2413       } else if (ia != useda && ib == usedb) {
2414          /* b empty, use up a */
2415          ScalarTS* tmpa = &a->ts[ia];
2416          thrid = tmpa->thrid;
2417          tyma  = tmpa->tym;
2418          tymb  = 0;
2419          ia++;
2420 
2421       } else {
2422          /* both not empty; extract lowest-ThrID'd triple */
2423          ScalarTS* tmpa = &a->ts[ia];
2424          ScalarTS* tmpb = &b->ts[ib];
2425          if (tmpa->thrid < tmpb->thrid) {
2426             /* a has the lowest unconsidered ThrID */
2427             thrid = tmpa->thrid;
2428             tyma  = tmpa->tym;
2429             tymb  = 0;
2430             ia++;
2431          } else if (tmpa->thrid > tmpb->thrid) {
2432             /* b has the lowest unconsidered ThrID */
2433             thrid = tmpb->thrid;
2434             tyma  = 0;
2435             tymb  = tmpb->tym;
2436             ib++;
2437          } else {
2438             /* they both next mention the same ThrID */
2439             tl_assert(tmpa->thrid == tmpb->thrid);
2440             thrid = tmpa->thrid; /* == tmpb->thrid */
2441             tyma  = tmpa->tym;
2442             tymb  = tmpb->tym;
2443             ia++;
2444             ib++;
2445             ncommon++;
2446          }
2447       }
2448 
2449       /* having laboriously determined (thr, tyma, tymb), do something
2450          useful with it. */
2451       tymMax = tyma > tymb ? tyma : tymb;
2452       if (tymMax > 0) {
2453          UInt hi = out->usedTS++;
2454          out->ts[hi].thrid = thrid;
2455          out->ts[hi].tym   = tymMax;
2456       }
2457 
2458    }
2459 
2460    tl_assert(is_sane_VTS(out));
2461    tl_assert(out->usedTS <= out->sizeTS);
2462    tl_assert(out->usedTS == useda + usedb - ncommon);
2463 }
2464 
2465 
2466 /* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
2467    they are, or the first ThrID for which they are not (no valid ThrID
2468    has the value zero).  This rather strange convention is used
2469    because sometimes we want to know the actual index at which they
2470    first differ. */
VTS__cmpLEQ(VTS * a,VTS * b)2471 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
2472 {
2473    Word  ia, ib, useda, usedb;
2474    ULong tyma, tymb;
2475 
2476    stats__vts__cmpLEQ++;
2477 
2478    tl_assert(a);
2479    tl_assert(b);
2480    useda = a->usedTS;
2481    usedb = b->usedTS;
2482 
2483    ia = ib = 0;
2484 
2485    while (1) {
2486 
2487       /* This logic is to enumerate doubles (tyma, tymb) drawn
2488          from a and b in order, and tyma/b are the relevant
2489          scalar timestamps, taking into account implicit zeroes. */
2490       ThrID thrid;
2491 
2492       tl_assert(ia >= 0 && ia <= useda);
2493       tl_assert(ib >= 0 && ib <= usedb);
2494 
2495       if        (ia == useda && ib == usedb) {
2496          /* both empty - done */
2497          break;
2498 
2499       } else if (ia == useda && ib != usedb) {
2500          /* a empty, use up b */
2501          ScalarTS* tmpb = &b->ts[ib];
2502          tyma  = 0;
2503          tymb  = tmpb->tym;
2504          thrid = tmpb->thrid;
2505          ib++;
2506 
2507       } else if (ia != useda && ib == usedb) {
2508          /* b empty, use up a */
2509          ScalarTS* tmpa = &a->ts[ia];
2510          tyma  = tmpa->tym;
2511          thrid = tmpa->thrid;
2512          tymb  = 0;
2513          ia++;
2514 
2515       } else {
2516          /* both not empty; extract lowest-ThrID'd triple */
2517          ScalarTS* tmpa = &a->ts[ia];
2518          ScalarTS* tmpb = &b->ts[ib];
2519          if (tmpa->thrid < tmpb->thrid) {
2520             /* a has the lowest unconsidered ThrID */
2521             tyma  = tmpa->tym;
2522             thrid = tmpa->thrid;
2523             tymb  = 0;
2524             ia++;
2525          }
2526          else
2527          if (tmpa->thrid > tmpb->thrid) {
2528             /* b has the lowest unconsidered ThrID */
2529             tyma  = 0;
2530             tymb  = tmpb->tym;
2531             thrid = tmpb->thrid;
2532             ib++;
2533          } else {
2534             /* they both next mention the same ThrID */
2535             tl_assert(tmpa->thrid == tmpb->thrid);
2536             tyma  = tmpa->tym;
2537             thrid = tmpa->thrid;
2538             tymb  = tmpb->tym;
2539             ia++;
2540             ib++;
2541          }
2542       }
2543 
2544       /* having laboriously determined (tyma, tymb), do something
2545          useful with it. */
2546       if (tyma > tymb) {
2547          /* not LEQ at this index.  Quit, since the answer is
2548             determined already. */
2549          tl_assert(thrid >= 1024);
2550          return thrid;
2551       }
2552    }
2553 
2554    return 0; /* all points are LEQ => return an invalid ThrID */
2555 }
2556 
2557 
2558 /* Compute an arbitrary structural (total) ordering on the two args,
2559    based on their VCs, so they can be looked up in a table, tree, etc.
2560    Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
2561    performance critical so there is some effort expended to make it sa
2562    fast as possible.
2563 */
VTS__cmp_structural(VTS * a,VTS * b)2564 Word VTS__cmp_structural ( VTS* a, VTS* b )
2565 {
2566    /* We just need to generate an arbitrary total ordering based on
2567       a->ts and b->ts.  Preferably do it in a way which comes across likely
2568       differences relatively quickly. */
2569    Word     i;
2570    Word     useda = 0,    usedb = 0;
2571    ScalarTS *ctsa = NULL, *ctsb = NULL;
2572 
2573    stats__vts__cmp_structural++;
2574 
2575    tl_assert(a);
2576    tl_assert(b);
2577 
2578    ctsa = &a->ts[0]; useda = a->usedTS;
2579    ctsb = &b->ts[0]; usedb = b->usedTS;
2580 
2581    if (LIKELY(useda == usedb)) {
2582       ScalarTS *tmpa = NULL, *tmpb = NULL;
2583       stats__vts__cmp_structural_slow++;
2584       /* Same length vectors.  Find the first difference, if any, as
2585          fast as possible. */
2586       for (i = 0; i < useda; i++) {
2587          tmpa = &ctsa[i];
2588          tmpb = &ctsb[i];
2589          if (LIKELY(tmpa->tym == tmpb->tym
2590                     && tmpa->thrid == tmpb->thrid))
2591             continue;
2592          else
2593             break;
2594       }
2595       if (UNLIKELY(i == useda)) {
2596          /* They're identical. */
2597          return 0;
2598       } else {
2599          tl_assert(i >= 0 && i < useda);
2600          if (tmpa->tym < tmpb->tym) return -1;
2601          if (tmpa->tym > tmpb->tym) return 1;
2602          if (tmpa->thrid < tmpb->thrid) return -1;
2603          if (tmpa->thrid > tmpb->thrid) return 1;
2604          /* we just established them as non-identical, hence: */
2605       }
2606       /*NOTREACHED*/
2607       tl_assert(0);
2608    }
2609 
2610    if (useda < usedb) return -1;
2611    if (useda > usedb) return 1;
2612    /*NOTREACHED*/
2613    tl_assert(0);
2614 }
2615 
2616 
2617 /* Debugging only.  Display the given VTS.
2618 */
VTS__show(const VTS * vts)2619 static void VTS__show ( const VTS* vts )
2620 {
2621    Word      i, n;
2622    tl_assert(vts);
2623 
2624    VG_(printf)("[");
2625    n =  vts->usedTS;
2626    for (i = 0; i < n; i++) {
2627       const ScalarTS *st = &vts->ts[i];
2628       VG_(printf)(i < n-1 ? "%d:%llu " : "%d:%llu", st->thrid, (ULong)st->tym);
2629    }
2630    VG_(printf)("]");
2631 }
2632 
2633 
2634 /* Debugging only.  Return vts[index], so to speak.
2635 */
VTS__indexAt_SLOW(VTS * vts,Thr * idx)2636 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2637 {
2638    UWord i, n;
2639    ThrID idx_thrid = Thr__to_ThrID(idx);
2640    stats__vts__indexat_slow++;
2641    tl_assert(vts);
2642    n = vts->usedTS;
2643    for (i = 0; i < n; i++) {
2644       ScalarTS* st = &vts->ts[i];
2645       if (st->thrid == idx_thrid)
2646          return st->tym;
2647    }
2648    return 0;
2649 }
2650 
2651 
2652 /* See comment on prototype above.
2653 */
VTS__declare_thread_very_dead(Thr * thr)2654 static void VTS__declare_thread_very_dead ( Thr* thr )
2655 {
2656    if (0) VG_(printf)("VTQ:  tae %p\n", thr);
2657 
2658    tl_assert(thr->llexit_done);
2659    tl_assert(thr->joinedwith_done);
2660 
2661    ThrID nyu;
2662    nyu = Thr__to_ThrID(thr);
2663    VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
2664 
2665    /* We can only get here if we're assured that we'll never again
2666       need to look at this thread's ::viR or ::viW.  Set them to
2667       VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2668       mostly so that we don't wind up pruning them (as that would be
2669       nonsensical: the only interesting ScalarTS entry for a dead
2670       thread is its own index, and the pruning will remove that.). */
2671    VtsID__rcdec(thr->viR);
2672    VtsID__rcdec(thr->viW);
2673    thr->viR = VtsID_INVALID;
2674    thr->viW = VtsID_INVALID;
2675 }
2676 
2677 
2678 /////////////////////////////////////////////////////////////////
2679 /////////////////////////////////////////////////////////////////
2680 //                                                             //
2681 // SECTION END vts primitives                                  //
2682 //                                                             //
2683 /////////////////////////////////////////////////////////////////
2684 /////////////////////////////////////////////////////////////////
2685 
2686 
2687 
2688 /////////////////////////////////////////////////////////////////
2689 /////////////////////////////////////////////////////////////////
2690 //                                                             //
2691 // SECTION BEGIN main library                                  //
2692 //                                                             //
2693 /////////////////////////////////////////////////////////////////
2694 /////////////////////////////////////////////////////////////////
2695 
2696 
2697 /////////////////////////////////////////////////////////
2698 //                                                     //
2699 // VTS set                                             //
2700 //                                                     //
2701 /////////////////////////////////////////////////////////
2702 
2703 static WordFM* /* WordFM VTS* void */ vts_set = NULL;
2704 
vts_set_init(void)2705 static void vts_set_init ( void )
2706 {
2707    tl_assert(!vts_set);
2708    vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2709                          HG_(free),
2710                          (Word(*)(UWord,UWord))VTS__cmp_structural );
2711 }
2712 
2713 /* Given a VTS, look in vts_set to see if we already have a
2714    structurally identical one.  If yes, return the pair (True, pointer
2715    to the existing one).  If no, clone this one, add the clone to the
2716    set, and return (False, pointer to the clone). */
vts_set__find__or__clone_and_add(VTS ** res,VTS * cand)2717 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
2718 {
2719    UWord keyW, valW;
2720    stats__vts_set__focaa++;
2721    tl_assert(cand->id == VtsID_INVALID);
2722    /* lookup cand (by value) */
2723    if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2724       /* found it */
2725       tl_assert(valW == 0);
2726       /* if this fails, cand (by ref) was already present (!) */
2727       tl_assert(keyW != (UWord)cand);
2728       *res = (VTS*)keyW;
2729       return True;
2730    } else {
2731       /* not present.  Clone, add and return address of clone. */
2732       stats__vts_set__focaa_a++;
2733       VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2734       tl_assert(clone != cand);
2735       VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2736       *res = clone;
2737       return False;
2738    }
2739 }
2740 
2741 
2742 /////////////////////////////////////////////////////////
2743 //                                                     //
2744 // VTS table                                           //
2745 //                                                     //
2746 /////////////////////////////////////////////////////////
2747 
static void VtsID__invalidate_caches ( void ); /* forward declaration */

/* A type to hold VTS table entries; one VtsTE is one slot of vts_tab.
   Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, this does not imply
     any constraint on the value of u.freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on the freelist, so u.freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* the VTS itself, which is also in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      union {
         VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
         VtsID remap;    /* used only during pruning, for used entries */
      } u;
      /* u.freelink is only used when vts == NULL;
         u.remap is only used when vts != NULL, during pruning. */
   }
   VtsTE;
2773 
/* The VTS table: an XArray of VtsTE, indexed by VtsID. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  Entries are chained through
   their u.freelink fields.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After a GC, vts_tab__do_GC
   resets this value to twice the number of live entries, but never
   to less than the current table size. */
static Word vts_next_GC_at = 1000;
2786 
vts_tab_init(void)2787 static void vts_tab_init ( void )
2788 {
2789    vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2790                          HG_(free), sizeof(VtsTE) );
2791    vts_tab_freelist = VtsID_INVALID;
2792 }
2793 
2794 /* Add ii to the free list, checking that it looks out-of-use. */
add_to_free_list(VtsID ii)2795 static void add_to_free_list ( VtsID ii )
2796 {
2797    VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2798    tl_assert(ie->vts == NULL);
2799    tl_assert(ie->rc == 0);
2800    tl_assert(ie->u.freelink == VtsID_INVALID);
2801    ie->u.freelink = vts_tab_freelist;
2802    vts_tab_freelist = ii;
2803 }
2804 
2805 /* Get an entry from the free list.  This will return VtsID_INVALID if
2806    the free list is empty. */
get_from_free_list(void)2807 static VtsID get_from_free_list ( void )
2808 {
2809    VtsID  ii;
2810    VtsTE* ie;
2811    if (vts_tab_freelist == VtsID_INVALID)
2812       return VtsID_INVALID;
2813    ii = vts_tab_freelist;
2814    ie = VG_(indexXA)( vts_tab, ii );
2815    tl_assert(ie->vts == NULL);
2816    tl_assert(ie->rc == 0);
2817    vts_tab_freelist = ie->u.freelink;
2818    return ii;
2819 }
2820 
2821 /* Produce a new VtsID that can be used, either by getting it from
2822    the freelist, or, if that is empty, by expanding vts_tab. */
get_new_VtsID(void)2823 static VtsID get_new_VtsID ( void )
2824 {
2825    VtsID ii;
2826    VtsTE te;
2827    ii = get_from_free_list();
2828    if (ii != VtsID_INVALID)
2829       return ii;
2830    te.vts = NULL;
2831    te.rc = 0;
2832    te.u.freelink = VtsID_INVALID;
2833    ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2834    return ii;
2835 }
2836 
2837 
2838 /* Indirect callback from lib_zsm. */
VtsID__rcinc(VtsID ii)2839 static void VtsID__rcinc ( VtsID ii )
2840 {
2841    VtsTE* ie;
2842    /* VG_(indexXA) does a range check for us */
2843    ie = VG_(indexXA)( vts_tab, ii );
2844    tl_assert(ie->vts); /* else it's not in use */
2845    tl_assert(ie->rc < ~0UL); /* else we can't continue */
2846    tl_assert(ie->vts->id == ii);
2847    ie->rc++;
2848 }
2849 
2850 /* Indirect callback from lib_zsm. */
VtsID__rcdec(VtsID ii)2851 static void VtsID__rcdec ( VtsID ii )
2852 {
2853    VtsTE* ie;
2854    /* VG_(indexXA) does a range check for us */
2855    ie = VG_(indexXA)( vts_tab, ii );
2856    tl_assert(ie->vts); /* else it's not in use */
2857    tl_assert(ie->rc > 0); /* else RC snafu */
2858    tl_assert(ie->vts->id == ii);
2859    ie->rc--;
2860 }
2861 
2862 
2863 /* Look up 'cand' in our collection of VTSs.  If present, return the
2864    VtsID for the pre-existing version.  If not present, clone it, add
2865    the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2866    it, and return that. */
vts_tab__find__or__clone_and_add(VTS * cand)2867 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
2868 {
2869    VTS* in_tab = NULL;
2870    tl_assert(cand->id == VtsID_INVALID);
2871    Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2872    tl_assert(in_tab);
2873    if (already_have) {
2874       /* We already have a copy of 'cand'.  Use that. */
2875       VtsTE* ie;
2876       tl_assert(in_tab->id != VtsID_INVALID);
2877       ie = VG_(indexXA)( vts_tab, in_tab->id );
2878       tl_assert(ie->vts == in_tab);
2879       return in_tab->id;
2880    } else {
2881       VtsID  ii = get_new_VtsID();
2882       VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2883       ie->vts = in_tab;
2884       ie->rc = 0;
2885       ie->u.freelink = VtsID_INVALID;
2886       in_tab->id = ii;
2887       return ii;
2888    }
2889 }
2890 
2891 
show_vts_stats(const HChar * caller)2892 static void show_vts_stats ( const HChar* caller )
2893 {
2894    UWord nSet, nTab, nLive;
2895    ULong totrc;
2896    UWord n, i;
2897    nSet = VG_(sizeFM)( vts_set );
2898    nTab = VG_(sizeXA)( vts_tab );
2899    totrc = 0;
2900    nLive = 0;
2901    n = VG_(sizeXA)( vts_tab );
2902    for (i = 0; i < n; i++) {
2903       VtsTE* ie = VG_(indexXA)( vts_tab, i );
2904       if (ie->vts) {
2905          nLive++;
2906          totrc += (ULong)ie->rc;
2907       } else {
2908          tl_assert(ie->rc == 0);
2909       }
2910    }
2911    VG_(printf)("  show_vts_stats %s\n", caller);
2912    VG_(printf)("    vts_tab size %4lu\n", nTab);
2913    VG_(printf)("    vts_tab live %4lu\n", nLive);
2914    VG_(printf)("    vts_set size %4lu\n", nSet);
2915    VG_(printf)("        total rc %4llu\n", totrc);
2916 }
2917 
2918 
2919 /* --- Helpers for VtsID pruning --- */
2920 
2921 static
remap_VtsID(XArray * old_tab,XArray * new_tab,VtsID * ii)2922 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2923                    /*MOD*/XArray* /* of VtsTE */ new_tab,
2924                    VtsID* ii )
2925 {
2926    VtsTE *old_te, *new_te;
2927    VtsID old_id, new_id;
2928    /* We're relying here on VG_(indexXA)'s range checking to assert on
2929       any stupid values, in particular *ii == VtsID_INVALID. */
2930    old_id = *ii;
2931    old_te = VG_(indexXA)( old_tab, old_id );
2932    old_te->rc--;
2933    new_id = old_te->u.remap;
2934    new_te = VG_(indexXA)( new_tab, new_id );
2935    new_te->rc++;
2936    *ii = new_id;
2937 }
2938 
2939 static
remap_VtsIDs_in_SVal(XArray * old_tab,XArray * new_tab,SVal * s)2940 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2941                             /*MOD*/XArray* /* of VtsTE */ new_tab,
2942                             SVal* s )
2943 {
2944    SVal old_sv, new_sv;
2945    old_sv = *s;
2946    if (SVal__isC(old_sv)) {
2947       VtsID rMin, wMin;
2948       rMin = SVal__unC_Rmin(old_sv);
2949       wMin = SVal__unC_Wmin(old_sv);
2950       remap_VtsID( old_tab, new_tab, &rMin );
2951       remap_VtsID( old_tab, new_tab, &wMin );
2952       new_sv = SVal__mkC( rMin, wMin );
2953       *s = new_sv;
2954   }
2955 }
2956 
2957 
2958 /* NOT TO BE CALLED FROM WITHIN libzsm. */
2959 __attribute__((noinline))
vts_tab__do_GC(Bool show_stats)2960 static void vts_tab__do_GC ( Bool show_stats )
2961 {
2962    UWord i, nTab, nLive, nFreed;
2963 
2964    /* ---------- BEGIN VTS GC ---------- */
2965    /* check this is actually necessary. */
2966    tl_assert(vts_tab_freelist == VtsID_INVALID);
2967 
2968    /* empty the caches for partial order checks and binary joins.  We
2969       could do better and prune out the entries to be deleted, but it
2970       ain't worth the hassle. */
2971    VtsID__invalidate_caches();
2972 
2973    /* First, make the reference counts up to date. */
2974    zsm_flush_cache();
2975 
2976    nTab = VG_(sizeXA)( vts_tab );
2977 
2978    if (show_stats) {
2979       VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2980       show_vts_stats("before GC");
2981    }
2982 
2983    /* Now we can inspect the entire vts_tab.  Any entries with zero
2984       .rc fields are now no longer in use and can be put back on the
2985       free list, removed from vts_set, and deleted. */
2986    nFreed = 0;
2987    for (i = 0; i < nTab; i++) {
2988       Bool present;
2989       UWord oldK = 0, oldV = 12345;
2990       VtsTE* te = VG_(indexXA)( vts_tab, i );
2991       if (te->vts == NULL) {
2992          tl_assert(te->rc == 0);
2993          continue; /* already on the free list (presumably) */
2994       }
2995       if (te->rc > 0)
2996          continue; /* in use */
2997       /* Ok, we got one we can free. */
2998       tl_assert(te->vts->id == i);
2999       /* first, remove it from vts_set. */
3000       present = VG_(delFromFM)( vts_set,
3001                                 &oldK, &oldV, (UWord)te->vts );
3002       tl_assert(present); /* else it isn't in vts_set ?! */
3003       tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3004       tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
3005       /* now free the VTS itself */
3006       VTS__delete(te->vts);
3007       te->vts = NULL;
3008       /* and finally put this entry on the free list */
3009       tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
3010       add_to_free_list( i );
3011       nFreed++;
3012    }
3013 
3014    /* Now figure out when the next GC should be.  We'll allow the
3015       number of VTSs to double before GCing again.  Except of course
3016       that since we can't (or, at least, don't) shrink vts_tab, we
3017       can't set the threshold value smaller than it. */
3018    tl_assert(nFreed <= nTab);
3019    nLive = nTab - nFreed;
3020    tl_assert(nLive >= 0 && nLive <= nTab);
3021    vts_next_GC_at = 2 * nLive;
3022    if (vts_next_GC_at < nTab)
3023       vts_next_GC_at = nTab;
3024 
3025    if (show_stats) {
3026       show_vts_stats("after GC");
3027       VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
3028    }
3029 
3030    stats__vts_tab_GC++;
3031    if (VG_(clo_stats)) {
3032       tl_assert(nTab > 0);
3033       VG_(message)(Vg_DebugMsg,
3034                    "libhb: VTS GC: #%lu  old size %lu  live %lu  (%2llu%%)\n",
3035                    stats__vts_tab_GC,
3036                    nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
3037    }
3038    /* ---------- END VTS GC ---------- */
3039 
3040    /* Decide whether to do VTS pruning.  We have one of three
3041       settings. */
3042    static UInt pruning_auto_ctr = 0; /* do not make non-static */
3043 
3044    Bool do_pruning = False;
3045    switch (HG_(clo_vts_pruning)) {
3046       case 0: /* never */
3047          break;
3048       case 1: /* auto */
3049          do_pruning = (++pruning_auto_ctr % 5) == 0;
3050          break;
3051       case 2: /* always */
3052          do_pruning = True;
3053          break;
3054       default:
3055          tl_assert(0);
3056    }
3057 
3058    /* The rest of this routine only handles pruning, so we can
3059       quit at this point if it is not to be done. */
3060    if (!do_pruning)
3061       return;
3062    /* No need to do pruning if no thread died since the last pruning as
3063       no VtsTE can be pruned. */
3064    if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3065       return;
3066 
3067    /* ---------- BEGIN VTS PRUNING ---------- */
3068    /* Sort and check the very dead threads that died since the last pruning.
3069       Sorting is used for the check and so that we can quickly look
3070       up the dead-thread entries as we work through the VTSs. */
3071    verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
3072 
3073    /* We will run through the old table, and create a new table and
3074       set, at the same time setting the u.remap entries in the old
3075       table to point to the new entries.  Then, visit every VtsID in
3076       the system, and replace all of them with new ones, using the
3077       u.remap entries in the old table.  Finally, we can delete the old
3078       table and set. */
3079 
3080    XArray* /* of VtsTE */ new_tab
3081       = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3082                     HG_(free), sizeof(VtsTE) );
3083 
3084    /* WordFM VTS* void */
3085    WordFM* new_set
3086       = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3087                     HG_(free),
3088                     (Word(*)(UWord,UWord))VTS__cmp_structural );
3089 
3090    /* Visit each old VTS.  For each one:
3091 
3092       * make a pruned version
3093 
3094       * search new_set for the pruned version, yielding either
3095         Nothing (not present) or the new VtsID for it.
3096 
3097       * if not present, allocate a new VtsID for it, insert (pruned
3098         VTS, new VtsID) in the tree, and set
3099         remap_table[old VtsID] = new VtsID.
3100 
3101       * if present, set remap_table[old VtsID] = new VtsID, where
3102         new VtsID was determined by the tree lookup.  Then free up
3103         the clone.
3104    */
3105 
3106    UWord nBeforePruning = 0, nAfterPruning = 0;
3107    UWord nSTSsBefore = 0, nSTSsAfter = 0;
3108    VtsID new_VtsID_ctr = 0;
3109 
3110    for (i = 0; i < nTab; i++) {
3111 
3112       /* For each old VTS .. */
3113       VtsTE* old_te  = VG_(indexXA)( vts_tab, i );
3114       VTS*   old_vts = old_te->vts;
3115 
3116       /* Skip it if not in use */
3117       if (old_te->rc == 0) {
3118          tl_assert(old_vts == NULL);
3119          continue;
3120       }
3121       tl_assert(old_te->u.remap == VtsID_INVALID);
3122       tl_assert(old_vts != NULL);
3123       tl_assert(old_vts->id == i);
3124       tl_assert(old_vts->ts != NULL);
3125 
3126       /* It is in use. Make a pruned version. */
3127       nBeforePruning++;
3128       nSTSsBefore += old_vts->usedTS;
3129       VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
3130                                    old_vts, verydead_thread_table_not_pruned);
3131       tl_assert(new_vts->sizeTS == new_vts->usedTS);
3132       tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3133                 == 0x0ddC0ffeeBadF00dULL);
3134 
3135       /* Get rid of the old VTS and the tree entry.  It's a bit more
3136          complex to incrementally delete the VTSs now than to nuke
3137          them all after we're done, but the upside is that we don't
3138          wind up temporarily storing potentially two complete copies
3139          of each VTS and hence spiking memory use. */
3140       UWord oldK = 0, oldV = 12345;
3141       Bool  present = VG_(delFromFM)( vts_set,
3142                                       &oldK, &oldV, (UWord)old_vts );
3143       tl_assert(present); /* else it isn't in vts_set ?! */
3144       tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3145       tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3146       /* now free the VTS itself */
3147       VTS__delete(old_vts);
3148       old_te->vts = NULL;
3149       old_vts = NULL;
3150 
3151       /* NO MENTIONS of old_vts allowed beyond this point. */
3152 
3153       /* Ok, we have the pruned copy in new_vts.  See if a
3154          structurally identical version is already present in new_set.
3155          If so, delete the one we just made and move on; if not, add
3156          it. */
3157       VTS*  identical_version = NULL;
3158       UWord valW = 12345;
3159       if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3160                         (UWord)new_vts)) {
3161          // already have it
3162          tl_assert(valW == 0);
3163          tl_assert(identical_version != NULL);
3164          tl_assert(identical_version != new_vts);
3165          VTS__delete(new_vts);
3166          new_vts = identical_version;
3167          tl_assert(new_vts->id != VtsID_INVALID);
3168       } else {
3169          tl_assert(valW == 12345);
3170          tl_assert(identical_version == NULL);
3171          new_vts->id = new_VtsID_ctr++;
3172          Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3173          tl_assert(!b);
3174          VtsTE new_te;
3175          new_te.vts      = new_vts;
3176          new_te.rc       = 0;
3177          new_te.u.freelink = VtsID_INVALID;
3178          Word j = VG_(addToXA)( new_tab, &new_te );
3179          tl_assert(j <= i);
3180          tl_assert(j == new_VtsID_ctr - 1);
3181          // stats
3182          nAfterPruning++;
3183          nSTSsAfter += new_vts->usedTS;
3184       }
3185       old_te->u.remap = new_vts->id;
3186 
3187    } /* for (i = 0; i < nTab; i++) */
3188 
3189    /* Move very dead thread from verydead_thread_table_not_pruned to
3190       verydead_thread_table. Sort and check verydead_thread_table
3191       to verify a thread was reported very dead only once. */
3192    {
3193       UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3194 
3195       for (i = 0; i < nBT; i++) {
3196          ThrID thrid =
3197             *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3198          VG_(addToXA)( verydead_thread_table, &thrid );
3199       }
3200       verydead_thread_table_sort_and_check (verydead_thread_table);
3201       VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3202    }
3203 
3204    /* At this point, we have:
3205       * the old VTS table, with its u.remap entries set,
3206         and with all .vts == NULL.
3207       * the old VTS tree should be empty, since it and the old VTSs
        it contained have been incrementally deleted as we worked
3209         through the old table.
3210       * the new VTS table, with all .rc == 0, all u.freelink and u.remap
3211         == VtsID_INVALID.
3212       * the new VTS tree.
3213    */
3214    tl_assert( VG_(sizeFM)(vts_set) == 0 );
3215 
3216    /* Now actually apply the mapping. */
3217    /* Visit all the VtsIDs in the entire system.  Where do we expect
3218       to find them?
3219       (a) in shadow memory -- the LineZs and LineFs
3220       (b) in our collection of struct _Thrs.
3221       (c) in our collection of struct _SOs.
3222       Nowhere else, AFAICS.  Not in the zsm cache, because that just
3223       got invalidated.
3224 
3225       Using the u.remap fields in vts_tab, map each old VtsID to a new
3226       VtsID.  For each old VtsID, dec its rc; and for each new one,
3227       inc it.  This sets up the new refcounts, and it also gives a
3228       cheap sanity check of the old ones: all old refcounts should be
3229       zero after this operation.
3230    */
3231 
3232    /* Do the mappings for (a) above: iterate over the Primary shadow
3233       mem map (WordFM Addr SecMap*). */
3234    UWord secmapW = 0;
3235    VG_(initIterFM)( map_shmem );
3236    while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3237       UWord   j;
3238       SecMap* sm = (SecMap*)secmapW;
3239       tl_assert(sm->magic == SecMap_MAGIC);
3240       /* Deal with the LineZs */
3241       for (i = 0; i < N_SECMAP_ZLINES; i++) {
3242          LineZ* lineZ = &sm->linesZ[i];
3243          if (lineZ->dict[0] != SVal_INVALID) {
3244             for (j = 0; j < 4; j++)
3245                remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3246          } else {
3247             LineF* lineF = SVal2Ptr (lineZ->dict[1]);
3248             for (j = 0; j < N_LINE_ARANGE; j++)
3249                remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3250          }
3251       }
3252    }
3253    VG_(doneIterFM)( map_shmem );
3254 
3255    /* Do the mappings for (b) above: visit our collection of struct
3256       _Thrs. */
3257    Thread* hgthread = get_admin_threads();
3258    tl_assert(hgthread);
3259    while (hgthread) {
3260       Thr* hbthr = hgthread->hbthr;
3261       tl_assert(hbthr);
3262       /* Threads that are listed in the prunable set have their viR
3263          and viW set to VtsID_INVALID, so we can't mess with them. */
3264       if (hbthr->llexit_done && hbthr->joinedwith_done) {
3265          tl_assert(hbthr->viR == VtsID_INVALID);
3266          tl_assert(hbthr->viW == VtsID_INVALID);
3267          hgthread = hgthread->admin;
3268          continue;
3269       }
3270       remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3271       remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3272       hgthread = hgthread->admin;
3273    }
3274 
3275    /* Do the mappings for (c) above: visit the struct _SOs. */
3276    SO* so = admin_SO;
3277    while (so) {
3278       if (so->viR != VtsID_INVALID)
3279          remap_VtsID( vts_tab, new_tab, &so->viR );
3280       if (so->viW != VtsID_INVALID)
3281          remap_VtsID( vts_tab, new_tab, &so->viW );
3282       so = so->admin_next;
3283    }
3284 
3285    /* So, we're nearly done (with this incredibly complex operation).
3286       Check the refcounts for the old VtsIDs all fell to zero, as
3287       expected.  Any failure is serious. */
3288    for (i = 0; i < nTab; i++) {
3289       VtsTE* te = VG_(indexXA)( vts_tab, i );
3290       tl_assert(te->vts == NULL);
3291       /* This is the assert proper.  Note we're also asserting
3292          zeroness for old entries which are unmapped.  That's OK. */
3293       tl_assert(te->rc == 0);
3294    }
3295 
3296    /* Install the new table and set. */
3297    VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3298    vts_set = new_set;
3299    VG_(deleteXA)( vts_tab );
3300    vts_tab = new_tab;
3301 
3302    /* The freelist of vts_tab entries is empty now, because we've
3303       compacted all of the live entries at the low end of the
3304       table. */
3305    vts_tab_freelist = VtsID_INVALID;
3306 
3307    /* Sanity check vts_set and vts_tab. */
3308 
3309    /* Because all the live entries got slid down to the bottom of vts_tab: */
3310    tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3311 
3312    /* Assert that the vts_tab and vts_set entries point at each other
3313       in the required way */
3314    UWord wordK = 0, wordV = 0;
3315    VG_(initIterFM)( vts_set );
3316    while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3317       tl_assert(wordK != 0);
3318       tl_assert(wordV == 0);
3319       VTS* vts = (VTS*)wordK;
3320       tl_assert(vts->id != VtsID_INVALID);
3321       VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3322       tl_assert(te->vts == vts);
3323    }
3324    VG_(doneIterFM)( vts_set );
3325 
3326    /* Also iterate over the table, and check each entry is
3327       plausible. */
3328    nTab = VG_(sizeXA)( vts_tab );
3329    for (i = 0; i < nTab; i++) {
3330       VtsTE* te = VG_(indexXA)( vts_tab, i );
3331       tl_assert(te->vts);
3332       tl_assert(te->vts->id == i);
3333       tl_assert(te->rc > 0); /* 'cos we just GC'd */
3334       tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3335       /* value of te->u.remap  not relevant */
3336    }
3337 
3338    /* And we're done.  Bwahahaha. Ha. Ha. Ha. */
3339    stats__vts_pruning++;
3340    if (VG_(clo_stats)) {
3341       tl_assert(nTab > 0);
3342       VG_(message)(
3343          Vg_DebugMsg,
3344          "libhb: VTS PR: #%lu  before %lu (avg sz %lu)  "
3345             "after %lu (avg sz %lu)\n",
3346          stats__vts_pruning,
3347          nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3348          nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3349       );
3350    }
3351    /* ---------- END VTS PRUNING ---------- */
3352 }
3353 
3354 
3355 /////////////////////////////////////////////////////////
3356 //                                                     //
3357 // Vts IDs                                             //
3358 //                                                     //
3359 /////////////////////////////////////////////////////////
3360 
3361 //////////////////////////
/* A temporary, max-sized VTS which is used as a temporary (the first
   argument) in VTS__singleton, VTS__tick and VTS__join operations.
   The VtsID__* wrappers below reset its usedTS field to zero before
   each use and then hand it to vts_tab__find__or__clone_and_add.
   It starts out NULL here and is not allocated anywhere in this
   section. */
static VTS* temp_max_sized_VTS = NULL;

//////////////////////////
/* Counters for the two VtsID result caches below.  "queries" is bumped
   on every call of the corresponding _WRK function, "misses" only when
   the cache slot did not hold the requested pair. */
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;
3371 
/* Rotate 'w' left by 'n' bit positions, treating it as a 32-bit value
   (assumes UInt is 32 bits wide, as the original "32-n" did).
   The rotate count is reduced modulo 32 before use, so every 'n' is
   well defined.  The unmasked form "(w << n) | (w >> (32-n))" shifts by
   the full width of the type when n == 0, and by an out-of-range amount
   for negative n or n >= 32, all of which are undefined in C.  For
   1 <= n <= 31 the result is identical to the unmasked form. */
static inline UInt ROL32 ( UInt w, Int n ) {
   UInt k = (UInt)n & 31;
   return (w << k) | (w >> ((32 - k) & 31));
}
/* Mix two VtsIDs into a slot number in [0, nTab): each ID is rotated by
   a different amount (19 and 13 bits), the two results are XORed, and
   the mix is reduced modulo nTab.  nTab must be non-zero. */
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt rot1 = ROL32(vi1, 19);
   UInt rot2 = ROL32(vi2, 13);
   return (rot1 ^ rot2) % nTab;
}
3380 
/* Cache of VtsID__cmpLEQ_WRK results.  The slot for a query is chosen
   by hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE); a slot counts as a hit only
   if both stored IDs equal the queried ones, and is simply overwritten
   on a miss.  'leq' is the cached answer. */
#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

/* Cache of VtsID__join2_WRK results, organised the same way; 'res' is
   the VtsID that the join of vi1 and vi2 produced. */
#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];
3390 
VtsID__invalidate_caches(void)3391 static void VtsID__invalidate_caches ( void ) {
3392    Int i;
3393    for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3394       cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3395       cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3396       cmpLEQ_cache[i].leq = False;
3397    }
3398    for (i = 0; i < N_JOIN2_CACHE; i++) {
3399      join2_cache[i].vi1 = VtsID_INVALID;
3400      join2_cache[i].vi2 = VtsID_INVALID;
3401      join2_cache[i].res = VtsID_INVALID;
3402    }
3403 }
3404 //////////////////////////
3405 
3406 //static Bool VtsID__is_valid ( VtsID vi ) {
3407 //   VtsTE* ve;
3408 //   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3409 //      return False;
3410 //   ve = VG_(indexXA)( vts_tab, vi );
3411 //   if (!ve->vts)
3412 //      return False;
3413 //   tl_assert(ve->vts->id == vi);
3414 //   return True;
3415 //}
3416 
VtsID__to_VTS(VtsID vi)3417 static VTS* VtsID__to_VTS ( VtsID vi ) {
3418    VtsTE* te = VG_(indexXA)( vts_tab, vi );
3419    tl_assert(te->vts);
3420    return te->vts;
3421 }
3422 
/* Show the VTS that 'vi' refers to, by handing it to VTS__show. */
static void VtsID__pp ( VtsID vi ) {
   VTS__show( VtsID__to_VTS(vi) );
}
3427 
3428 /* compute partial ordering relation of vi1 and vi2. */
3429 __attribute__((noinline))
VtsID__cmpLEQ_WRK(VtsID vi1,VtsID vi2)3430 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
3431    UInt hash;
3432    Bool leq;
3433    VTS  *v1, *v2;
3434    //if (vi1 == vi2) return True;
3435    tl_assert(vi1 != vi2);
3436    ////++
3437    stats__cmpLEQ_queries++;
3438    hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3439    if (cmpLEQ_cache[hash].vi1 == vi1
3440        && cmpLEQ_cache[hash].vi2 == vi2)
3441       return cmpLEQ_cache[hash].leq;
3442    stats__cmpLEQ_misses++;
3443    ////--
3444    v1  = VtsID__to_VTS(vi1);
3445    v2  = VtsID__to_VTS(vi2);
3446    leq = VTS__cmpLEQ( v1, v2 ) == 0;
3447    ////++
3448    cmpLEQ_cache[hash].vi1 = vi1;
3449    cmpLEQ_cache[hash].vi2 = vi2;
3450    cmpLEQ_cache[hash].leq = leq;
3451    ////--
3452    return leq;
3453 }
VtsID__cmpLEQ(VtsID vi1,VtsID vi2)3454 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3455    return LIKELY(vi1 == vi2)  ? True  : VtsID__cmpLEQ_WRK(vi1, vi2);
3456 }
3457 
3458 /* compute binary join */
3459 __attribute__((noinline))
VtsID__join2_WRK(VtsID vi1,VtsID vi2)3460 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3461    UInt  hash;
3462    VtsID res;
3463    VTS   *vts1, *vts2;
3464    //if (vi1 == vi2) return vi1;
3465    tl_assert(vi1 != vi2);
3466    ////++
3467    stats__join2_queries++;
3468    hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3469    if (join2_cache[hash].vi1 == vi1
3470        && join2_cache[hash].vi2 == vi2)
3471       return join2_cache[hash].res;
3472    stats__join2_misses++;
3473    ////--
3474    vts1 = VtsID__to_VTS(vi1);
3475    vts2 = VtsID__to_VTS(vi2);
3476    temp_max_sized_VTS->usedTS = 0;
3477    VTS__join(temp_max_sized_VTS, vts1,vts2);
3478    res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3479    ////++
3480    join2_cache[hash].vi1 = vi1;
3481    join2_cache[hash].vi2 = vi2;
3482    join2_cache[hash].res = res;
3483    ////--
3484    return res;
3485 }
/* Join of vi1 and vi2.  Joining an ID with itself gives that ID back
   and is answered inline; everything else goes to the (cached)
   worker. */
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   if (LIKELY(vi1 == vi2))
      return vi1;
   return VtsID__join2_WRK(vi1, vi2);
}
3489 
3490 /* create a singleton VTS, namely [thr:1] */
VtsID__mk_Singleton(Thr * thr,ULong tym)3491 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
3492    temp_max_sized_VTS->usedTS = 0;
3493    VTS__singleton(temp_max_sized_VTS, thr,tym);
3494    return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3495 }
3496 
3497 /* tick operation, creates value 1 if specified index is absent */
VtsID__tick(VtsID vi,Thr * idx)3498 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3499    VTS* vts = VtsID__to_VTS(vi);
3500    temp_max_sized_VTS->usedTS = 0;
3501    VTS__tick(temp_max_sized_VTS, idx,vts);
3502    return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3503 }
3504 
3505 /* index into a VTS (only for assertions) */
VtsID__indexAt(VtsID vi,Thr * idx)3506 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3507    VTS* vts = VtsID__to_VTS(vi);
3508    return VTS__indexAt_SLOW( vts, idx );
3509 }
3510 
3511 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3512    any, really) element in vi1 which is pointwise greater-than the
3513    corresponding element in vi2.  If no such element exists, return
3514    NULL.  This needs to be fairly quick since it is called every time
3515    a race is detected. */
VtsID__findFirst_notLEQ(VtsID vi1,VtsID vi2)3516 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3517 {
3518    VTS  *vts1, *vts2;
3519    Thr*  diffthr;
3520    ThrID diffthrid;
3521    tl_assert(vi1 != vi2);
3522    vts1 = VtsID__to_VTS(vi1);
3523    vts2 = VtsID__to_VTS(vi2);
3524    tl_assert(vts1 != vts2);
3525    diffthrid = VTS__cmpLEQ(vts1, vts2);
3526    diffthr = Thr__from_ThrID(diffthrid);
3527    tl_assert(diffthr); /* else they are LEQ ! */
3528    return diffthr;
3529 }
3530 
3531 
3532 /////////////////////////////////////////////////////////
3533 //                                                     //
3534 // Filters                                             //
3535 //                                                     //
3536 /////////////////////////////////////////////////////////
3537 
3538 /* Forget everything we know -- clear the filter and let everything
3539    through.  This needs to be as fast as possible, since it is called
3540    every time the running thread changes, and every time a thread's
3541    vector clocks change, which can be quite frequent.  The obvious
3542    fast way to do this is simply to stuff in tags which we know are
3543    not going to match anything, since they're not aligned to the start
3544    of a line. */
Filter__clear(Filter * fi,const HChar * who)3545 static void Filter__clear ( Filter* fi, const HChar* who )
3546 {
3547    UWord i;
3548    if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
3549    for (i = 0; i < FI_NUM_LINES; i += 8) {
3550       fi->tags[i+0] = 1; /* impossible value -- cannot match */
3551       fi->tags[i+1] = 1;
3552       fi->tags[i+2] = 1;
3553       fi->tags[i+3] = 1;
3554       fi->tags[i+4] = 1;
3555       fi->tags[i+5] = 1;
3556       fi->tags[i+6] = 1;
3557       fi->tags[i+7] = 1;
3558    }
3559    tl_assert(i == FI_NUM_LINES);
3560 }
3561 
3562 /* Clearing an arbitrary range in the filter.  Unfortunately
3563    we have to do this due to core-supplied new/die-mem events. */
3564 
/* Clear the two filter bits that belong to the single byte at 'a'.
   Nothing happens unless the filter line that would hold 'a' is
   currently tagged for it. */
static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr   atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord  lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   UShort mask   = 0x3 << (2 * (a & 7));
   /* mask is 0003, 000C, 0030, 00C0, 0300, 0C00, 3000 or C000
      for (a & 7) == 0 .. 7 respectively */

   if (UNLIKELY( fi->tags[lineno] != atag )) {
      /* miss.  The filter doesn't hold this address, so ignore. */
      return;
   }

   /* hit.  clear the bits in the 16-bit group covering 'a'. */
   fi->lines[lineno].u16s[(a - atag) / 8] &= ~mask;
}
3581 
/* Clear all filter bits of the 8-byte group containing 'a' (the whole
   16-bit entry is zeroed).  Nothing happens unless the filter line that
   would hold 'a' is currently tagged for it.  The name indicates 'a' is
   expected to be 8-aligned; this is not checked here. */
static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr  atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord lineno = FI_GET_LINENO(a);  /* lineno for 'a' */

   if (UNLIKELY( fi->tags[lineno] != atag )) {
      /* miss.  The filter doesn't hold this address, so ignore. */
      return;
   }

   fi->lines[lineno].u16s[(a - atag) / 8] = 0;
}
3594 
3595 /* Only used to verify the fast Filter__clear_range */
3596 __attribute__((unused))
Filter__clear_range_SLOW(Filter * fi,Addr a,UWord len)3597 static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
3598 {
3599    tl_assert (CHECK_ZSM);
3600 
3601    /* slowly do part preceding 8-alignment */
3602    while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3603       Filter__clear_1byte( fi, a );
3604       a++;
3605       len--;
3606    }
3607    /* vector loop */
3608    while (len >= 8) {
3609       Filter__clear_8bytes_aligned( fi, a );
3610       a += 8;
3611       len -= 8;
3612    }
3613    /* slowly do tail */
3614    while (UNLIKELY(len > 0)) {
3615       Filter__clear_1byte( fi, a );
3616       a++;
3617       len--;
3618    }
3619 }
3620 
/* Clear the filter bits for the 'len' bytes starting at 'a'.

   This is the fast counterpart of Filter__clear_range_SLOW.  It works
   in up to three phases:
     1. the first filter line, if it is tagged for the start of the
        range: bytes up to 8-alignment, then 8-byte groups up to the
        end of that line;
     2. every whole 32-byte line inside the range, by walking the
        filter lines from that point on (at most FI_NUM_LINES of them);
     3. the last, partially covered line, if it is tagged for the end
        of the range.
   Lines whose tag does not match are not touched.

   When CHECK_ZSM > 0 the slow version is first run on a copy of the
   filter and the two results are compared before returning; RETURN is
   a local macro so that every exit goes through that comparison.

   NOTE(review): 'end' is computed as a + len - 1, so len == 0 gives
   end == a - 1; confirm that callers never pass len == 0 with a == 0. */
static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
#  if CHECK_ZSM > 0
   /* We check the below more complex algorithm with the simple one.
      This check is very expensive : we do first the slow way on a
      copy of the data, then do it the fast way. On RETURN, we check
      the two values are equal. */
   Filter fi_check = *fi;
   Filter__clear_range_SLOW(&fi_check, a, len);
#  define RETURN goto check_and_return
#  else
#  define RETURN return
#  endif

   Addr    begtag = FI_GET_TAG(a);       /* tag of range begin */

   Addr    end = a + len - 1;
   Addr    endtag = FI_GET_TAG(end); /* tag of range end. */

   UWord rlen = len; /* remaining length to clear */

   Addr    c = a; /* Current position we are clearing. */
   UWord   clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
   FiLine* cline; /* Current line we are clearing */
   UWord   cloff; /* Current offset in line we are clearing, when clearing
                     partial lines. */

   UShort u16;

   STATIC_ASSERT (FI_LINE_SZB == 32);
   // Below assumes filter lines are 32 bytes

   if (LIKELY(fi->tags[clineno] == begtag)) {
      /* LIKELY for the heavy caller VG_(unknown_SP_update). */
      /* First filter line matches begtag.
         If c is not at the filter line begin, the below will clear
         the filter line bytes starting from c. */
      cline = &fi->lines[clineno];
      cloff = (c - begtag) / 8;

      /* First the byte(s) needed to reach 8-alignment */
      if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
         /* hiB is the nr of bytes (higher addresses) from c to reach
            8-alignment. */
         UWord hiB = 8 - (c & 7);
         /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
            mask is  C000 , F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
            range    7..7   6..7  5..7  4..7  3..7  2..7    1..7 */
         UShort mask = 0xFFFF << (16 - 2*hiB);

         u16  = cline->u16s[cloff];
         if (LIKELY(rlen >= hiB)) {
            cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
            rlen -= hiB;
            c += hiB;
            cloff += 1;
         } else {
            /* Only have the bits for rlen bytes: drop from the mask the
               bits of the (hiB-rlen) bytes that lie beyond the range. */
            mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
            cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
            RETURN;  // We have cleared all that we can.
         }
      }
      /* c is now 8 aligned. Clear by 8 aligned bytes,
         till c is filter-line aligned */
      while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
         cline->u16s[cloff] = 0;
         c += 8;
         rlen -= 8;
         cloff += 1;
      }
   } else {
      /* The first line is not tagged for the range start, so there is
         nothing to clear in it: skip straight to the start of the
         next line. */
      c = begtag + FI_LINE_SZB;
      if (c > end)
         RETURN;   // We have cleared all that we can.
      rlen -= c - a;
   }
   // We have changed c, so re-establish clineno.
   clineno = FI_GET_LINENO(c);

   if (rlen >= FI_LINE_SZB) {
      /* Here, c is filter line-aligned. Clear all full lines that
         overlap with the range starting at c, made of full lines */
      UWord nfull = rlen / FI_LINE_SZB;
      UWord full_len = nfull * FI_LINE_SZB;
      rlen -= full_len;
      if (nfull > FI_NUM_LINES)
         nfull = FI_NUM_LINES; // no need to check several times the same entry.

      for (UWord n = 0; n < nfull; n++) {
         /* Zero the line only if its tag falls within the full-line
            part of the range (presumably [c, c+full_len) -- see
            address_in_range). */
         if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
            cline = &fi->lines[clineno];
            cline->u16s[0] = 0;
            cline->u16s[1] = 0;
            cline->u16s[2] = 0;
            cline->u16s[3] = 0;
            STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
         }
         /* Advance to the next filter line, wrapping around. */
         clineno++;
         if (UNLIKELY(clineno == FI_NUM_LINES))
            clineno = 0;
      }

      c += full_len;
      clineno = FI_GET_LINENO(c);
   }

   if (CHECK_ZSM) {
      tl_assert(VG_IS_8_ALIGNED(c));
      tl_assert(clineno == FI_GET_LINENO(c));
   }

   /* Do the last filter line, if it was not cleared as a full filter line */
   if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
      cline = &fi->lines[clineno];
      cloff = (c - endtag) / 8;
      if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);

      /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than
         8 bytes. */
      while (rlen >= 8) {
         cline->u16s[cloff] = 0;
         c += 8;
         rlen -= 8;
         cloff += 1;
      }
      /* Then the remaining byte(s) */
      if (rlen > 0) {
         /* nr of bytes from c to reach end. */
         UWord loB = rlen;
         /* Compute mask representing loB bytes [c..c+loB[ :
            mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
         UShort mask = 0xFFFF >> (16 - 2*loB);

         u16  = cline->u16s[cloff];
         cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
      }
   }

#  if CHECK_ZSM > 0
   check_and_return:
   tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
#  endif
#  undef RETURN
}
3766 
3767 /* ------ Read handlers for the filter. ------ */
3768 
Filter__ok_to_skip_crd64(Filter * fi,Addr a)3769 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3770 {
3771    if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3772       return False;
3773    {
3774      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3775      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3776      FiLine* line   = &fi->lines[lineno];
3777      UWord   loff   = (a - atag) / 8;
3778      UShort  mask   = 0xAAAA;
3779      if (LIKELY( fi->tags[lineno] == atag )) {
3780         /* hit.  check line and update. */
3781         UShort u16  = line->u16s[loff];
3782         Bool   ok   = (u16 & mask) == mask; /* all R bits set? */
3783         line->u16s[loff] = u16 | mask; /* set them */
3784         return ok;
3785      } else {
3786         /* miss.  nuke existing line and re-use it. */
3787         UWord i;
3788         fi->tags[lineno] = atag;
3789         for (i = 0; i < FI_LINE_SZB / 8; i++)
3790            line->u16s[i] = 0;
3791         line->u16s[loff] = mask;
3792         return False;
3793      }
3794    }
3795 }
3796 
Filter__ok_to_skip_crd32(Filter * fi,Addr a)3797 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3798 {
3799    if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3800       return False;
3801    {
3802      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3803      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3804      FiLine* line   = &fi->lines[lineno];
3805      UWord   loff   = (a - atag) / 8;
3806      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3807      if (LIKELY( fi->tags[lineno] == atag )) {
3808         /* hit.  check line and update. */
3809         UShort  u16  = line->u16s[loff];
3810         Bool    ok   = (u16 & mask) == mask; /* 4 x R bits set? */
3811         line->u16s[loff] = u16 | mask; /* set them */
3812         return ok;
3813      } else {
3814         /* miss.  nuke existing line and re-use it. */
3815         UWord   i;
3816         fi->tags[lineno] = atag;
3817         for (i = 0; i < FI_LINE_SZB / 8; i++)
3818            line->u16s[i] = 0;
3819         line->u16s[loff] = mask;
3820         return False;
3821      }
3822    }
3823 }
3824 
Filter__ok_to_skip_crd16(Filter * fi,Addr a)3825 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3826 {
3827    if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3828       return False;
3829    {
3830      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3831      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3832      FiLine* line   = &fi->lines[lineno];
3833      UWord   loff   = (a - atag) / 8;
3834      UShort  mask   = 0xA << (2 * (a & 6));
3835      /* mask is A000, 0A00, 00A0 or 000A */
3836      if (LIKELY( fi->tags[lineno] == atag )) {
3837         /* hit.  check line and update. */
3838         UShort  u16  = line->u16s[loff];
3839         Bool    ok   = (u16 & mask) == mask; /* 2 x R bits set? */
3840         line->u16s[loff] = u16 | mask; /* set them */
3841         return ok;
3842      } else {
3843         /* miss.  nuke existing line and re-use it. */
3844         UWord   i;
3845         fi->tags[lineno] = atag;
3846         for (i = 0; i < FI_LINE_SZB / 8; i++)
3847            line->u16s[i] = 0;
3848         line->u16s[loff] = mask;
3849         return False;
3850      }
3851    }
3852 }
3853 
Filter__ok_to_skip_crd08(Filter * fi,Addr a)3854 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3855 {
3856    {
3857      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3858      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3859      FiLine* line   = &fi->lines[lineno];
3860      UWord   loff   = (a - atag) / 8;
3861      UShort  mask   = 0x2 << (2 * (a & 7));
3862      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3863      if (LIKELY( fi->tags[lineno] == atag )) {
3864         /* hit.  check line and update. */
3865         UShort  u16  = line->u16s[loff];
3866         Bool    ok   = (u16 & mask) == mask; /* 1 x R bits set? */
3867         line->u16s[loff] = u16 | mask; /* set them */
3868         return ok;
3869      } else {
3870         /* miss.  nuke existing line and re-use it. */
3871         UWord   i;
3872         fi->tags[lineno] = atag;
3873         for (i = 0; i < FI_LINE_SZB / 8; i++)
3874            line->u16s[i] = 0;
3875         line->u16s[loff] = mask;
3876         return False;
3877      }
3878    }
3879 }
3880 
3881 
3882 /* ------ Write handlers for the filter. ------ */
3883 
Filter__ok_to_skip_cwr64(Filter * fi,Addr a)3884 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3885 {
3886    if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3887       return False;
3888    {
3889      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3890      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3891      FiLine* line   = &fi->lines[lineno];
3892      UWord   loff   = (a - atag) / 8;
3893      UShort  mask   = 0xFFFF;
3894      if (LIKELY( fi->tags[lineno] == atag )) {
3895         /* hit.  check line and update. */
3896         UShort u16  = line->u16s[loff];
3897         Bool   ok   = (u16 & mask) == mask; /* all R & W bits set? */
3898         line->u16s[loff] = u16 | mask; /* set them */
3899         return ok;
3900      } else {
3901         /* miss.  nuke existing line and re-use it. */
3902         UWord i;
3903         fi->tags[lineno] = atag;
3904         for (i = 0; i < FI_LINE_SZB / 8; i++)
3905            line->u16s[i] = 0;
3906         line->u16s[loff] = mask;
3907         return False;
3908      }
3909    }
3910 }
3911 
Filter__ok_to_skip_cwr32(Filter * fi,Addr a)3912 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3913 {
3914    if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3915       return False;
3916    {
3917      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3918      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3919      FiLine* line   = &fi->lines[lineno];
3920      UWord   loff   = (a - atag) / 8;
3921      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3922      if (LIKELY( fi->tags[lineno] == atag )) {
3923         /* hit.  check line and update. */
3924         UShort  u16  = line->u16s[loff];
3925         Bool    ok   = (u16 & mask) == mask; /* 4 x R & W bits set? */
3926         line->u16s[loff] = u16 | mask; /* set them */
3927         return ok;
3928      } else {
3929         /* miss.  nuke existing line and re-use it. */
3930         UWord   i;
3931         fi->tags[lineno] = atag;
3932         for (i = 0; i < FI_LINE_SZB / 8; i++)
3933            line->u16s[i] = 0;
3934         line->u16s[loff] = mask;
3935         return False;
3936      }
3937    }
3938 }
3939 
Filter__ok_to_skip_cwr16(Filter * fi,Addr a)3940 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3941 {
3942    if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3943       return False;
3944    {
3945      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3946      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3947      FiLine* line   = &fi->lines[lineno];
3948      UWord   loff   = (a - atag) / 8;
3949      UShort  mask   = 0xF << (2 * (a & 6));
3950      /* mask is F000, 0F00, 00F0 or 000F */
3951      if (LIKELY( fi->tags[lineno] == atag )) {
3952         /* hit.  check line and update. */
3953         UShort  u16  = line->u16s[loff];
3954         Bool    ok   = (u16 & mask) == mask; /* 2 x R & W bits set? */
3955         line->u16s[loff] = u16 | mask; /* set them */
3956         return ok;
3957      } else {
3958         /* miss.  nuke existing line and re-use it. */
3959         UWord   i;
3960         fi->tags[lineno] = atag;
3961         for (i = 0; i < FI_LINE_SZB / 8; i++)
3962            line->u16s[i] = 0;
3963         line->u16s[loff] = mask;
3964         return False;
3965      }
3966    }
3967 }
3968 
Filter__ok_to_skip_cwr08(Filter * fi,Addr a)3969 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3970 {
3971    {
3972      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3973      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3974      FiLine* line   = &fi->lines[lineno];
3975      UWord   loff   = (a - atag) / 8;
3976      UShort  mask   = 0x3 << (2 * (a & 7));
3977      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3978      if (LIKELY( fi->tags[lineno] == atag )) {
3979         /* hit.  check line and update. */
3980         UShort  u16  = line->u16s[loff];
3981         Bool    ok   = (u16 & mask) == mask; /* 1 x R bits set? */
3982         line->u16s[loff] = u16 | mask; /* set them */
3983         return ok;
3984      } else {
3985         /* miss.  nuke existing line and re-use it. */
3986         UWord   i;
3987         fi->tags[lineno] = atag;
3988         for (i = 0; i < FI_LINE_SZB / 8; i++)
3989            line->u16s[i] = 0;
3990         line->u16s[loff] = mask;
3991         return False;
3992      }
3993    }
3994 }
3995 
3996 
3997 /////////////////////////////////////////////////////////
3998 //                                                     //
3999 // Threads                                             //
4000 //                                                     //
4001 /////////////////////////////////////////////////////////
4002 
/* Maps ThrID values to their Thr*s (which contain ThrID values that
   should point back to the relevant slot in the array).  Lowest
   numbered slot (0) is for thrid = 1024, (1) is for 1025, etc.
   The array is created on first use by Thr__new. */
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;

/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above.  Thr__new hands
   out the current value and then increments it, and refuses to go on
   once the counter has reached ThrID_MAX_VALID. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
4011 
Thr__to_ThrID(Thr * thr)4012 static ThrID Thr__to_ThrID ( Thr* thr ) {
4013    return thr->thrid;
4014 }
Thr__from_ThrID(UInt thrid)4015 static Thr* Thr__from_ThrID ( UInt thrid ) {
4016    Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
4017    tl_assert(thr->thrid == thrid);
4018    return thr;
4019 }
4020 
Thr__new(void)4021 static Thr* Thr__new ( void )
4022 {
4023    Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
4024    thr->viR = VtsID_INVALID;
4025    thr->viW = VtsID_INVALID;
4026    thr->llexit_done = False;
4027    thr->joinedwith_done = False;
4028    thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
4029    if (HG_(clo_history_level) == 1)
4030       thr->local_Kws_n_stacks
4031          = VG_(newXA)( HG_(zalloc),
4032                        "libhb.Thr__new.3 (local_Kws_and_stacks)",
4033                        HG_(free), sizeof(ULong_n_EC) );
4034 
4035    /* Add this Thr* <-> ThrID binding to the mapping, and
4036       cross-check */
4037    if (!thrid_to_thr_map) {
4038       thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
4039                                      HG_(free), sizeof(Thr*) );
4040    }
4041 
4042    if (thrid_counter >= ThrID_MAX_VALID) {
4043       /* We're hosed.  We have to stop. */
4044       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
4045    }
4046 
4047    thr->thrid = thrid_counter++;
4048    Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
4049    tl_assert(ix + 1024 == thr->thrid);
4050 
4051    return thr;
4052 }
4053 
/* Append a (scalar Kw, ExeContext) pair for 'thr' to
   thr->local_Kws_n_stacks.  Does nothing unless the history level is
   exactly 1.

   When the array already holds N_KWs_N_STACKs_PER_THREAD or more
   entries, the older half is dropped first.  The new Kw is asserted
   to be >= the Kw of the last entry still present.  If the array is
   empty at the point of insertion, the pair is stored with a NULL
   ExeContext. */
static void note_local_Kw_n_stack_for ( Thr* thr )
{
   ULong_n_EC entry;
   Word       count;

   tl_assert(thr);

   // This information is only gathered at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* The scalar Kw for thr, plus the context it was taken in. */
   entry.ull = VtsID__indexAt( thr->viW, thr );
   entry.ec  = main_get_EC( thr );
   tl_assert(entry.ec);
   tl_assert(thr->local_Kws_n_stacks);

   count = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Bound the memory used: once the limit is reached, discard the
      oldest half of what we have. */
   if (count >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, count / 2 );
      count = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
                     thr, entry.ull, entry.ec );
   }

   if (count > 0) {
      /* Kw values must arrive in non-decreasing order. */
      ULong_n_EC* last
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, count-1 );
      tl_assert( last->ull <= entry.ull );
   } else if (count == 0) {
      entry.ec = NULL;
   }

   VG_(addToXA)( thr->local_Kws_n_stacks, &entry );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
                  thr, entry.ull, entry.ec );
   if (0)
      VG_(pp_ExeContext)(entry.ec);
}
4100 
cmp__ULong_n_EC__by_ULong(const ULong_n_EC * pair1,const ULong_n_EC * pair2)4101 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
4102                                        const ULong_n_EC* pair2 )
4103 {
4104    if (pair1->ull < pair2->ull) return -1;
4105    if (pair1->ull > pair2->ull) return 1;
4106    return 0;
4107 }
4108 
4109 
4110 /////////////////////////////////////////////////////////
4111 //                                                     //
4112 // Shadow Values                                       //
4113 //                                                     //
4114 /////////////////////////////////////////////////////////
4115 
4116 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
4117 // hb_zsm.h.  We have to do everything else here.
4118 
4119 /* SVal is 64 bit unsigned int.
4120 
4121       <---------30--------->    <---------30--------->
4122    00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
4123    10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
4124    11 0--------------------0 00 0--------------------0   A: SVal_INVALID
4125 
4126 */
4127 #define SVAL_TAGMASK (3ULL << 62)
4128 
SVal__isC(SVal s)4129 static inline Bool SVal__isC ( SVal s ) {
4130    return (0ULL << 62) == (s & SVAL_TAGMASK);
4131 }
SVal__mkC(VtsID rmini,VtsID wmini)4132 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
4133    //tl_assert(VtsID__is_valid(rmini));
4134    //tl_assert(VtsID__is_valid(wmini));
4135    return (((ULong)rmini) << 32) | ((ULong)wmini);
4136 }
SVal__unC_Rmin(SVal s)4137 static inline VtsID SVal__unC_Rmin ( SVal s ) {
4138    tl_assert(SVal__isC(s));
4139    return (VtsID)(s >> 32);
4140 }
SVal__unC_Wmin(SVal s)4141 static inline VtsID SVal__unC_Wmin ( SVal s ) {
4142    tl_assert(SVal__isC(s));
4143    return (VtsID)(s & 0xFFFFFFFFULL);
4144 }
4145 
SVal__isA(SVal s)4146 static inline Bool SVal__isA ( SVal s ) {
4147    return (2ULL << 62) == (s & SVAL_TAGMASK);
4148 }
4149 __attribute__((unused))
SVal__mkA(void)4150 static inline SVal SVal__mkA ( void ) {
4151    return 2ULL << 62;
4152 }
4153 
4154 /* Direct callback from lib_zsm. */
SVal__rcinc(SVal s)4155 static inline void SVal__rcinc ( SVal s ) {
4156    if (SVal__isC(s)) {
4157       VtsID__rcinc( SVal__unC_Rmin(s) );
4158       VtsID__rcinc( SVal__unC_Wmin(s) );
4159    }
4160 }
4161 
4162 /* Direct callback from lib_zsm. */
SVal__rcdec(SVal s)4163 static inline void SVal__rcdec ( SVal s ) {
4164    if (SVal__isC(s)) {
4165       VtsID__rcdec( SVal__unC_Rmin(s) );
4166       VtsID__rcdec( SVal__unC_Wmin(s) );
4167    }
4168 }
4169 
/* Reinterpret an SVal as a pointer, going through UWord (so only the
   UWord-sized low part of 's' is kept). */
static inline void *SVal2Ptr (SVal s)
{
   UWord bits = (UWord)s;
   return (void*)bits;
}
4174 
Ptr2SVal(void * ptr)4175 static inline SVal Ptr2SVal (void* ptr)
4176 {
4177    return (SVal)(UWord)ptr;
4178 }
4179 
4180 
4181 
4182 /////////////////////////////////////////////////////////
4183 //                                                     //
4184 // Change-event map2                                   //
4185 //                                                     //
4186 /////////////////////////////////////////////////////////
4187 
4188 /* This is in two parts:
4189 
4190    1. A hash table of RCECs.  This is a set of reference-counted stack
4191       traces.  When the reference count of a stack trace becomes zero,
4192       it is removed from the set and freed up.  The intent is to have
4193       a set of stack traces which can be referred to from (2), but to
4194       only represent each one once.  The set is indexed/searched by
4195       ordering on the stack trace vectors.
4196 
4197    2. A Hash table of OldRefs.  These store information about each old
4198       ref that we need to record.  Hash table key is the address of the
4199       location for which the information is recorded.  For LRU
4200       purposes, each OldRef in the hash table is also on a doubly
4201       linked list maintaining the order in which the OldRef were most
4202       recently accessed.
4203       Each OldRef also maintains the stamp at which it was last accessed.
4204       With these stamps, we can quickly check which of 2 OldRef is the
4205       'newest', without having to scan the full list of LRU OldRef.
4206 
4207       The important part of an OldRef is, however, its acc component.
4208       This binds a TSW triple (thread, size, R/W) to an RCEC.
4209 
      We allocate a maximum of HG_(clo_conflict_cache_size) OldRef.
4211       Then we do exact LRU discarding.  For each discarded OldRef we must
4212       of course decrement the reference count on the RCEC it
4213       refers to, in order that entries from (1) eventually get
4214       discarded too.
4215 */
4216 
/* Outcomes of libhb_event_map_lookup: one of these is bumped on each
   call, depending on whether a conflicting access was found. */
static UWord stats__evm__lookup_found = 0;
static UWord stats__evm__lookup_notfound = 0;

/* Outcomes of event_map_bind: existing record with the same RCEC,
   existing record whose RCEC had to be replaced, and no existing
   record (a new/re-used OldRef was filled in), respectively. */
static UWord stats__ctxt_eq_tsw_eq_rcec = 0;
static UWord stats__ctxt_eq_tsw_neq_rcec = 0;
static UWord stats__ctxt_neq_tsw_neq_rcec = 0;
/* Number of calls to ctxt__rcdec. */
static UWord stats__ctxt_rcdec_calls = 0;
/* NOTE(review): not updated by the nearby code; presumably counts
   RCECs discarded by the RCEC garbage collector -- confirm. */
static UWord stats__ctxt_rcec_gc_discards = 0;

/* Current and maximum number of RCECs in contextTab, as maintained by
   ctxt__find_or_add when it inserts a new entry. */
static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max  = 0;

/* Number of contextTab searches made by ctxt__find_or_add, and the
   number of RCEC comparisons those searches performed. */
static UWord stats__ctxt_tab_qs   = 0;
static UWord stats__ctxt_tab_cmps = 0;
4231 
4232 
4233 ///////////////////////////////////////////////////////
4234 //// Part (1): A hash table of RCECs
4235 ///
4236 
4237 #define N_FRAMES 8
4238 
4239 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
4240 #define RCEC_MAGIC 0xab88abb2UL
4241 
4242 //#define N_RCEC_TAB 98317 /* prime */
4243 #define N_RCEC_TAB 196613 /* prime */
4244 
/* A reference-counted execution context: a stack trace of up to
   N_FRAMES entries, stored once in the contextTab hash table and
   shared by every OldRef that refers to it. */
typedef
   struct _RCEC {
      UWord magic;  /* sanity check only */
      struct _RCEC* next; /* next RCEC in the same contextTab chain */
      UWord rc;  /* reference count; see ctxt__rcinc / ctxt__rcdec */
      UWord rcX; /* used for crosschecking */
      UWord frames_hash;          /* hash of all the frames */
      UWord frames[N_FRAMES]; /* the stack trace; readers treat a zero
                                 entry as the end of the trace */
   }
   RCEC;
4255 
4256 //////////// BEGIN RCEC pool allocator
4257 static PoolAlloc* rcec_pool_allocator;
alloc_RCEC(void)4258 static RCEC* alloc_RCEC ( void ) {
4259    return VG_(allocEltPA) ( rcec_pool_allocator );
4260 }
4261 
/* Give 'rcec' back to rcec_pool_allocator, after checking its magic
   number so that a stray pointer is caught before it is freed. */
static void free_RCEC ( RCEC* rcec )
{
   tl_assert(rcec->magic == RCEC_MAGIC);
   VG_(freeEltPA)( rcec_pool_allocator, rcec );
}
4266 //////////// END RCEC pool allocator
4267 
4268 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4269 
4270 /* Count of allocated RCEC having ref count > 0 */
4271 static UWord RCEC_referenced = 0;
4272 
4273 /* Gives an arbitrary total order on RCEC .frames fields */
RCEC__cmp_by_frames(RCEC * ec1,RCEC * ec2)4274 static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4275    Word i;
4276    tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4277    tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
4278    if (ec1->frames_hash < ec2->frames_hash) return -1;
4279    if (ec1->frames_hash > ec2->frames_hash) return  1;
4280    for (i = 0; i < N_FRAMES; i++) {
4281       if (ec1->frames[i] < ec2->frames[i]) return -1;
4282       if (ec1->frames[i] > ec2->frames[i]) return  1;
4283    }
4284    return 0;
4285 }
4286 
4287 
4288 /* Dec the ref of this RCEC. */
ctxt__rcdec(RCEC * ec)4289 static void ctxt__rcdec ( RCEC* ec )
4290 {
4291    stats__ctxt_rcdec_calls++;
4292    tl_assert(ec && ec->magic == RCEC_MAGIC);
4293    tl_assert(ec->rc > 0);
4294    ec->rc--;
4295    if (ec->rc == 0)
4296       RCEC_referenced--;
4297 }
4298 
/* Add one reference to 'ec'.  If the count was zero beforehand,
   RCEC_referenced is incremented as well. */
static void ctxt__rcinc ( RCEC* ec )
{
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   if (ec->rc++ == 0)
      RCEC_referenced++;
}
4306 
4307 
4308 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4309    move it one step closer to the front of the list, so as to make
4310    subsequent searches for it cheaper. */
move_RCEC_one_step_forward(RCEC ** headp,RCEC * ec)4311 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4312 {
4313    RCEC *ec0, *ec1, *ec2;
4314    if (ec == *headp)
4315       tl_assert(0); /* already at head of list */
4316    tl_assert(ec != NULL);
4317    ec0 = *headp;
4318    ec1 = NULL;
4319    ec2 = NULL;
4320    while (True) {
4321       if (ec0 == NULL || ec0 == ec) break;
4322       ec2 = ec1;
4323       ec1 = ec0;
4324       ec0 = ec0->next;
4325    }
4326    tl_assert(ec0 == ec);
4327    if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4328       RCEC* tmp;
4329       /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4330          predecessor.  Swap ec0 and ec1, that is, move ec0 one step
4331          closer to the start of the list. */
4332       tl_assert(ec2->next == ec1);
4333       tl_assert(ec1->next == ec0);
4334       tmp = ec0->next;
4335       ec2->next = ec0;
4336       ec0->next = ec1;
4337       ec1->next = tmp;
4338    }
4339    else
4340    if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4341       /* it's second in the list. */
4342       tl_assert(*headp == ec1);
4343       tl_assert(ec1->next == ec0);
4344       ec1->next = ec0->next;
4345       ec0->next = ec1;
4346       *headp = ec0;
4347    }
4348 }
4349 
4350 
4351 /* Find the given RCEC in the tree, and return a pointer to it.  Or,
4352    if not present, add the given one to the tree (by making a copy of
4353    it, so the caller can immediately deallocate the original) and
4354    return a pointer to the copy.  The caller can safely have 'example'
4355    on its stack, since we will always return a pointer to a copy of
4356    it, not to the original.  Note that the inserted node will have .rc
4357    of zero and so the caller must immediately increment it. */
4358 __attribute__((noinline))
ctxt__find_or_add(RCEC * example)4359 static RCEC* ctxt__find_or_add ( RCEC* example )
4360 {
4361    UWord hent;
4362    RCEC* copy;
4363    tl_assert(example && example->magic == RCEC_MAGIC);
4364    tl_assert(example->rc == 0);
4365 
4366    /* Search the hash table to see if we already have it. */
4367    stats__ctxt_tab_qs++;
4368    hent = example->frames_hash % N_RCEC_TAB;
4369    copy = contextTab[hent];
4370    while (1) {
4371       if (!copy) break;
4372       tl_assert(copy->magic == RCEC_MAGIC);
4373       stats__ctxt_tab_cmps++;
4374       if (0 == RCEC__cmp_by_frames(copy, example)) break;
4375       copy = copy->next;
4376    }
4377 
4378    if (copy) {
4379       tl_assert(copy != example);
4380       /* optimisation: if it's not at the head of its list, move 1
4381          step fwds, to make future searches cheaper */
4382       if (copy != contextTab[hent]) {
4383          move_RCEC_one_step_forward( &contextTab[hent], copy );
4384       }
4385    } else {
4386       copy = alloc_RCEC();
4387       tl_assert(copy != example);
4388       *copy = *example;
4389       copy->next = contextTab[hent];
4390       contextTab[hent] = copy;
4391       stats__ctxt_tab_curr++;
4392       if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4393          stats__ctxt_tab_max = stats__ctxt_tab_curr;
4394    }
4395    return copy;
4396 }
4397 
/* Rotate 'w' left by 'n' bit positions within a UWord.

   The rotation count is reduced modulo the word width, so every
   value of 'n' is well defined: a count of 0 (or any multiple of the
   width) returns 'w' unchanged, and a negative count rotates right
   by the same magnitude.  For 0 < n < width the result is identical
   to the plain (w << n) | (w >> (width - n)) formulation. */
static inline UWord ROLW ( UWord w, Int n )
{
   /* Bits per word.  The masking below relies on this being a power
      of two. */
   const UWord bpw = 8 * sizeof(UWord);
   const UWord k   = (UWord)n & (bpw - 1);   /* n mod bpw */
   /* The right-shift count is masked as well: for k == 0 it would
      otherwise equal bpw, and shifting by the full width of the type
      is undefined behaviour in C. */
   return (w << k) | (w >> ((bpw - k) & (bpw - 1)));
}
4404 
4405 __attribute__((noinline))
get_RCEC(Thr * thr)4406 static RCEC* get_RCEC ( Thr* thr )
4407 {
4408    UWord hash, i;
4409    RCEC  example;
4410    example.magic = RCEC_MAGIC;
4411    example.rc = 0;
4412    example.rcX = 0;
4413    example.next = NULL;
4414    main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
4415    hash = 0;
4416    for (i = 0; i < N_FRAMES; i++) {
4417       hash ^= example.frames[i];
4418       hash = ROLW(hash, 19);
4419    }
4420    example.frames_hash = hash;
4421    return ctxt__find_or_add( &example );
4422 }
4423 
4424 ///////////////////////////////////////////////////////
4425 //// Part (2):
4426 ///  A hashtable guest-addr -> OldRef, that refers to (1)
4427 ///  Note: we use the guest address as key. This means that the entries
4428 ///  for multiple threads accessing the same address will land in the same
4429 ///  bucket. It might be nice to have a better distribution of the
///  OldRef in the hashtable by using as key the guestaddress ^ tsw.
4431 ///  The problem is that when a race is reported on a ga, we need to retrieve
4432 ///  efficiently the accesses to ga by other threads, only using the ga.
4433 ///  Measurements on firefox have shown that the chain length is reasonable.
4434 
4435 /* Records an access: a thread, a context (size & writeness) and the
4436    number of held locks. The size (1,2,4,8) is stored as is in szB.
4437    Note that szB uses more bits than needed to store a size up to 8.
4438    This allows to use a TSW as a fully initialised UInt e.g. in
4439    cmp_oldref_tsw. If needed, a more compact representation of szB
4440    can be done (e.g. use only 4 bits, or use only 2 bits and encode the
4441    size (1,2,4,8) as 00 = 1, 01 = 2, 10 = 4, 11 = 8. */
/* The three bit-fields below add up to exactly 32 bits, which is what
   lets oldref_tsw read a whole TSW as one UInt. */
typedef
   struct {
      UInt      thrid  : SCALARTS_N_THRBITS; /* ThrID of the accessing thread */
      UInt      szB    : 32 - SCALARTS_N_THRBITS - 1; /* access size in bytes */
      UInt      isW    : 1; /* 1 for a write, 0 for a read */
   } TSW; // Thread+Size+Writeness
/* One recorded access: who/how (tsw), the W-held lockset of the thread
   at the time, and the stack trace it was made from. */
typedef
   struct {
      TSW       tsw;        /* thread, size and read/write-ness */
      WordSetID locksHeldW; /* copied from thr->hgthread->locksetW by
                               event_map_bind */
      RCEC*     rcec;       /* referenced via ctxt__rcinc/ctxt__rcdec */
   }
   Thr_n_RCEC;
4455 
/* A record of one past access to address 'ga'.  Each OldRef lives in
   the oldrefHT hash table and, at the same time, on the doubly linked
   list that runs between the 'lru' and 'mru' sentinel nodes. */
typedef
   struct OldRef {
      struct OldRef *ht_next; // to link hash table nodes together.
      UWord  ga; // hash_table key, == address for which we record an access.
      struct OldRef *prev; // to refs older than this one
      struct OldRef *next; // to refs newer than this one
      UWord stamp; // allows to order (by time of access) 2 OldRef
      Thr_n_RCEC acc; // the recorded access itself
   }
   OldRef;
4466 
4467 /* Returns the or->tsw as an UInt */
oldref_tsw(const OldRef * or)4468 static inline UInt oldref_tsw (const OldRef* or)
4469 {
4470    return *(const UInt*)(&or->acc.tsw);
4471 }
4472 
4473 /* Compare the tsw component for 2 OldRef.
4474    Used for OldRef hashtable (which already verifies equality of the
4475    'key' part. */
cmp_oldref_tsw(const void * node1,const void * node2)4476 static Word cmp_oldref_tsw (const void* node1, const void* node2 )
4477 {
4478    const UInt tsw1 = oldref_tsw(node1);
4479    const UInt tsw2 = oldref_tsw(node2);
4480 
4481    if (tsw1 < tsw2) return -1;
4482    if (tsw1 > tsw2) return  1;
4483    return 0;
4484 }
4485 
4486 
4487 //////////// BEGIN OldRef pool allocator
4488 static PoolAlloc* oldref_pool_allocator;
// Note: We only allocate elements in this pool allocator, we never free them.
// We stop allocating elements at HG_(clo_conflict_cache_size).
4491 //////////// END OldRef pool allocator
4492 
4493 static OldRef mru;
4494 static OldRef lru;
// A doubly linked list, chaining all OldRef in a mru/lru order.
4496 // mru/lru are sentinel nodes.
4497 // Whenever an oldref is re-used, its position is changed as the most recently
4498 // used (i.e. pointed to by mru.prev).
4499 // When a new oldref is needed, it is allocated from the pool
4500 //  if we have not yet reached --conflict-cache-size.
4501 // Otherwise, if all oldref have already been allocated,
4502 // the least recently used (i.e. pointed to by lru.next) is re-used.
4503 // When an OldRef is used, it is moved as the most recently used entry
4504 // (i.e. pointed to by mru.prev).
4505 
4506 // Removes r from the double linked list
4507 // Note: we do not need to test for special cases such as
4508 // NULL next or prev pointers, because we have sentinel nodes
4509 // at both sides of the list. So, a node is always forward and
4510 // backward linked.
OldRef_unchain(OldRef * r)4511 static inline void OldRef_unchain(OldRef *r)
4512 {
4513    r->next->prev = r->prev;
4514    r->prev->next = r->next;
4515 }
4516 
4517 // Insert new as the newest OldRef
4518 // Similarly to OldRef_unchain, no need to test for NULL
4519 // pointers, as e.g. mru.prev is always guaranteed to point
4520 // to a non NULL node (lru when the list is empty).
OldRef_newest(OldRef * new)4521 static inline void OldRef_newest(OldRef *new)
4522 {
4523    new->next = &mru;
4524    new->prev = mru.prev;
4525    mru.prev = new;
4526    new->prev->next = new;
4527 }
4528 
4529 
4530 static VgHashTable* oldrefHT    = NULL; /* Hash table* OldRef* */
4531 static UWord     oldrefHTN    = 0;    /* # elems in oldrefHT */
4532 /* Note: the nr of ref in the oldrefHT will always be equal to
4533    the nr of elements that were allocated from the OldRef pool allocator
4534    as we never free an OldRef : we just re-use them. */
4535 
4536 
4537 /* allocates a new OldRef or re-use the lru one if all allowed OldRef
4538    have already been allocated. */
alloc_or_reuse_OldRef(void)4539 static OldRef* alloc_or_reuse_OldRef ( void )
4540 {
4541    if (oldrefHTN < HG_(clo_conflict_cache_size)) {
4542       oldrefHTN++;
4543       return VG_(allocEltPA) ( oldref_pool_allocator );
4544    } else {
4545       OldRef *oldref_ht;
4546       OldRef *oldref = lru.next;
4547 
4548       OldRef_unchain(oldref);
4549       oldref_ht = VG_(HT_gen_remove) (oldrefHT, oldref, cmp_oldref_tsw);
4550       tl_assert (oldref == oldref_ht);
4551       ctxt__rcdec( oldref->acc.rcec );
4552       return oldref;
4553    }
4554 }
4555 
4556 
/* Return the smaller of two UInts. */
inline static UInt min_UInt ( UInt a, UInt b ) {
   if (b <= a)
      return b;
   return a;
}
4560 
4561 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
4562    first interval is lower, 1 if the first interval is higher, and 0
4563    if there is any overlap.  Redundant paranoia with casting is there
4564    following what looked distinctly like a bug in gcc-4.1.2, in which
4565    some of the comparisons were done signedly instead of
4566    unsignedly. */
4567 /* Copied from exp-ptrcheck/sg_main.c */
cmp_nonempty_intervals(Addr a1,SizeT n1,Addr a2,SizeT n2)4568 static inline Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4569                                             Addr a2, SizeT n2 ) {
4570    UWord a1w = (UWord)a1;
4571    UWord n1w = (UWord)n1;
4572    UWord a2w = (UWord)a2;
4573    UWord n2w = (UWord)n2;
4574    tl_assert(n1w > 0 && n2w > 0);
4575    if (a1w + n1w <= a2w) return -1L;
4576    if (a2w + n2w <= a1w) return 1L;
4577    return 0;
4578 }
4579 
4580 static UWord event_map_stamp = 0; // Used to stamp each OldRef when touched.
4581 
/* Record that 'thr' has just made an access of 'szB' bytes (1, 2, 4
   or 8) at address 'a', a write if 'isW' is set.

   The record is keyed by address plus the (thread, size, R/W)
   triple.  If such a record already exists it is refreshed: its
   RCEC is swapped for the current stack's RCEC if that differs, its
   stamp and W-held lockset are updated, and it is moved to the
   most-recently-used end of the LRU list.  Otherwise an OldRef is
   obtained from alloc_or_reuse_OldRef, filled in, added to oldrefHT
   and placed at the most-recently-used end.  event_map_stamp is
   advanced by one on every call. */
static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
{
   OldRef  probe;
   OldRef* rec;
   RCEC*   ctx;

   tl_assert(thr);
   ThrID thrid = thr->thrid;
   tl_assert(thrid != 0); /* zero is used to denote an empty slot. */

   WordSetID locksHeldW = thr->hgthread->locksetW;

   ctx = get_RCEC( thr );

   // Most frequent sizes are tested first.
   // Note: a szB up to 1 << (32 - SCALARTS_N_THRBITS - 1) could be
   // supported.
   tl_assert (szB == 4 || szB == 8 ||szB == 1 || szB == 2);

   /* The (thread, size, R/W) triple, used both for the lookup and for
      a newly created record. */
   TSW key = (TSW) {.thrid = thrid,
                    .szB   = szB,
                    .isW   = (UInt)(isW & 1)};

   /* Only the fields inspected by the lookup are set in 'probe'. */
   probe.ga      = a;
   probe.acc.tsw = key;
   rec = VG_(HT_gen_lookup) (oldrefHT, &probe, cmp_oldref_tsw);

   if (rec == NULL) {
      /* No record yet for this address+triple: make one. */
      stats__ctxt_neq_tsw_neq_rcec++;
      rec = alloc_or_reuse_OldRef();
      rec->ga             = a;
      rec->acc.tsw        = key;
      rec->stamp          = event_map_stamp;
      rec->acc.locksHeldW = locksHeldW;
      rec->acc.rcec       = ctx;
      ctxt__rcinc(ctx);

      VG_(HT_add_node) ( oldrefHT, rec );
      OldRef_newest (rec);
   } else {
      /* A record for this address and triple already exists. */
      tl_assert (rec->ga == a);

      /* Switch it over to the current RCEC if that is a different
         one, keeping the reference counts in step. */
      if (rec->acc.rcec == ctx) {
         stats__ctxt_eq_tsw_eq_rcec++;
      } else {
         stats__ctxt_eq_tsw_neq_rcec++;
         ctxt__rcdec( rec->acc.rcec );
         ctxt__rcinc(ctx);
         rec->acc.rcec = ctx;
      }
      tl_assert(rec->acc.tsw.thrid == thrid);

      rec->stamp          = event_map_stamp;
      rec->acc.locksHeldW = locksHeldW;

      /* It is now the most recently used record. */
      OldRef_unchain(rec);
      OldRef_newest(rec);
   }

   event_map_stamp++;
}
4648 
4649 
4650 /* Extract info from the conflicting-access machinery.
4651    Returns the most recent conflicting access with thr/[a, a+szB[/isW. */
libhb_event_map_lookup(ExeContext ** resEC,Thr ** resThr,SizeT * resSzB,Bool * resIsW,WordSetID * locksHeldW,Thr * thr,Addr a,SizeT szB,Bool isW)4652 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
4653                               /*OUT*/Thr**        resThr,
4654                               /*OUT*/SizeT*       resSzB,
4655                               /*OUT*/Bool*        resIsW,
4656                               /*OUT*/WordSetID*   locksHeldW,
4657                               Thr* thr, Addr a, SizeT szB, Bool isW )
4658 {
4659    Word    i, j;
4660    OldRef *ref = NULL;
4661    SizeT  ref_szB = 0;
4662 
4663    OldRef *cand_ref;
4664    SizeT  cand_ref_szB;
4665    Addr   cand_a;
4666 
4667    Addr toCheck[15];
4668    Int  nToCheck = 0;
4669 
4670    tl_assert(thr);
4671    tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
4672 
4673    ThrID thrid = thr->thrid;
4674 
4675    toCheck[nToCheck++] = a;
4676    for (i = -7; i < (Word)szB; i++) {
4677       if (i != 0)
4678          toCheck[nToCheck++] = a + i;
4679    }
4680    tl_assert(nToCheck <= 15);
4681 
4682    /* Now see if we can find a suitable matching event for
4683       any of the addresses in toCheck[0 .. nToCheck-1]. */
4684    for (j = 0; j < nToCheck; j++) {
4685 
4686       cand_a = toCheck[j];
4687       //      VG_(printf)("test %ld %p\n", j, cand_a);
4688 
4689       /* Find the first HT element for this address.
4690          We might have several of these. They will be linked via ht_next.
4691          We however need to check various elements as the list contains
4692          all elements that map to the same bucket. */
4693       for (cand_ref = VG_(HT_lookup)( oldrefHT, cand_a );
4694            cand_ref; cand_ref = cand_ref->ht_next) {
4695          if (cand_ref->ga != cand_a)
4696             /* OldRef for another address in this HT bucket. Ignore. */
4697             continue;
4698 
4699          if (cand_ref->acc.tsw.thrid == thrid)
4700             /* This is an access by the same thread, but we're only
4701                interested in accesses from other threads.  Ignore. */
4702             continue;
4703 
4704          if ((!cand_ref->acc.tsw.isW) && (!isW))
4705             /* We don't want to report a read racing against another
4706                read; that's stupid.  So in this case move on. */
4707             continue;
4708 
4709          cand_ref_szB        = cand_ref->acc.tsw.szB;
4710          if (cmp_nonempty_intervals(a, szB, cand_a, cand_ref_szB) != 0)
4711             /* No overlap with the access we're asking about.  Ignore. */
4712             continue;
4713 
4714          /* We have a match. Keep this match if it is newer than
4715             the previous match. Note that stamp are Unsigned Words, and
4716             for long running applications, event_map_stamp might have cycled.
4717             So, 'roll' each stamp using event_map_stamp to have the
4718             stamps in the good order, in case event_map_stamp recycled. */
4719          if (!ref
4720              || (ref->stamp - event_map_stamp)
4721                    < (cand_ref->stamp - event_map_stamp)) {
4722             ref = cand_ref;
4723             ref_szB = cand_ref_szB;
4724          }
4725       }
4726 
4727       if (ref) {
4728          /* return with success */
4729          Int n, maxNFrames;
4730          RCEC*     ref_rcec = ref->acc.rcec;
4731          tl_assert(ref->acc.tsw.thrid);
4732          tl_assert(ref_rcec);
4733          tl_assert(ref_rcec->magic == RCEC_MAGIC);
4734          tl_assert(ref_szB >= 1);
4735          /* Count how many non-zero frames we have. */
4736          maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4737          for (n = 0; n < maxNFrames; n++) {
4738             if (0 == ref_rcec->frames[n]) break;
4739          }
4740          *resEC      = VG_(make_ExeContext_from_StackTrace)(ref_rcec->frames,
4741                                                             n);
4742          *resThr     = Thr__from_ThrID(ref->acc.tsw.thrid);
4743          *resSzB     = ref_szB;
4744          *resIsW     = ref->acc.tsw.isW;
4745          *locksHeldW = ref->acc.locksHeldW;
4746          stats__evm__lookup_found++;
4747          return True;
4748       }
4749 
4750       /* consider next address in toCheck[] */
4751    } /* for (j = 0; j < nToCheck; j++) */
4752 
4753    /* really didn't find anything. */
4754    stats__evm__lookup_notfound++;
4755    return False;
4756 }
4757 
4758 
/* Call 'fn' once for every recorded access whose byte range overlaps
   [a, a+szB), visiting the records from least to most recently used.

   'fn' receives the record's stack frames and the number of leading
   non-zero frames, its thread, address, size, R/W flag and W-held
   lockset.  While walking, each record is asserted to carry an older
   stamp than its successor on the list. */
void libhb_event_map_access_history ( Addr a, SizeT szB, Access_t fn )
{
   OldRef *cur;

   for (cur = lru.next; cur != &mru; cur = cur->next) {
      const SizeT cur_szB = cur->acc.tsw.szB;

      if (cmp_nonempty_intervals(a, szB, cur->ga, cur_szB) == 0) {
         RCEC* ctx     = cur->acc.rcec;
         Int   nFrames = 0;

         /* The trace ends at the first zero frame, if there is one. */
         while (nFrames < N_FRAMES && ctx->frames[nFrames] != 0)
            nFrames++;

         (*fn)(ctx->frames, nFrames,
               Thr__from_ThrID(cur->acc.tsw.thrid),
               cur->ga,
               cur_szB,
               cur->acc.tsw.isW,
               cur->acc.locksHeldW);
      }

      /* Stamps are compared relative to event_map_stamp. */
      tl_assert (cur->next == &mru
                 || ((cur->stamp - event_map_stamp)
                        < cur->next->stamp - event_map_stamp));
   }
}
4787 
event_map_init(void)4788 static void event_map_init ( void )
4789 {
4790    Word i;
4791 
4792    /* Context (RCEC) pool allocator */
4793    rcec_pool_allocator = VG_(newPA) (
4794                              sizeof(RCEC),
4795                              1000 /* RCECs per pool */,
4796                              HG_(zalloc),
4797                              "libhb.event_map_init.1 (RCEC pools)",
4798                              HG_(free)
4799                           );
4800 
4801    /* Context table */
4802    tl_assert(!contextTab);
4803    contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
4804                              N_RCEC_TAB * sizeof(RCEC*) );
4805    for (i = 0; i < N_RCEC_TAB; i++)
4806       contextTab[i] = NULL;
4807 
4808    /* Oldref pool allocator */
4809    oldref_pool_allocator = VG_(newPA)(
4810                                sizeof(OldRef),
4811                                1000 /* OldRefs per pool */,
4812                                HG_(zalloc),
4813                                "libhb.event_map_init.3 (OldRef pools)",
4814                                HG_(free)
4815                             );
4816 
4817    /* Oldref hashtable */
4818    tl_assert(!oldrefHT);
4819    oldrefHT = VG_(HT_construct) ("libhb.event_map_init.4 (oldref hashtable)");
4820 
4821    oldrefHTN = 0;
4822    mru.prev = &lru;
4823    mru.next = NULL;
4824    lru.prev = NULL;
4825    lru.next = &mru;
4826    mru.acc = (Thr_n_RCEC) {.tsw = {.thrid = 0,
4827                                    .szB = 0,
4828                                    .isW = 0},
4829                            .locksHeldW = 0,
4830                            .rcec = NULL};
4831    lru.acc = mru.acc;
4832 }
4833 
event_map__check_reference_counts(void)4834 static void event_map__check_reference_counts ( void )
4835 {
4836    RCEC*   rcec;
4837    OldRef* oldref;
4838    Word    i;
4839    UWord   nEnts = 0;
4840 
4841    /* Set the 'check' reference counts to zero.  Also, optionally
4842       check that the real reference counts are non-zero.  We allow
4843       these to fall to zero before a GC, but the GC must get rid of
4844       all those that are zero, hence none should be zero after a
4845       GC. */
4846    for (i = 0; i < N_RCEC_TAB; i++) {
4847       for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4848          nEnts++;
4849          tl_assert(rcec);
4850          tl_assert(rcec->magic == RCEC_MAGIC);
4851          rcec->rcX = 0;
4852       }
4853    }
4854 
4855    /* check that the stats are sane */
4856    tl_assert(nEnts == stats__ctxt_tab_curr);
4857    tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4858 
4859    /* visit all the referencing points, inc check ref counts */
4860    VG_(HT_ResetIter)( oldrefHT );
4861    oldref = VG_(HT_Next)( oldrefHT );
4862    while (oldref) {
4863       tl_assert (oldref->acc.tsw.thrid);
4864       tl_assert (oldref->acc.rcec);
4865       tl_assert (oldref->acc.rcec->magic == RCEC_MAGIC);
4866       oldref->acc.rcec->rcX++;
4867       oldref = VG_(HT_Next)( oldrefHT );
4868    }
4869 
4870    /* compare check ref counts with actual */
4871    for (i = 0; i < N_RCEC_TAB; i++) {
4872       for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4873          tl_assert(rcec->rc == rcec->rcX);
4874       }
4875    }
4876 }
4877 
4878 __attribute__((noinline))
do_RCEC_GC(void)4879 static void do_RCEC_GC ( void )
4880 {
4881    UInt i;
4882 
4883    if (VG_(clo_stats)) {
4884       static UInt ctr = 1;
4885       VG_(message)(Vg_DebugMsg,
4886                   "libhb: RCEC GC: #%u  %lu slots,"
4887                    " %lu cur ents(ref'd %lu),"
4888                    " %lu max ents\n",
4889                    ctr++,
4890                    (UWord)N_RCEC_TAB,
4891                    stats__ctxt_tab_curr, RCEC_referenced,
4892                    stats__ctxt_tab_max );
4893    }
4894    tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
4895 
4896    /* Throw away all RCECs with zero reference counts */
4897    for (i = 0; i < N_RCEC_TAB; i++) {
4898       RCEC** pp = &contextTab[i];
4899       RCEC*  p  = *pp;
4900       while (p) {
4901          if (p->rc == 0) {
4902             *pp = p->next;
4903             free_RCEC(p);
4904             p = *pp;
4905             tl_assert(stats__ctxt_tab_curr > 0);
4906             stats__ctxt_rcec_gc_discards++;
4907             stats__ctxt_tab_curr--;
4908          } else {
4909             pp = &p->next;
4910             p = p->next;
4911          }
4912       }
4913    }
4914 
4915    tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
4916 }
4917 
4918 /////////////////////////////////////////////////////////
4919 //                                                     //
4920 // Core MSM                                            //
4921 //                                                     //
4922 /////////////////////////////////////////////////////////
4923 
4924 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4925    Nov 08, and again after [...],
4926    June 09. */
4927 
/* Counters for the core MSM.  stats__msmcread / stats__msmcwrite are
   bumped on every call to msmcread / msmcwrite.  The _change variants
   are bumped only when a call produced a shadow value different from
   the old one and the access was bound into the event map (history
   level >= 2, old and new values both C values). */
static ULong stats__msmcread         = 0;
static ULong stats__msmcread_change  = 0;
static ULong stats__msmcwrite        = 0;
static ULong stats__msmcwrite_change = 0;
4932 
4933 /* Some notes on the H1 history mechanism:
4934 
4935    Transition rules are:
4936 
4937    read_{Kr,Kw}(Cr,Cw)  = (Cr,           Cr `join` Kw)
4938    write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4939 
4940    After any access by a thread T to a location L, L's constraint pair
4941    (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4942 
4943    After a race by thread T conflicting with some previous access by
4944    some other thread U, for a location with constraint (before
   processing the later access) (Cr,Cw), then Cw[U] is the segment in
   which the previous access lies.
4947 
4948    Hence in record_race_info, we pass in Cfailed and Kfailed, which
4949    are compared so as to find out which thread(s) this access
4950    conflicts with.  Once that is established, we also require the
4951    pre-update Cw for the location, so we can index into it for those
4952    threads, to get the scalar clock values for the point at which the
4953    former accesses were made.  (In fact we only bother to do any of
4954    this for an arbitrarily chosen one of the conflicting threads, as
4955    that's simpler, it avoids flooding the user with vast amounts of
4956    mostly useless information, and because the program is wrong if it
4957    contains any races at all -- so we don't really need to show all
4958    conflicting access pairs initially, so long as we only show none if
4959    none exist).
4960 
4961    ---
4962 
4963    That requires the auxiliary proof that
4964 
4965       (Cr `join` Kw)[T] == Kw[T]
4966 
4967    Why should that be true?  Because for any thread T, Kw[T] >= the
4968    scalar clock value for T known by any other thread.  In other
4969    words, because T's value for its own scalar clock is at least as up
4970    to date as the value for it known by any other thread (that is true
4971    for both the R- and W- scalar clocks).  Hence no other thread will
4972    be able to feed in a value for that element (indirectly via a
4973    constraint) which will exceed Kw[T], and hence the join cannot
4974    cause that particular element to advance.
4975 */
4976 
4977 __attribute__((noinline))
record_race_info(Thr * acc_thr,Addr acc_addr,SizeT szB,Bool isWrite,VtsID Cfailed,VtsID Kfailed,VtsID Cw)4978 static void record_race_info ( Thr* acc_thr,
4979                                Addr acc_addr, SizeT szB, Bool isWrite,
4980                                VtsID Cfailed,
4981                                VtsID Kfailed,
4982                                VtsID Cw )
4983 {
4984    /* Call here to report a race.  We just hand it onwards to
4985       HG_(record_error_Race).  If that in turn discovers that the
4986       error is going to be collected, then, at history_level 2, that
4987       queries the conflicting-event map.  The alternative would be to
4988       query it right here.  But that causes a lot of pointless queries
4989       for errors which will shortly be discarded as duplicates, and
4990       can become a performance overhead; so we defer the query until
4991       we know the error is not a duplicate. */
4992 
4993    /* Stacks for the bounds of the (or one of the) conflicting
4994       segment(s).  These are only set at history_level 1. */
4995    ExeContext* hist1_seg_start = NULL;
4996    ExeContext* hist1_seg_end   = NULL;
4997    Thread*     hist1_conf_thr  = NULL;
4998 
4999    tl_assert(acc_thr);
5000    tl_assert(acc_thr->hgthread);
5001    tl_assert(acc_thr->hgthread->hbthr == acc_thr);
5002    tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
5003 
5004    if (HG_(clo_history_level) == 1) {
5005       Bool found;
5006       Word firstIx, lastIx;
5007       ULong_n_EC key;
5008 
5009       /* At history_level 1, we must round up the relevant stack-pair
5010          for the conflicting segment right now.  This is because
5011          deferring it is complex; we can't (easily) put Kfailed and
5012          Cfailed into the XError and wait for later without
5013          getting tied up in difficulties with VtsID reference
5014          counting.  So just do it now. */
5015       Thr*  confThr;
5016       ULong confTym = 0;
5017       /* Which thread are we in conflict with?  There may be more than
5018          one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
5019          (in fact it's the one with the lowest Thr* value). */
5020       confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
5021       /* This must exist!  since if it was NULL then there's no
5022          conflict (semantics of return value of
5023          VtsID__findFirst_notLEQ), and msmc{read,write}, which has
5024          called us, just checked exactly this -- that there was in
5025          fact a race. */
5026       tl_assert(confThr);
5027 
5028       /* Get the scalar clock value that the conflicting thread
5029          introduced into the constraint.  A careful examination of the
5030          base machine rules shows that this must be the same as the
5031          conflicting thread's scalar clock when it created this
5032          constraint.  Hence we know the scalar clock of the
5033          conflicting thread when the conflicting access was made. */
5034       confTym = VtsID__indexAt( Cfailed, confThr );
5035 
5036       /* Using this scalar clock, index into the conflicting thread's
5037          collection of stack traces made each time its vector clock
5038          (hence its scalar clock) changed.  This gives the stack
5039          traces at the start and end of the conflicting segment (well,
5040          as per comment just above, of one of the conflicting
5041          segments, if there are more than one). */
5042       key.ull = confTym;
5043       key.ec  = NULL;
5044       /* tl_assert(confThr); -- asserted just above */
5045       tl_assert(confThr->local_Kws_n_stacks);
5046       firstIx = lastIx = 0;
5047       found = VG_(lookupXA_UNSAFE)(
5048                  confThr->local_Kws_n_stacks,
5049                  &key, &firstIx, &lastIx,
5050                  (XACmpFn_t)cmp__ULong_n_EC__by_ULong
5051               );
5052       if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
5053                          "confTym %llu found %d (%ld,%ld)\n",
5054                          Cfailed, Kfailed, Cw,
5055                          confThr, confTym, found, firstIx, lastIx);
5056       /* We can't indefinitely collect stack traces at VTS
5057          transitions, since we'd eventually run out of memory.  Hence
5058          note_local_Kw_n_stack_for will eventually throw away old
5059          ones, which in turn means we might fail to find index value
5060          confTym in the array. */
5061       if (found) {
5062          ULong_n_EC *pair_start, *pair_end;
5063          pair_start
5064             = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
5065          hist1_seg_start = pair_start->ec;
5066          if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
5067             pair_end
5068                = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
5069                                             lastIx+1 );
5070             /* from properties of VG_(lookupXA) and the comparison fn used: */
5071             tl_assert(pair_start->ull < pair_end->ull);
5072             hist1_seg_end = pair_end->ec;
5073             /* Could do a bit better here.  It may be that pair_end
5074                doesn't have a stack, but the following entries in the
5075                array have the same scalar Kw and to have a stack.  So
5076                we should search a bit further along the array than
5077                lastIx+1 if hist1_seg_end is NULL. */
5078          } else {
5079             if (!confThr->llexit_done)
5080                hist1_seg_end = main_get_EC( confThr );
5081          }
5082          // seg_start could be NULL iff this is the first stack in the thread
5083          //if (seg_start) VG_(pp_ExeContext)(seg_start);
5084          //if (seg_end)   VG_(pp_ExeContext)(seg_end);
5085          hist1_conf_thr = confThr->hgthread;
5086       }
5087    }
5088 
5089    HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
5090                            szB, isWrite,
5091                            hist1_conf_thr, hist1_seg_start, hist1_seg_end );
5092 }
5093 
is_sane_SVal_C(SVal sv)5094 static Bool is_sane_SVal_C ( SVal sv ) {
5095    Bool leq;
5096    if (!SVal__isC(sv)) return True;
5097    leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5098    return leq;
5099 }
5100 
5101 
5102 /* Compute new state following a read */
msmcread(SVal svOld,Thr * acc_thr,Addr acc_addr,SizeT szB)5103 static inline SVal msmcread ( SVal svOld,
5104                               /* The following are only needed for
5105                                  creating error reports. */
5106                               Thr* acc_thr,
5107                               Addr acc_addr, SizeT szB )
5108 {
5109    SVal svNew = SVal_INVALID;
5110    stats__msmcread++;
5111 
5112    /* Redundant sanity check on the constraints */
5113    if (CHECK_MSM) {
5114       tl_assert(is_sane_SVal_C(svOld));
5115    }
5116 
5117    if (LIKELY(SVal__isC(svOld))) {
5118       VtsID tviR  = acc_thr->viR;
5119       VtsID tviW  = acc_thr->viW;
5120       VtsID rmini = SVal__unC_Rmin(svOld);
5121       VtsID wmini = SVal__unC_Wmin(svOld);
5122       Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
5123       if (LIKELY(leq)) {
5124          /* no race */
5125          /* Note: RWLOCK subtlety: use tviW, not tviR */
5126          svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5127          goto out;
5128       } else {
5129          /* assert on sanity of constraints. */
5130          Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5131          tl_assert(leqxx);
5132          // same as in non-race case
5133          svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5134          record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
5135                            rmini, /* Cfailed */
5136                            tviR,  /* Kfailed */
5137                            wmini  /* Cw */ );
5138          goto out;
5139       }
5140    }
5141    if (SVal__isA(svOld)) {
5142       /* reading no-access memory (sigh); leave unchanged */
5143       /* check for no pollution */
5144       tl_assert(svOld == SVal_NOACCESS);
5145       svNew = SVal_NOACCESS;
5146       goto out;
5147    }
5148    if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
5149    tl_assert(0);
5150 
5151   out:
5152    if (CHECK_MSM) {
5153       tl_assert(is_sane_SVal_C(svNew));
5154    }
5155    if (UNLIKELY(svNew != svOld)) {
5156       tl_assert(svNew != SVal_INVALID);
5157       if (HG_(clo_history_level) >= 2
5158           && SVal__isC(svOld) && SVal__isC(svNew)) {
5159          event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
5160          stats__msmcread_change++;
5161       }
5162    }
5163    return svNew;
5164 }
5165 
5166 
5167 /* Compute new state following a write */
msmcwrite(SVal svOld,Thr * acc_thr,Addr acc_addr,SizeT szB)5168 static inline SVal msmcwrite ( SVal svOld,
5169                               /* The following are only needed for
5170                                  creating error reports. */
5171                               Thr* acc_thr,
5172                               Addr acc_addr, SizeT szB )
5173 {
5174    SVal svNew = SVal_INVALID;
5175    stats__msmcwrite++;
5176 
5177    /* Redundant sanity check on the constraints */
5178    if (CHECK_MSM) {
5179       tl_assert(is_sane_SVal_C(svOld));
5180    }
5181 
5182    if (LIKELY(SVal__isC(svOld))) {
5183       VtsID tviW  = acc_thr->viW;
5184       VtsID wmini = SVal__unC_Wmin(svOld);
5185       Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
5186       if (LIKELY(leq)) {
5187          /* no race */
5188          svNew = SVal__mkC( tviW, tviW );
5189          goto out;
5190       } else {
5191          VtsID rmini = SVal__unC_Rmin(svOld);
5192          /* assert on sanity of constraints. */
5193          Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5194          tl_assert(leqxx);
5195          // same as in non-race case
5196          // proof: in the non-race case, we have
5197          //    rmini <= wmini (invar on constraints)
5198          //    tviW <= tviR (invar on thread clocks)
5199          //    wmini <= tviW (from run-time check)
5200          // hence from transitivity of <= we have
5201          //    rmini <= wmini <= tviW
5202          // and so join(rmini,tviW) == tviW
5203          // and    join(wmini,tviW) == tviW
5204          // qed.
5205          svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5206                             VtsID__join2(wmini, tviW) );
5207          record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
5208                            wmini, /* Cfailed */
5209                            tviW,  /* Kfailed */
5210                            wmini  /* Cw */ );
5211          goto out;
5212       }
5213    }
5214    if (SVal__isA(svOld)) {
5215       /* writing no-access memory (sigh); leave unchanged */
5216       /* check for no pollution */
5217       tl_assert(svOld == SVal_NOACCESS);
5218       svNew = SVal_NOACCESS;
5219       goto out;
5220    }
5221    if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
5222    tl_assert(0);
5223 
5224   out:
5225    if (CHECK_MSM) {
5226       tl_assert(is_sane_SVal_C(svNew));
5227    }
5228    if (UNLIKELY(svNew != svOld)) {
5229       tl_assert(svNew != SVal_INVALID);
5230       if (HG_(clo_history_level) >= 2
5231           && SVal__isC(svOld) && SVal__isC(svNew)) {
5232          event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
5233          stats__msmcwrite_change++;
5234       }
5235    }
5236    return svNew;
5237 }
5238 
5239 
5240 /////////////////////////////////////////////////////////
5241 //                                                     //
5242 // Apply core MSM to specific memory locations         //
5243 //                                                     //
5244 /////////////////////////////////////////////////////////
5245 
5246 /*------------- ZSM accesses: 8 bit sapply ------------- */
5247 
/* Run the read state machine (msmcread) on the single byte at 'a' on
   behalf of 'thr', updating its shadow value in the cache line.  If
   the tree descriptor does not have the 8-bit flag for this byte set,
   pulldown_to_8 is used first to bring the tree to byte granularity. */
static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      ix, treeNo, treeOff;
   UShort     d;
   SVal       svNew;

   stats__cline_cread08s++;
   cl      = get_cacheline(a);
   ix      = get_cacheline_offset(a);
   treeNo  = get_treeno(a);
   treeOff = get_tree_offset(a); /* == 0 .. 7 */
   d       = cl->descrs[treeNo];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_8_0 << treeOff)) )) {
      /* each tree owns 8 consecutive svals, hence the << 3 */
      cl->descrs[treeNo]
         = pulldown_to_8( &cl->svals[treeNo << 3], treeOff, d );
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svNew = msmcread( cl->svals[ix], thr, a, 1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[ix] = svNew;
}
5271 
/* Run the write state machine (msmcwrite) on the single byte at 'a'
   on behalf of 'thr', updating its shadow value in the cache line.
   If the tree descriptor does not have the 8-bit flag for this byte
   set, pulldown_to_8 is used first to bring the tree to byte
   granularity. */
static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      ix, treeNo, treeOff;
   UShort     d;
   SVal       svNew;

   stats__cline_cwrite08s++;
   cl      = get_cacheline(a);
   ix      = get_cacheline_offset(a);
   treeNo  = get_treeno(a);
   treeOff = get_tree_offset(a); /* == 0 .. 7 */
   d       = cl->descrs[treeNo];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_8_0 << treeOff)) )) {
      /* each tree owns 8 consecutive svals, hence the << 3 */
      cl->descrs[treeNo]
         = pulldown_to_8( &cl->svals[treeNo << 3], treeOff, d );
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svNew = msmcwrite( cl->svals[ix], thr, a, 1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[ix] = svNew;
}
5295 
5296 /*------------- ZSM accesses: 16 bit sapply ------------- */
5297 
/* Run the read state machine on the 2 bytes at 'a' on behalf of
   'thr'.  The access is handled as one 16-bit unit when 'a' is
   16-bit aligned and the tree holds (or can be pulled down to hold) a
   16-bit value there.  Otherwise it is split into two 8-bit reads. */
static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      ix, treeNo, treeOff;
   UShort     d;
   SVal       svNew;

   stats__cline_cread16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl      = get_cacheline(a);
   ix      = get_cacheline_offset(a);
   treeNo  = get_treeno(a);
   treeOff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   d       = cl->descrs[treeNo];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_16_0 << treeOff)) )) {
      /* No 16-bit value at this position.  If the valid data sits
         lower in the tree the access must be split; otherwise pull
         the tree down to 16 bits. */
      if (valid_value_is_below_me_16(d, treeOff))
         goto slowcase;
      cl->descrs[treeNo]
         = pulldown_to_16( &cl->svals[treeNo << 3], treeOff, d );
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svNew = msmcread( cl->svals[ix], thr, a, 2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[ix] = svNew;
   return;

  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcread( thr, a + 0 );
   zsm_sapply08__msmcread( thr, a + 1 );
}
5331 
/* Run the write state machine on the 2 bytes at 'a' on behalf of
   'thr'.  The access is handled as one 16-bit unit when 'a' is
   16-bit aligned and the tree holds (or can be pulled down to hold) a
   16-bit value there.  Otherwise it is split into two 8-bit writes. */
static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      ix, treeNo, treeOff;
   UShort     d;
   SVal       svNew;

   stats__cline_cwrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl      = get_cacheline(a);
   ix      = get_cacheline_offset(a);
   treeNo  = get_treeno(a);
   treeOff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   d       = cl->descrs[treeNo];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_16_0 << treeOff)) )) {
      /* No 16-bit value at this position.  If the valid data sits
         lower in the tree the access must be split; otherwise pull
         the tree down to 16 bits. */
      if (valid_value_is_below_me_16(d, treeOff))
         goto slowcase;
      cl->descrs[treeNo]
         = pulldown_to_16( &cl->svals[treeNo << 3], treeOff, d );
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svNew = msmcwrite( cl->svals[ix], thr, a, 2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[ix] = svNew;
   return;

  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcwrite( thr, a + 0 );
   zsm_sapply08__msmcwrite( thr, a + 1 );
}
5365 
5366 /*------------- ZSM accesses: 32 bit sapply ------------- */
5367 
/* Run the read state machine on the 4 bytes at 'a' on behalf of
   'thr'.  The access is handled as one 32-bit unit when 'a' is
   32-bit aligned and the tree holds a 32-bit value there, or holds a
   valid value higher up that can be pulled down to 32 bits.
   Otherwise it is split into two 16-bit reads. */
static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      ix, treeNo, treeOff;
   UShort     d;
   SVal       svNew;

   stats__cline_cread32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl      = get_cacheline(a);
   ix      = get_cacheline_offset(a);
   treeNo  = get_treeno(a);
   treeOff = get_tree_offset(a); /* == 0 or 4 */
   d       = cl->descrs[treeNo];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_32_0 << treeOff)) )) {
      /* No 32-bit value at this position.  Unless a valid value
         exists above us (which can be pulled down), the data must be
         finer-grained and the access has to be split. */
      if (!valid_value_is_above_me_32(d, treeOff))
         goto slowcase;
      cl->descrs[treeNo]
         = pulldown_to_32( &cl->svals[treeNo << 3], treeOff, d );
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svNew = msmcread( cl->svals[ix], thr, a, 4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[ix] = svNew;
   return;

  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcread( thr, a + 0 );
   zsm_sapply16__msmcread( thr, a + 2 );
}
5401 
/* Run the write state machine on the 4 bytes at 'a' on behalf of
   'thr'.  The access is handled as one 32-bit unit when 'a' is
   32-bit aligned and the tree holds a 32-bit value there, or holds a
   valid value higher up that can be pulled down to 32 bits.
   Otherwise it is split into two 16-bit writes. */
static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      ix, treeNo, treeOff;
   UShort     d;
   SVal       svNew;

   stats__cline_cwrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl      = get_cacheline(a);
   ix      = get_cacheline_offset(a);
   treeNo  = get_treeno(a);
   treeOff = get_tree_offset(a); /* == 0 or 4 */
   d       = cl->descrs[treeNo];
   if (UNLIKELY( 0 == (d & (TREE_DESCR_32_0 << treeOff)) )) {
      /* No 32-bit value at this position.  Unless a valid value
         exists above us (which can be pulled down), the data must be
         finer-grained and the access has to be split. */
      if (!valid_value_is_above_me_32(d, treeOff))
         goto slowcase;
      cl->descrs[treeNo]
         = pulldown_to_32( &cl->svals[treeNo << 3], treeOff, d );
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svNew = msmcwrite( cl->svals[ix], thr, a, 4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[ix] = svNew;
   return;

  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcwrite( thr, a + 0 );
   zsm_sapply16__msmcwrite( thr, a + 2 );
}
5435 
5436 /*------------- ZSM accesses: 64 bit sapply ------------- */
5437 
/* Run the read state machine on the 8 bytes at 'a' on behalf of
   'thr'.  The access is handled as one 64-bit unit only when 'a' is
   64-bit aligned and the tree descriptor has TREE_DESCR_64 set.
   Otherwise it is split into two 32-bit reads. */
static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      ix, treeNo;
   SVal       svNew;

   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl     = get_cacheline(a);
   ix     = get_cacheline_offset(a);
   treeNo = get_treeno(a);
   /* The tree offset would be 0 here, so it is not computed. */
   if (UNLIKELY( 0 == (cl->descrs[treeNo] & TREE_DESCR_64) ))
      goto slowcase;
   svNew = msmcread( cl->svals[ix], thr, a, 8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[ix] = svNew;
   return;

  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}
5465 
/* Run the write state machine on the 8 bytes at 'a' on behalf of
   'thr'.  The access is handled as one 64-bit unit only when 'a' is
   64-bit aligned and the tree descriptor has TREE_DESCR_64 set.
   Otherwise it is split into two 32-bit writes. */
static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      ix, treeNo;
   SVal       svNew;

   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl     = get_cacheline(a);
   ix     = get_cacheline_offset(a);
   treeNo = get_treeno(a);
   /* The tree offset would be 0 here, so it is not computed. */
   if (UNLIKELY( 0 == (cl->descrs[treeNo] & TREE_DESCR_64) ))
      goto slowcase;
   svNew = msmcwrite( cl->svals[ix], thr, a, 8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[ix] = svNew;
   return;

  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}
5493 
5494 /*--------------- ZSM accesses: 8 bit swrite --------------- */
5495 
5496 static
zsm_swrite08(Addr a,SVal svNew)5497 void zsm_swrite08 ( Addr a, SVal svNew ) {
5498    CacheLine* cl;
5499    UWord      cloff, tno, toff;
5500    UShort     descr;
5501    stats__cline_swrite08s++;
5502    cl    = get_cacheline(a);
5503    cloff = get_cacheline_offset(a);
5504    tno   = get_treeno(a);
5505    toff  = get_tree_offset(a); /* == 0 .. 7 */
5506    descr = cl->descrs[tno];
5507    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5508       SVal* tree = &cl->svals[tno << 3];
5509       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5510       if (CHECK_ZSM)
5511          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5512    }
5513    tl_assert(svNew != SVal_INVALID);
5514    cl->svals[cloff] = svNew;
5515 }
5516 
5517 /*--------------- ZSM accesses: 16 bit swrite --------------- */
5518 
5519 static
zsm_swrite16(Addr a,SVal svNew)5520 void zsm_swrite16 ( Addr a, SVal svNew ) {
5521    CacheLine* cl;
5522    UWord      cloff, tno, toff;
5523    UShort     descr;
5524    stats__cline_swrite16s++;
5525    if (UNLIKELY(!aligned16(a))) goto slowcase;
5526    cl    = get_cacheline(a);
5527    cloff = get_cacheline_offset(a);
5528    tno   = get_treeno(a);
5529    toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5530    descr = cl->descrs[tno];
5531    if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5532       if (valid_value_is_below_me_16(descr, toff)) {
5533          /* Writing at this level.  Need to fix up 'descr'. */
5534          cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5535          /* At this point, the tree does not match cl->descr[tno] any
5536             more.  The assignments below will fix it up. */
5537       } else {
5538          /* We can't indiscriminately write on the w16 node as in the
5539             w64 case, as that might make the node inconsistent with
5540             its parent.  So first, pull down to this level. */
5541          SVal* tree = &cl->svals[tno << 3];
5542          cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5543       if (CHECK_ZSM)
5544          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5545       }
5546    }
5547    tl_assert(svNew != SVal_INVALID);
5548    cl->svals[cloff + 0] = svNew;
5549    cl->svals[cloff + 1] = SVal_INVALID;
5550    return;
5551   slowcase: /* misaligned */
5552    stats__cline_16to8splits++;
5553    zsm_swrite08( a + 0, svNew );
5554    zsm_swrite08( a + 1, svNew );
5555 }
5556 
5557 /*--------------- ZSM accesses: 32 bit swrite --------------- */
5558 
5559 static
zsm_swrite32(Addr a,SVal svNew)5560 void zsm_swrite32 ( Addr a, SVal svNew ) {
5561    CacheLine* cl;
5562    UWord      cloff, tno, toff;
5563    UShort     descr;
5564    stats__cline_swrite32s++;
5565    if (UNLIKELY(!aligned32(a))) goto slowcase;
5566    cl    = get_cacheline(a);
5567    cloff = get_cacheline_offset(a);
5568    tno   = get_treeno(a);
5569    toff  = get_tree_offset(a); /* == 0 or 4 */
5570    descr = cl->descrs[tno];
5571    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5572       if (valid_value_is_above_me_32(descr, toff)) {
5573          /* We can't indiscriminately write on the w32 node as in the
5574             w64 case, as that might make the node inconsistent with
5575             its parent.  So first, pull down to this level. */
5576          SVal* tree = &cl->svals[tno << 3];
5577          cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5578          if (CHECK_ZSM)
5579             tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5580       } else {
5581          /* Writing at this level.  Need to fix up 'descr'. */
5582          cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5583          /* At this point, the tree does not match cl->descr[tno] any
5584             more.  The assignments below will fix it up. */
5585       }
5586    }
5587    tl_assert(svNew != SVal_INVALID);
5588    cl->svals[cloff + 0] = svNew;
5589    cl->svals[cloff + 1] = SVal_INVALID;
5590    cl->svals[cloff + 2] = SVal_INVALID;
5591    cl->svals[cloff + 3] = SVal_INVALID;
5592    return;
5593   slowcase: /* misaligned */
5594    stats__cline_32to16splits++;
5595    zsm_swrite16( a + 0, svNew );
5596    zsm_swrite16( a + 2, svNew );
5597 }
5598 
5599 /*--------------- ZSM accesses: 64 bit swrite --------------- */
5600 
5601 static
zsm_swrite64(Addr a,SVal svNew)5602 void zsm_swrite64 ( Addr a, SVal svNew ) {
5603    CacheLine* cl;
5604    UWord      cloff, tno;
5605    //UWord    toff;
5606    stats__cline_swrite64s++;
5607    if (UNLIKELY(!aligned64(a))) goto slowcase;
5608    cl    = get_cacheline(a);
5609    cloff = get_cacheline_offset(a);
5610    tno   = get_treeno(a);
5611    //toff  = get_tree_offset(a); /* == 0, unused */
5612    cl->descrs[tno] = TREE_DESCR_64;
5613    tl_assert(svNew != SVal_INVALID);
5614    cl->svals[cloff + 0] = svNew;
5615    cl->svals[cloff + 1] = SVal_INVALID;
5616    cl->svals[cloff + 2] = SVal_INVALID;
5617    cl->svals[cloff + 3] = SVal_INVALID;
5618    cl->svals[cloff + 4] = SVal_INVALID;
5619    cl->svals[cloff + 5] = SVal_INVALID;
5620    cl->svals[cloff + 6] = SVal_INVALID;
5621    cl->svals[cloff + 7] = SVal_INVALID;
5622    return;
5623   slowcase: /* misaligned */
5624    stats__cline_64to32splits++;
5625    zsm_swrite32( a + 0, svNew );
5626    zsm_swrite32( a + 4, svNew );
5627 }
5628 
5629 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
5630 
5631 static
zsm_sread08(Addr a)5632 SVal zsm_sread08 ( Addr a ) {
5633    CacheLine* cl;
5634    UWord      cloff, tno, toff;
5635    UShort     descr;
5636    stats__cline_sread08s++;
5637    cl    = get_cacheline(a);
5638    cloff = get_cacheline_offset(a);
5639    tno   = get_treeno(a);
5640    toff  = get_tree_offset(a); /* == 0 .. 7 */
5641    descr = cl->descrs[tno];
5642    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5643       SVal* tree = &cl->svals[tno << 3];
5644       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5645    }
5646    return cl->svals[cloff];
5647 }
5648 
/* Copy the shadow value of the byte at 'src' to the byte at 'dst'.
   'uu_normalise' is accepted but not used. */
static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   stats__cline_scopy08s++;
   zsm_swrite08( dst, zsm_sread08( src ) );
}
5655 
5656 
5657 /* Block-copy states (needed for implementing realloc()).  Note this
5658    doesn't change the filtering arrangements.  The caller of
5659    zsm_scopy_range needs to attend to that. */
5660 
zsm_scopy_range(Addr src,Addr dst,SizeT len)5661 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
5662 {
5663    SizeT i;
5664    if (len == 0)
5665       return;
5666 
5667    /* assert for non-overlappingness */
5668    tl_assert(src+len <= dst || dst+len <= src);
5669 
5670    /* To be simple, just copy byte by byte.  But so as not to wreck
5671       performance for later accesses to dst[0 .. len-1], normalise
5672       destination lines as we finish with them, and also normalise the
5673       line containing the first and last address. */
5674    for (i = 0; i < len; i++) {
5675       Bool normalise
5676          = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5677            || i == 0       /* first in range */
5678            || i == len-1;  /* last in range */
5679       zsm_scopy08( src+i, dst+i, normalise );
5680    }
5681 }
5682 
5683 
5684 /* For setting address ranges to a given value.  Has considerable
5685    sophistication so as to avoid generating large numbers of pointless
5686    cache loads/writebacks for large ranges. */
5687 
5688 /* Do small ranges in-cache, in the obvious way. */
5689 static
zsm_sset_range_SMALL(Addr a,SizeT len,SVal svNew)5690 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
5691 {
5692    /* fast track a couple of common cases */
5693    if (len == 4 && aligned32(a)) {
5694       zsm_swrite32( a, svNew );
5695       return;
5696    }
5697    if (len == 8 && aligned64(a)) {
5698       zsm_swrite64( a, svNew );
5699       return;
5700    }
5701 
5702    /* be completely general (but as efficient as possible) */
5703    if (len == 0) return;
5704 
5705    if (!aligned16(a) && len >= 1) {
5706       zsm_swrite08( a, svNew );
5707       a += 1;
5708       len -= 1;
5709       tl_assert(aligned16(a));
5710    }
5711    if (len == 0) return;
5712 
5713    if (!aligned32(a) && len >= 2) {
5714       zsm_swrite16( a, svNew );
5715       a += 2;
5716       len -= 2;
5717       tl_assert(aligned32(a));
5718    }
5719    if (len == 0) return;
5720 
5721    if (!aligned64(a) && len >= 4) {
5722       zsm_swrite32( a, svNew );
5723       a += 4;
5724       len -= 4;
5725       tl_assert(aligned64(a));
5726    }
5727    if (len == 0) return;
5728 
5729    if (len >= 8) {
5730       tl_assert(aligned64(a));
5731       while (len >= 8) {
5732          zsm_swrite64( a, svNew );
5733          a += 8;
5734          len -= 8;
5735       }
5736       tl_assert(aligned64(a));
5737    }
5738    if (len == 0) return;
5739 
5740    if (len >= 4)
5741       tl_assert(aligned32(a));
5742    if (len >= 4) {
5743       zsm_swrite32( a, svNew );
5744       a += 4;
5745       len -= 4;
5746    }
5747    if (len == 0) return;
5748 
5749    if (len >= 2)
5750       tl_assert(aligned16(a));
5751    if (len >= 2) {
5752       zsm_swrite16( a, svNew );
5753       a += 2;
5754       len -= 2;
5755    }
5756    if (len == 0) return;
5757 
5758    if (len >= 1) {
5759       zsm_swrite08( a, svNew );
5760       //a += 1;
5761       len -= 1;
5762    }
5763    tl_assert(len == 0);
5764 }
5765 
5766 
5767 /* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
5768    for larger ranges, try to operate directly on the out-of-cache
5769    representation, rather than dragging lines into the cache,
5770    overwriting them, and forcing them out.  This turns out to be an
5771    important performance optimisation.
5772 
5773    Note that this doesn't change the filtering arrangements.  The
5774    caller of zsm_sset_range needs to attend to that. */
5775 
zsm_sset_range(Addr a,SizeT len,SVal svNew)5776 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
5777 {
5778    tl_assert(svNew != SVal_INVALID);
5779    stats__cache_make_New_arange += (ULong)len;
5780 
5781    if (0 && len > 500)
5782       VG_(printf)("make New      ( %#lx, %lu )\n", a, len );
5783 
5784    if (0) {
5785       static UWord n_New_in_cache = 0;
5786       static UWord n_New_not_in_cache = 0;
5787       /* tag is 'a' with the in-line offset masked out,
5788          eg a[31]..a[4] 0000 */
5789       Addr       tag = a & ~(N_LINE_ARANGE - 1);
5790       UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5791       if (LIKELY(tag == cache_shmem.tags0[wix])) {
5792          n_New_in_cache++;
5793       } else {
5794          n_New_not_in_cache++;
5795       }
5796       if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5797          VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5798                      n_New_in_cache, n_New_not_in_cache );
5799    }
5800 
5801    if (LIKELY(len < 2 * N_LINE_ARANGE)) {
5802       zsm_sset_range_SMALL( a, len, svNew );
5803    } else {
5804       Addr  before_start  = a;
5805       Addr  aligned_start = cacheline_ROUNDUP(a);
5806       Addr  after_start   = cacheline_ROUNDDN(a + len);
5807       UWord before_len    = aligned_start - before_start;
5808       UWord aligned_len   = after_start - aligned_start;
5809       UWord after_len     = a + len - after_start;
5810       tl_assert(before_start <= aligned_start);
5811       tl_assert(aligned_start <= after_start);
5812       tl_assert(before_len < N_LINE_ARANGE);
5813       tl_assert(after_len < N_LINE_ARANGE);
5814       tl_assert(get_cacheline_offset(aligned_start) == 0);
5815       if (get_cacheline_offset(a) == 0) {
5816          tl_assert(before_len == 0);
5817          tl_assert(a == aligned_start);
5818       }
5819       if (get_cacheline_offset(a+len) == 0) {
5820          tl_assert(after_len == 0);
5821          tl_assert(after_start == a+len);
5822       }
5823       if (before_len > 0) {
5824          zsm_sset_range_SMALL( before_start, before_len, svNew );
5825       }
5826       if (after_len > 0) {
5827          zsm_sset_range_SMALL( after_start, after_len, svNew );
5828       }
5829       stats__cache_make_New_inZrep += (ULong)aligned_len;
5830 
5831       while (1) {
5832          Addr tag;
5833          UWord wix;
5834          if (aligned_start >= after_start)
5835             break;
5836          tl_assert(get_cacheline_offset(aligned_start) == 0);
5837          tag = aligned_start & ~(N_LINE_ARANGE - 1);
5838          wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5839          if (tag == cache_shmem.tags0[wix]) {
5840             UWord i;
5841             for (i = 0; i < N_LINE_ARANGE / 8; i++)
5842                zsm_swrite64( aligned_start + i * 8, svNew );
5843          } else {
5844             UWord i;
5845             Word zix;
5846             SecMap* sm;
5847             LineZ* lineZ;
5848             /* This line is not in the cache.  Do not force it in; instead
5849                modify it in-place. */
5850             /* find the Z line to write in and rcdec it or the
5851                associated F line. */
5852             find_Z_for_writing( &sm, &zix, tag );
5853             tl_assert(sm);
5854             tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5855             lineZ = &sm->linesZ[zix];
5856             lineZ->dict[0] = svNew;
5857             lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5858             for (i = 0; i < N_LINE_ARANGE/4; i++)
5859                lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5860             rcinc_LineZ(lineZ);
5861          }
5862          aligned_start += N_LINE_ARANGE;
5863          aligned_len -= N_LINE_ARANGE;
5864       }
5865       tl_assert(aligned_start == after_start);
5866       tl_assert(aligned_len == 0);
5867    }
5868 }
5869 
5870 
5871 /////////////////////////////////////////////////////////
5872 //                                                     //
5873 // Front-filtering accesses                            //
5874 //                                                     //
5875 /////////////////////////////////////////////////////////
5876 
/* Front-filter counters.  stats__f_ac is bumped on every call to one
   of the fixed-size zsm_sapplyNN_f__msmc{read,write} wrappers below;
   stats__f_sk is bumped when the thread's filter reports that the
   access can be skipped. */
static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

/* STATS__F_SHOW: optional dump of the two counters.  The printing
   variant is compiled out by the '#if 0', so the macro currently
   expands to nothing.  If enabled, it prints whenever the low 24 bits
   of stats__f_ac are all zero. */
#if 0
#  define STATS__F_SHOW \
     do { \
        if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
           VG_(printf)("filters: ac %lu sk %lu\n",   \
           stats__f_ac, stats__f_sk); \
     } while (0)
#else
#  define STATS__F_SHOW /* */
#endif
5890 
/* Filtered 8-bit write: count the access, then run the real
   zsm_sapply08__msmcwrite only if thr's filter does not allow the
   access to be skipped. */
void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY(!Filter__ok_to_skip_cwr08(thr->filter, a))) {
      zsm_sapply08__msmcwrite(thr, a);
      return;
   }
   stats__f_sk++;
}
5900 
/* Filtered 16-bit write: count the access, then run the real
   zsm_sapply16__msmcwrite only if thr's filter does not allow the
   access to be skipped. */
void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY(!Filter__ok_to_skip_cwr16(thr->filter, a))) {
      zsm_sapply16__msmcwrite(thr, a);
      return;
   }
   stats__f_sk++;
}
5910 
/* Filtered 32-bit write: count the access, then run the real
   zsm_sapply32__msmcwrite only if thr's filter does not allow the
   access to be skipped. */
void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY(!Filter__ok_to_skip_cwr32(thr->filter, a))) {
      zsm_sapply32__msmcwrite(thr, a);
      return;
   }
   stats__f_sk++;
}
5920 
/* Filtered 64-bit write: count the access, then run the real
   zsm_sapply64__msmcwrite only if thr's filter does not allow the
   access to be skipped. */
void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY(!Filter__ok_to_skip_cwr64(thr->filter, a))) {
      zsm_sapply64__msmcwrite(thr, a);
      return;
   }
   stats__f_sk++;
}
5930 
/* Apply a filtered write of 'len' bytes at 'a' on behalf of thr.
   The range is broken into naturally aligned 1/2/4/8 byte pieces,
   each passed to the matching fixed-size filtered write routine. */
void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* Two very common shapes get a direct route. */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* General case. */
   if (len == 0)
      return;

   /* Head: climb towards 64-bit alignment. */
   if (!aligned16(a)) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a   += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len >= 2 && !aligned32(a)) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a   += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len >= 4 && !aligned64(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a   += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }

   /* Middle: whole 64-bit pieces. */
   if (len >= 8) {
      tl_assert(aligned64(a));
      do {
         zsm_sapply64_f__msmcwrite( thr, a );
         a   += 8;
         len -= 8;
      } while (len >= 8);
      tl_assert(aligned64(a));
   }

   /* Tail: at most one piece each of 4, 2 and 1 bytes. */
   if (len >= 4) {
      tl_assert(aligned32(a));
      zsm_sapply32_f__msmcwrite( thr, a );
      a   += 4;
      len -= 4;
   }
   if (len >= 2) {
      tl_assert(aligned16(a));
      zsm_sapply16_f__msmcwrite( thr, a );
      a   += 2;
      len -= 2;
   }
   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      len -= 1;
   }
   tl_assert(len == 0);
}
6006 
/* Filtered 8-bit read: count the access, then run the real
   zsm_sapply08__msmcread only if thr's filter does not allow the
   access to be skipped. */
void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY(!Filter__ok_to_skip_crd08(thr->filter, a))) {
      zsm_sapply08__msmcread(thr, a);
      return;
   }
   stats__f_sk++;
}
6016 
/* Filtered 16-bit read: count the access, then run the real
   zsm_sapply16__msmcread only if thr's filter does not allow the
   access to be skipped. */
void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY(!Filter__ok_to_skip_crd16(thr->filter, a))) {
      zsm_sapply16__msmcread(thr, a);
      return;
   }
   stats__f_sk++;
}
6026 
/* Filtered 32-bit read: count the access, then run the real
   zsm_sapply32__msmcread only if thr's filter does not allow the
   access to be skipped. */
void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY(!Filter__ok_to_skip_crd32(thr->filter, a))) {
      zsm_sapply32__msmcread(thr, a);
      return;
   }
   stats__f_sk++;
}
6036 
/* Filtered 64-bit read: count the access, then run the real
   zsm_sapply64__msmcread only if thr's filter does not allow the
   access to be skipped. */
void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (UNLIKELY(!Filter__ok_to_skip_crd64(thr->filter, a))) {
      zsm_sapply64__msmcread(thr, a);
      return;
   }
   stats__f_sk++;
}
6046 
/* Apply a filtered read of 'len' bytes at 'a' on behalf of thr.
   The range is broken into naturally aligned 1/2/4/8 byte pieces,
   each passed to the matching fixed-size filtered read routine. */
void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* Two very common shapes get a direct route. */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* General case. */
   if (len == 0)
      return;

   /* Head: climb towards 64-bit alignment. */
   if (!aligned16(a)) {
      zsm_sapply08_f__msmcread( thr, a );
      a   += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len >= 2 && !aligned32(a)) {
      zsm_sapply16_f__msmcread( thr, a );
      a   += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len >= 4 && !aligned64(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      a   += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }

   /* Middle: whole 64-bit pieces. */
   if (len >= 8) {
      tl_assert(aligned64(a));
      do {
         zsm_sapply64_f__msmcread( thr, a );
         a   += 8;
         len -= 8;
      } while (len >= 8);
      tl_assert(aligned64(a));
   }

   /* Tail: at most one piece each of 4, 2 and 1 bytes. */
   if (len >= 4) {
      tl_assert(aligned32(a));
      zsm_sapply32_f__msmcread( thr, a );
      a   += 4;
      len -= 4;
   }
   if (len >= 2) {
      tl_assert(aligned16(a));
      zsm_sapply16_f__msmcread( thr, a );
      a   += 2;
      len -= 2;
   }
   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      len -= 1;
   }
   tl_assert(len == 0);
}
6122 
/* Called when 'thr' resumes.  Requires a non-NULL thread that has
   not done its low-level exit.  Clears the thread's filter and, at
   history level 1, makes sure the thread has at least one marker
   stack. */
void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   Filter__clear(thr->filter, "libhb_Thr_resumes");

   if (HG_(clo_history_level) != 1)
      return;

   /* Admittedly a kludge: rather than working out when, during
      thread startup, a stack snapshot can safely be taken, just take
      one now if this thread has no marker stacks at all. */
   tl_assert(thr->local_Kws_n_stacks);
   if (0 == VG_(sizeXA)( thr->local_Kws_n_stacks ))
      note_local_Kw_n_stack_for(thr);
}
6139 
6140 
6141 /////////////////////////////////////////////////////////
6142 //                                                     //
6143 // Synchronisation objects                             //
6144 //                                                     //
6145 /////////////////////////////////////////////////////////
6146 
/* Head of a doubly linked list of all the SOs, chained through their
   admin_prev/admin_next fields.  SO__Alloc pushes new objects at the
   head; SO__Dealloc unlinks them.  NULL when the list is empty. */
SO* admin_SO = NULL;
6149 
SO__Alloc(void)6150 static SO* SO__Alloc ( void )
6151 {
6152    SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6153    so->viR   = VtsID_INVALID;
6154    so->viW   = VtsID_INVALID;
6155    so->magic = SO_MAGIC;
6156    /* Add to double linked list */
6157    if (admin_SO) {
6158       tl_assert(admin_SO->admin_prev == NULL);
6159       admin_SO->admin_prev = so;
6160       so->admin_next = admin_SO;
6161    } else {
6162       so->admin_next = NULL;
6163    }
6164    so->admin_prev = NULL;
6165    admin_SO = so;
6166    /* */
6167    return so;
6168 }
6169 
/* Release 'so': drop its references on viR/viW if it holds any,
   clear its magic, unlink it from the admin_SO list and free it.
   viR and viW must be either both valid or both invalid. */
static void SO__Dealloc ( SO* so )
{
   SO* before;
   SO* after;

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   } else {
      tl_assert(so->viW == VtsID_INVALID);
   }
   so->magic = 0;

   /* Unlink from the doubly linked list. */
   before = so->admin_prev;
   after  = so->admin_next;
   if (before)
      before->admin_next = after;
   if (after)
      after->admin_prev = before;
   if (admin_SO == so)
      admin_SO = after;

   HG_(free)( so );
}
6192 
6193 
6194 /////////////////////////////////////////////////////////
6195 //                                                     //
6196 // Top Level API                                       //
6197 //                                                     //
6198 /////////////////////////////////////////////////////////
6199 
/* Debug printer for a thread's read/write VtsIDs, labelled with
   'str'.  Currently switched off by the unconditional return at the
   top. */
static void show_thread_state ( const HChar* str, Thr* t )
{
   if (1) return;

   if (t->viR != t->viW) {
      /* distinct read and write clocks: show both */
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
   } else {
      /* identical clocks: show just one */
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
   }
   VG_(printf)("%s","\n");
}
6215 
6216 
/* Initialise the library.  Records the two callbacks (both must be
   non-NULL), sets up the VTS machinery, the event map and shadow
   memory, then creates and returns the root thread.  The root thread
   gets a singleton VTS at value 1 as both its read and write
   clock. */
Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  root;
   VtsID root_vi;

   /* ScalarTS values are stored in large numbers, so their size must
      be exactly what is expected. */
   STATIC_ASSERT(sizeof(ScalarTS) == 8);

   /* The first 1024 thread IDs are unusable ... */
   STATIC_ASSERT(SCALARTS_N_THRBITS >= 11);
   /* ... and the field must fit in a UInt with 5 bits left over (see
      the definitions of Thr_n_RCEC and TSW). */
   STATIC_ASSERT(SCALARTS_N_THRBITS <= 27);

   /* Thr_n_RCEC must be 2 words on 64-bit targets and 3 words on
      32-bit ones.  This is a space concern rather than a correctness
      one, since there are very many of them; packing into 2 words is
      simply not possible on a 32-bit target. */
   STATIC_ASSERT(   (sizeof(UWord) == 8 && sizeof(Thr_n_RCEC) == 16)
                 || (sizeof(UWord) == 4 && sizeof(Thr_n_RCEC) == 12));
   STATIC_ASSERT(sizeof(TSW) == sizeof(UInt));

   /* Word sets are 32 bits wide, on 64-bit targets too. */
   STATIC_ASSERT(sizeof(WordSetID) == 4);
   STATIC_ASSERT(sizeof(WordSet) == sizeof(WordSetID));

   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC         = get_EC;

   /* hg_wordfm and hg_wordset need no initialisation. */

   /* Scratch VTS for singleton, tick and join operations; allocated
      here once and never freed. */
   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   temp_max_sized_VTS->id = VtsID_INVALID;
   verydead_thread_tables_init();
   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   /* shadow memory */
   zsm_init( );

   /* root thread */
   root    = Thr__new();
   root_vi = VtsID__mk_Singleton( root, 1 );
   root->viR = root_vi;
   root->viW = root_vi;
   VtsID__rcinc(root->viR);
   VtsID__rcinc(root->viW);

   show_thread_state("  root", root);
   return root;
}
6278 
6279 
/* Create and return a new thread whose clocks derive from 'parent'.
   The child's clocks are the parent's ticked at the child's index;
   the parent's own clocks are then ticked at the parent's index.
   Both threads have their filters cleared. */
Thr* libhb_create ( Thr* parent )
{
   Thr* kid = Thr__new();

   /* The child's index is unique and so has never appeared in any
      VTS.  Its implicit value in the parent's VTSs is therefore zero,
      and one after the tick. */
   kid->viR = VtsID__tick( parent->viR, kid );
   kid->viW = VtsID__tick( parent->viW, kid );
   Filter__clear(kid->filter, "libhb_create(child)");
   VtsID__rcinc(kid->viR);
   VtsID__rcinc(kid->viW);
   /* note_local_Kw_n_stack_for( kid ) cannot be done yet, since the
      child may not have a valid TId at this point.  libhb_Thr_resumes
      takes care of it the first time the thread runs. */

   tl_assert(VtsID__indexAt( kid->viR, kid ) == 1);
   tl_assert(VtsID__indexAt( kid->viW, kid ) == 1);

   /* The parent advances as well: release the old clocks, tick, and
      take references on the new ones. */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", kid);
   show_thread_state("parent", parent);

   return kid;
}
6315 
/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
libhb_shutdown(Bool show_stats)6318 void libhb_shutdown ( Bool show_stats )
6319 {
6320    if (show_stats) {
6321       VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6322       VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6323                   stats__secmaps_allocd,
6324                   stats__secmap_ga_space_covered);
6325       VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6326                   stats__secmap_linesZ_allocd,
6327                   stats__secmap_linesZ_bytes);
6328       VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)"
6329                   " (%'10lu used)\n",
6330                   VG_(sizePA) (LineF_pool_allocator),
6331                   VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
6332                   shmem__SecMap_used_linesF());
6333       VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6334                   " #%lu scanGC \n",
6335                   stats__secmaps_in_map_shmem,
6336                   shmem__SecMap_do_GC(False /* really do GC */),
6337                   stats__secmaps_scanGC);
6338       tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6339       VG_(printf)(" secmaps: %'10lu in freelist,"
6340                   " total (scanGCed %'lu, ssetGCed %'lu)\n",
6341                   SecMap_freelist_length(),
6342                   stats__secmaps_scanGCed,
6343                   stats__secmaps_ssetGCed);
6344       VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6345                   stats__secmaps_search, stats__secmaps_search_slow);
6346 
6347       VG_(printf)("%s","\n");
6348       VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
6349                   stats__cache_totrefs, stats__cache_totmisses );
6350       VG_(printf)("   cache: %'14lu Z-fetch,    %'14lu F-fetch\n",
6351                   stats__cache_Z_fetches, stats__cache_F_fetches );
6352       VG_(printf)("   cache: %'14lu Z-wback,    %'14lu F-wback\n",
6353                   stats__cache_Z_wbacks, stats__cache_F_wbacks );
6354       VG_(printf)("   cache: %'14lu flushes_invals\n",
6355                   stats__cache_flushes_invals );
6356       VG_(printf)("   cache: %'14llu arange_New  %'14llu direct-to-Zreps\n",
6357                   stats__cache_make_New_arange,
6358                   stats__cache_make_New_inZrep);
6359 
6360       VG_(printf)("%s","\n");
6361       VG_(printf)("   cline: %'10lu normalises\n",
6362                   stats__cline_normalises );
6363       VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6364                   stats__cline_cread64s,
6365                   stats__cline_cread32s,
6366                   stats__cline_cread16s,
6367                   stats__cline_cread08s );
6368       VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6369                   stats__cline_cwrite64s,
6370                   stats__cline_cwrite32s,
6371                   stats__cline_cwrite16s,
6372                   stats__cline_cwrite08s );
6373       VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6374                   stats__cline_swrite64s,
6375                   stats__cline_swrite32s,
6376                   stats__cline_swrite16s,
6377                   stats__cline_swrite08s );
6378       VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
6379                   stats__cline_sread08s, stats__cline_scopy08s );
6380       VG_(printf)("   cline:    splits: 8to4 %'12lu    4to2 %'12lu"
6381                   "    2to1 %'12lu\n",
6382                   stats__cline_64to32splits, stats__cline_32to16splits,
6383                   stats__cline_16to8splits );
6384       VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu"
6385                   "    2to1 %'12lu\n",
6386                   stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6387                   stats__cline_16to8pulldown );
6388       if (0)
6389       VG_(printf)("   cline: sizeof(CacheLineZ) %ld,"
6390                   " covers %ld bytes of arange\n",
6391                   (Word)sizeof(LineZ),
6392                   (Word)N_LINE_ARANGE);
6393 
6394       VG_(printf)("%s","\n");
6395 
6396       VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
6397                   stats__msmcread, stats__msmcread_change);
6398       VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
6399                   stats__msmcwrite, stats__msmcwrite_change);
6400       VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6401                   stats__cmpLEQ_queries, stats__cmpLEQ_misses);
6402       VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
6403                   stats__join2_queries, stats__join2_misses);
6404 
6405       VG_(printf)("%s","\n");
6406       VG_(printf)("   libhb: VTSops: tick %'lu,  join %'lu,  cmpLEQ %'lu\n",
6407                   stats__vts__tick, stats__vts__join,  stats__vts__cmpLEQ );
6408       VG_(printf)("   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6409                   stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6410       VG_(printf)("   libhb: VTSset: find__or__clone_and_add %'lu"
6411                   " (%'lu allocd)\n",
6412                    stats__vts_set__focaa, stats__vts_set__focaa_a );
6413       VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
6414                    stats__vts__indexat_slow );
6415 
6416       VG_(printf)("%s","\n");
6417       VG_(printf)(
6418          "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6419          VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6420       );
6421       VG_(printf)("   libhb: #%lu vts_tab GC    #%lu vts pruning\n",
6422                   stats__vts_tab_GC, stats__vts_pruning);
6423       VG_(printf)( "   libhb: %lu entries in vts_set\n",
6424                    VG_(sizeFM)( vts_set ) );
6425 
6426       VG_(printf)("%s","\n");
6427       {
6428          UInt live = 0;
6429          UInt llexit_done = 0;
6430          UInt joinedwith_done = 0;
6431          UInt llexit_and_joinedwith_done = 0;
6432 
6433          Thread* hgthread = get_admin_threads();
6434          tl_assert(hgthread);
6435          while (hgthread) {
6436             Thr* hbthr = hgthread->hbthr;
6437             tl_assert(hbthr);
6438             if (hbthr->llexit_done && hbthr->joinedwith_done)
6439                llexit_and_joinedwith_done++;
6440             else if (hbthr->llexit_done)
6441                llexit_done++;
6442             else if (hbthr->joinedwith_done)
6443                joinedwith_done++;
6444             else
6445                live++;
6446             hgthread = hgthread->admin;
6447          }
6448          VG_(printf)("   libhb: threads live: %u exit_and_joinedwith %u"
6449                      " exit %u joinedwith %u\n",
6450                      live, llexit_and_joinedwith_done,
6451                      llexit_done, joinedwith_done);
6452          VG_(printf)("   libhb: %d verydead_threads, "
6453                      "%d verydead_threads_not_pruned\n",
6454                      (int) VG_(sizeXA)( verydead_thread_table),
6455                      (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
6456          tl_assert (VG_(sizeXA)( verydead_thread_table)
6457                     + VG_(sizeXA)( verydead_thread_table_not_pruned)
6458                     == llexit_and_joinedwith_done);
6459       }
6460 
6461       VG_(printf)("%s","\n");
6462       VG_(printf)( "   libhb: oldrefHTN %lu (%'d bytes)\n",
6463                    oldrefHTN, (int)(oldrefHTN * sizeof(OldRef)));
6464       tl_assert (oldrefHTN == VG_(HT_count_nodes) (oldrefHT));
6465       VG_(printf)( "   libhb: oldref lookup found=%lu notfound=%lu\n",
6466                    stats__evm__lookup_found, stats__evm__lookup_notfound);
6467       if (VG_(clo_verbosity) > 1)
6468          VG_(HT_print_stats) (oldrefHT, cmp_oldref_tsw);
6469       VG_(printf)( "   libhb: oldref bind tsw/rcec "
6470                    "==/==:%'lu ==/!=:%'lu !=/!=:%'lu\n",
6471                    stats__ctxt_eq_tsw_eq_rcec, stats__ctxt_eq_tsw_neq_rcec,
6472                    stats__ctxt_neq_tsw_neq_rcec);
6473       VG_(printf)( "   libhb: ctxt__rcdec calls %'lu. rcec gc discards %'lu\n",
6474                    stats__ctxt_rcdec_calls, stats__ctxt_rcec_gc_discards);
6475       VG_(printf)( "   libhb: contextTab: %lu slots,"
6476                    " %lu cur ents(ref'd %lu),"
6477                    " %lu max ents\n",
6478                    (UWord)N_RCEC_TAB,
6479                    stats__ctxt_tab_curr, RCEC_referenced,
6480                    stats__ctxt_tab_max );
6481       {
6482 #        define  MAXCHAIN 10
6483          UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
6484          UInt non0chain = 0;
6485          UInt n;
6486          UInt i;
6487          RCEC *p;
6488 
6489          for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
6490          for (i = 0; i < N_RCEC_TAB; i++) {
6491             n = 0;
6492             for (p = contextTab[i]; p; p = p->next)
6493                n++;
6494             if (n < MAXCHAIN)
6495                chains[n]++;
6496             else
6497                chains[MAXCHAIN]++;
6498             if (n > 0)
6499                non0chain++;
6500          }
6501          VG_(printf)( "   libhb: contextTab chain of [length]=nchain."
6502                       " Avg chain len %3.1f\n"
6503                       "        ",
6504                       (Double)stats__ctxt_tab_curr
6505                       / (Double)(non0chain ? non0chain : 1));
6506          for (i = 0; i <= MAXCHAIN; i++) {
6507             if (chains[i] != 0)
6508                 VG_(printf)( "[%u%s]=%u ",
6509                              i, i == MAXCHAIN ? "+" : "",
6510                              chains[i]);
6511          }
6512          VG_(printf)( "\n");
6513 #        undef MAXCHAIN
6514       }
6515       VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
6516                    stats__ctxt_tab_qs,
6517                    stats__ctxt_tab_cmps );
6518 #if 0
6519       VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
6520       VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
6521       VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
6522       VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
6523       VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
6524       VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
6525       VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
6526       VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
6527       VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
6528       VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6529       VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
6530       VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
6531       VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
6532       VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));
6533 
6534       VG_(printf)("sizeof(struct _XArray)     = %lu\n", sizeof(struct _XArray));
6535       VG_(printf)("sizeof(struct _WordFM)     = %lu\n", sizeof(struct _WordFM));
6536       VG_(printf)("sizeof(struct _Thr)     = %lu\n", sizeof(struct _Thr));
6537       VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
6538 #endif
6539 
6540       VG_(printf)("%s","<<< END libhb stats >>>\n");
6541       VG_(printf)("%s","\n");
6542 
6543    }
6544 }
6545 
6546 /* Receive notification that a thread has low level exited.  The
6547    significance here is that we do not expect to see any more memory
6548    references from it. */
libhb_async_exit(Thr * thr)6549 void libhb_async_exit ( Thr* thr )
6550 {
6551    tl_assert(thr);
6552    tl_assert(!thr->llexit_done);
6553    thr->llexit_done = True;
6554 
6555    /* free up Filter and local_Kws_n_stacks (well, actually not the
6556       latter ..) */
6557    tl_assert(thr->filter);
6558    HG_(free)(thr->filter);
6559    thr->filter = NULL;
6560 
6561    /* Tell the VTS mechanism this thread has exited, so it can
6562       participate in VTS pruning.  Note this can only happen if the
6563       thread has both ll_exited and has been joined with. */
6564    if (thr->joinedwith_done)
6565       VTS__declare_thread_very_dead(thr);
6566 
6567    /* Another space-accuracy tradeoff.  Do we want to be able to show
6568       H1 history for conflicts in threads which have since exited?  If
6569       yes, then we better not free up thr->local_Kws_n_stacks.  The
6570       downside is a potential per-thread leak of up to
6571       N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6572       XArray average overcommit factor is (1.5 I'd guess). */
6573    // hence:
6574    // VG_(deleteXA)(thr->local_Kws_n_stacks);
6575    // thr->local_Kws_n_stacks = NULL;
6576 }
6577 
6578 /* Receive notification that a thread has been joined with.  The
6579    significance here is that we do not expect to see any further
6580    references to its vector clocks (Thr::viR and Thr::viW). */
libhb_joinedwith_done(Thr * thr)6581 void libhb_joinedwith_done ( Thr* thr )
6582 {
6583    tl_assert(thr);
6584    /* Caller must ensure that this is only ever called once per Thr. */
6585    tl_assert(!thr->joinedwith_done);
6586    thr->joinedwith_done = True;
6587    if (thr->llexit_done)
6588       VTS__declare_thread_very_dead(thr);
6589 }
6590 
6591 
6592 /* Both Segs and SOs point to VTSs.  However, there is no sharing, so
6593    a Seg that points at a VTS is its one-and-only owner, and ditto for
6594    a SO that points at a VTS. */
6595 
libhb_so_alloc(void)6596 SO* libhb_so_alloc ( void )
6597 {
6598    return SO__Alloc();
6599 }
6600 
/* Destroy the synchronisation object 'so'.  'so' must be non-NULL
   and carry the SO_MAGIC tag (both asserted); the actual teardown is
   done by SO__Dealloc. */
void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   SO__Dealloc(so);
}
6607 
6608 /* See comments in libhb.h for details on the meaning of
6609    strong vs weak sends and strong vs weak receives. */
libhb_so_send(Thr * thr,SO * so,Bool strong_send)6610 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6611 {
6612    /* Copy the VTSs from 'thr' into the sync object, and then move
6613       the thread along one step. */
6614 
6615    tl_assert(so);
6616    tl_assert(so->magic == SO_MAGIC);
6617 
6618    /* stay sane .. a thread's read-clock must always lead or be the
6619       same as its write-clock */
6620    { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6621      tl_assert(leq);
6622    }
6623 
6624    /* since we're overwriting the VtsIDs in the SO, we need to drop
6625       any references made by the previous contents thereof */
6626    if (so->viR == VtsID_INVALID) {
6627       tl_assert(so->viW == VtsID_INVALID);
6628       so->viR = thr->viR;
6629       so->viW = thr->viW;
6630       VtsID__rcinc(so->viR);
6631       VtsID__rcinc(so->viW);
6632    } else {
6633       /* In a strong send, we dump any previous VC in the SO and
6634          install the sending thread's VC instead.  For a weak send we
6635          must join2 with what's already there. */
6636       tl_assert(so->viW != VtsID_INVALID);
6637       VtsID__rcdec(so->viR);
6638       VtsID__rcdec(so->viW);
6639       so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6640       so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6641       VtsID__rcinc(so->viR);
6642       VtsID__rcinc(so->viW);
6643    }
6644 
6645    /* move both parent clocks along */
6646    VtsID__rcdec(thr->viR);
6647    VtsID__rcdec(thr->viW);
6648    thr->viR = VtsID__tick( thr->viR, thr );
6649    thr->viW = VtsID__tick( thr->viW, thr );
6650    if (!thr->llexit_done) {
6651       Filter__clear(thr->filter, "libhb_so_send");
6652       note_local_Kw_n_stack_for(thr);
6653    }
6654    VtsID__rcinc(thr->viR);
6655    VtsID__rcinc(thr->viW);
6656 
6657    if (strong_send)
6658       show_thread_state("s-send", thr);
6659    else
6660       show_thread_state("w-send", thr);
6661 }
6662 
/* Receive from the sync object 'so' into thread 'thr'.

   A weak receive (basically, an R-acquisition of a R-W lock) joins
   the SO's read-clock into the receiver's read-clock only.  A strong
   receive additionally joins the SO's write-clock into the
   receiver's write-clock, making the receive as a whole essentially
   equivalent to a W-acquisition of a R-W lock.

   If nothing has been posted to 'so', the thread's clocks are left
   untouched. */
void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* Degenerate case: 'so' has no vts, so there has been no message
      posted to it.  Just ignore this case. */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      show_thread_state("d-recv", thr);
      return;
   }

   tl_assert(so->viW != VtsID_INVALID);

   /* Always advance the receiver's read-clock. */
   VtsID__rcdec(thr->viR);
   thr->viR = VtsID__join2( thr->viR, so->viR );
   VtsID__rcinc(thr->viR);

   /* Historical note: at one point (r10589) the receiving thread's
      clocks were also ticked after the join.  That was backed out,
      because it might let the receiver 'overtake' constraints, which
      could lead to missing races.  So there is deliberately no
      VtsID__tick here, neither for viR above nor for viW below. */

   if (strong_recv) {
      /* Strong receive: advance the write-clock too. */
      VtsID__rcdec(thr->viW);
      thr->viW = VtsID__join2( thr->viW, so->viW );
      VtsID__rcinc(thr->viW);
   }

   /* The filter may already be gone (it is NULLed when the thread
      low-level exits), hence the check. */
   if (thr->filter) {
      Filter__clear(thr->filter, "libhb_so_recv");
   }
   note_local_Kw_n_stack_for(thr);

   show_thread_state(strong_recv ? "s-recv" : "w-recv", thr);
}
6717 
libhb_so_everSent(SO * so)6718 Bool libhb_so_everSent ( SO* so )
6719 {
6720    if (so->viR == VtsID_INVALID) {
6721       tl_assert(so->viW == VtsID_INVALID);
6722       return False;
6723    } else {
6724       tl_assert(so->viW != VtsID_INVALID);
6725       return True;
6726    }
6727 }
6728 
6729 #define XXX1 0 // 0x67a106c
6730 #define XXX2 0
6731 
TRACEME(Addr a,SizeT szB)6732 static inline Bool TRACEME(Addr a, SizeT szB) {
6733    if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6734    if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6735    return False;
6736 }
/* Debugging aid: print one line giving the thread, the range
   (a, szB), the caller-supplied tag 's' and the shadow value read
   from address 'a' with zsm_sread08, followed by the thread state
   as shown by show_thread_state. */
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
{
   SVal shadow = zsm_sread08(a);

   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,shadow);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}
6744 
/* Mark the range [a, a+szB) as newly created by 'thr': its shadow
   memory is set to a C value built from the thread's write-clock in
   both positions, and the range is dropped from the thread's
   filter. */
void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal fresh = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(fresh));

   /* Tracing is compiled in but disabled by the constant 0. */
   if (0 && TRACEME(a,szB)) {
      trace(thr,a,szB,"nw-before");
   }

   zsm_sset_range( a, szB, fresh );
   Filter__clear_range( thr->filter, a, szB );

   if (0 && TRACEME(a,szB)) {
      trace(thr,a,szB,"nw-after ");
   }
}
6754 
/* "No access" notification variant that intentionally has no effect:
   the shadow memory and the thread's filter are left as they are.
   All arguments are ignored. */
void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
   /* Intentionally empty. */
}
6759 
6760 
6761 /* Set the lines zix_start till zix_end to NOACCESS. */
zsm_secmap_line_range_noaccess(SecMap * sm,UInt zix_start,UInt zix_end)6762 static void zsm_secmap_line_range_noaccess (SecMap *sm,
6763                                             UInt zix_start, UInt zix_end)
6764 {
6765    for (UInt lz = zix_start; lz <= zix_end; lz++) {
6766       LineZ* lineZ;
6767       lineZ = &sm->linesZ[lz];
6768       if (lineZ->dict[0] != SVal_INVALID) {
6769          rcdec_LineZ(lineZ);
6770          lineZ->dict[0] = SVal_NOACCESS;
6771          lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
6772       } else {
6773          clear_LineF_of_Z(lineZ);
6774       }
6775       for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
6776          lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6777    }
6778 }
6779 
6780 /* Set the given range to SVal_NOACCESS in-place in the secmap.
6781    a must be cacheline aligned. len must be a multiple of a cacheline
6782    and must be < N_SECMAP_ARANGE. */
zsm_sset_range_noaccess_in_secmap(Addr a,SizeT len)6783 static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
6784 {
6785    tl_assert (is_valid_scache_tag (a));
6786    tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
6787    tl_assert (len < N_SECMAP_ARANGE);
6788 
6789    SecMap *sm1 = shmem__find_SecMap (a);
6790    SecMap *sm2 = shmem__find_SecMap (a + len - 1);
6791    UWord zix_start = shmem__get_SecMap_offset(a          ) >> N_LINE_BITS;
6792    UWord zix_end   = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;
6793 
6794    if (sm1) {
6795       if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
6796       zsm_secmap_line_range_noaccess (sm1, zix_start,
6797                                       sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
6798    }
6799    if (sm2 && sm1 != sm2) {
6800       if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
6801       zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
6802    }
6803 }
6804 
/* Set the address range [addr, addr+len) to SVal_NOACCESS.
   The range is split into up to five consecutive pieces (see below):
   partial cachelines at either end are set through the cache, full
   cachelines not making up a whole SecMap are set in-place in their
   SecMap, and SecMaps that are entirely covered are removed from
   map_shmem and pushed on the SecMap freelist. */
static void zsm_sset_range_noaccess (Addr addr, SizeT len)
{
   /*
       BPC = Before, Partial Cacheline, = addr
             (i.e. starting inside a cacheline/inside a SecMap)
       BFC = Before, Full Cacheline(s), but not full SecMap
             (i.e. starting inside a SecMap)
       FSM = Full SecMap(s)
             (i.e. starting a SecMap)
       AFC = After, Full Cacheline(s), but not full SecMap
             (i.e. first address after the full SecMap(s))
       APC = After, Partial Cacheline, i.e. first address after the
             full CacheLines).
       ARE = After Range End = addr+len = first address not part of the range.

       If addr     starts a Cacheline, then BPC == BFC.
       If addr     starts a SecMap,    then BPC == BFC == FSM.
       If addr+len starts a SecMap,    then APC == ARE == AFC
       If addr+len starts a Cacheline, then APC == ARE
   */
   Addr ARE = addr + len;
   Addr BPC = addr;
   Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
   Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
   Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
   Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
   SizeT Plen = len; // Plen will be split between the following:
   SizeT BPClen;     // bytes in the leading partial cacheline
   SizeT BFClen;     // bytes in leading full cachelines (not a full SecMap)
   SizeT FSMlen;     // bytes in fully covered SecMaps
   SizeT AFClen;     // bytes in trailing full cachelines (not a full SecMap)
   SizeT APClen;     // bytes in the trailing partial cacheline

   /* Consumes from Plen the nr of bytes between from and to.
      from and to must be aligned on a multiple of round.
      The length consumed will be a multiple of round, with
      a maximum of Plen.
      If from >= to, nothing is consumed.  The macro is applied below
      to the five pieces in address order, so after the last use Plen
      must have dropped to 0 (asserted). */
#  define PlenCONSUME(from, to, round, consumed) \
   do {                                          \
   if (from < to) {                              \
      if (to - from < Plen)                      \
         consumed = to - from;                   \
      else                                       \
         consumed = ROUNDDN(Plen, round);        \
   } else {                                      \
      consumed = 0;                              \
   }                                             \
   Plen -= consumed; } while (0)

   PlenCONSUME(BPC, BFC, 1,               BPClen);
   PlenCONSUME(BFC, FSM, N_LINE_ARANGE,   BFClen);
   PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
   PlenCONSUME(AFC, APC, N_LINE_ARANGE,   AFClen);
   PlenCONSUME(APC, ARE, 1,               APClen);

   /* Disabled debug output showing how the range was split. */
   if (0)
      VG_(printf) ("addr %p[%lu] ARE %p"
                   " BPC %p[%lu] BFC %p[%lu] FSM %p[%lu]"
                   " AFC %p[%lu] APC %p[%lu]\n",
                   (void*)addr, len, (void*)ARE,
                   (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
                   (void*)AFC, AFClen, (void*)APC, APClen);

   /* Every byte of the range must have been assigned to a piece. */
   tl_assert (Plen == 0);

   /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */

   /* First we set the partial cachelines. This is done through the cache. */
   if (BPClen > 0)
      zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
   if (APClen > 0)
      zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);

   /* After this, we will not use the cache anymore. We will directly work
      in-place on the z shadow memory in SecMap(s).
      So, we invalidate the cachelines for the whole range we are setting
      to NOACCESS below. */
   shmem__invalidate_scache_range (BFC, APC - BFC);

   /* Full cachelines before and after the fully covered SecMaps. */
   if (BFClen > 0)
      zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
   if (AFClen > 0)
      zsm_sset_range_noaccess_in_secmap (AFC, AFClen);

   if (FSMlen > 0) {
      /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
         free list. */
      Addr  sm_start = FSM;
      while (sm_start < AFC) {
         SecMap *sm = shmem__find_SecMap (sm_start);
         /* Address ranges for which no SecMap exists need no work. */
         if (sm) {
            Addr gaKey;
            SecMap *fm_sm;

            if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
            /* Release what each line of the SecMap holds: the
               references of a LineZ dictionary, or the LineF when
               dict[0] is SVal_INVALID. */
            for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
               LineZ *lineZ = &sm->linesZ[lz];
               if (LIKELY(lineZ->dict[0] != SVal_INVALID))
                  rcdec_LineZ(lineZ);
               else
                  clear_LineF_of_Z(lineZ);
            }
            /* The SecMap was just found, so removing it from
               map_shmem must succeed and must give back the same
               key and the same SecMap. */
            if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
               tl_assert (0);
            stats__secmaps_in_map_shmem--;
            tl_assert (gaKey == sm_start);
            tl_assert (sm == fm_sm);
            stats__secmaps_ssetGCed++;
            push_SecMap_on_freelist (sm);
         }
         sm_start += N_SECMAP_ARANGE;
      }
      tl_assert (sm_start == AFC);

      /* The above loop might have kept copies of freed SecMap in the smCache.
         => clear them.
         NOTE(review): gaKey = 1 is presumably a value that can never
         equal a real SecMap base address -- confirm against the
         smCache definition. */
      if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
         smCache[0].gaKey = 1;
         smCache[0].sm = NULL;
      }
      if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
         smCache[1].gaKey = 1;
         smCache[1].sm = NULL;
      }
      if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
         smCache[2].gaKey = 1;
         smCache[2].sm = NULL;
      }
      /* The three hand-unrolled checks above cover the whole smCache
         only as long as it has exactly 3 entries. */
      STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
   }
}
6938 
/* Really put the range [a, a+szB) in NoAccess, and drop it from the
   filter of 'thr'.  This is expensive.  Ranges shorter than two
   cachelines go through zsm_sset_range_SMALL; longer ones use
   zsm_sset_range_noaccess. */
void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));

   if (UNLIKELY(szB >= 2 * N_LINE_ARANGE)) {
      zsm_sset_range_noaccess (a, szB);
   } else {
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   }

   Filter__clear_range( thr->filter, a, szB );
}
6951 
6952 /* Works byte at a time. Can be optimised if needed. */
libhb_srange_get_abits(Addr a,UChar * abits,SizeT len)6953 UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
6954 {
6955    UWord anr = 0; // nr of bytes addressable.
6956 
6957    /* Get the accessibility of each byte. Pay attention to not
6958       create SecMap or LineZ when checking if a byte is addressable.
6959 
6960       Note: this is used for client request. Performance deemed not critical.
6961       So for simplicity, we work byte per byte.
6962       Performance could be improved  by working with full cachelines
6963       or with full SecMap, when reaching a cacheline or secmap boundary. */
6964    for (SizeT i = 0; i < len; i++) {
6965       SVal       sv = SVal_INVALID;
6966       Addr       b = a + i;
6967       Addr       tag = b & ~(N_LINE_ARANGE - 1);
6968       UWord      wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
6969       UWord      cloff = get_cacheline_offset(b);
6970 
6971       /* Note: we do not use get_cacheline(b) to avoid creating cachelines
6972          and/or SecMap for non addressable bytes. */
6973       if (tag == cache_shmem.tags0[wix]) {
6974          CacheLine copy = cache_shmem.lyns0[wix];
6975          /* We work on a copy of the cacheline, as we do not want to
6976             record the client request as a real read.
6977             The below is somewhat similar to zsm_sapply08__msmcread but
6978             avoids side effects on the cache. */
6979          UWord toff = get_tree_offset(b); /* == 0 .. 7 */
6980          UWord tno  = get_treeno(b);
6981          UShort descr = copy.descrs[tno];
6982          if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
6983             SVal* tree = &copy.svals[tno << 3];
6984             copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
6985          }
6986          sv = copy.svals[cloff];
6987       } else {
6988          /* Byte not found in the cacheline. Search for a SecMap. */
6989          SecMap *sm = shmem__find_SecMap(b);
6990          LineZ *lineZ;
6991          if (sm == NULL)
6992             sv = SVal_NOACCESS;
6993          else {
6994             UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
6995             lineZ = &sm->linesZ[zix];
6996             if (lineZ->dict[0] == SVal_INVALID) {
6997                LineF *lineF = SVal2Ptr(lineZ->dict[1]);
6998                sv = lineF->w64s[cloff];
6999             } else {
7000                UWord ix = read_twobit_array( lineZ->ix2s, cloff );
7001                sv = lineZ->dict[ix];
7002             }
7003          }
7004       }
7005 
7006       tl_assert (sv != SVal_INVALID);
7007       if (sv == SVal_NOACCESS) {
7008          if (abits)
7009             abits[i] = 0x00;
7010       } else {
7011          if (abits)
7012             abits[i] = 0xff;
7013          anr++;
7014       }
7015    }
7016 
7017    return anr;
7018 }
7019 
7020 
/* Stop tracking the range [a, a+szB): its shadow memory is set to
   SVal_NOACCESS and the range is dropped from the filter of 'thr'.
   Ranges shorter than two cachelines go through
   zsm_sset_range_SMALL; longer ones use zsm_sset_range_noaccess. */
void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));

   /* Tracing is compiled in but disabled by the constant 0. */
   if (0 && TRACEME(a,szB)) {
      trace(thr,a,szB,"untrack-before");
   }

   if (UNLIKELY(szB >= 2 * N_LINE_ARANGE)) {
      zsm_sset_range_noaccess (a, szB);
   } else {
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   }
   Filter__clear_range( thr->filter, a, szB );

   if (0 && TRACEME(a,szB)) {
      trace(thr,a,szB,"untrack-after ");
   }
}
7033 
libhb_get_Thr_hgthread(Thr * thr)7034 Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
7035    tl_assert(thr);
7036    return thr->hgthread;
7037 }
7038 
/* Store 'hgthread' in the hgthread field of 'thr'.
   'thr' must be non-NULL (asserted). */
void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread )
{
   tl_assert(thr);

   thr->hgthread = hgthread;
}
7043 
/* Copy the shadow state of the 'len' bytes at 'src' to 'dst' using
   zsm_scopy_range, then drop the destination range from the filter
   of 'thr'. */
void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);

   Filter__clear_range( thr->filter, dst, len );
}
7049 
libhb_maybe_GC(void)7050 void libhb_maybe_GC ( void )
7051 {
7052    /* GC the unreferenced (zero rc) RCECs when
7053          (1) reaching a significant nr of RCECs (to avoid scanning a contextTab
7054              with mostly NULL ptr)
7055      and (2) approaching the max nr of RCEC (as we have in any case
7056              at least that amount of RCEC in the pool allocator)
7057              Note: the margin allows to avoid a small but constant increase
7058              of the max nr of RCEC due to the fact that libhb_maybe_GC is
7059              not called when the current nr of RCEC exactly reaches the max.
7060      and (3) the nr of referenced RCECs is less than 75% than total nr RCECs.
7061      Avoid growing too much the nr of RCEC keeps the memory use low,
7062      and avoids to have too many elements in the (fixed) contextTab hashtable.
7063    */
7064    if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
7065                 && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
7066                 && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
7067       do_RCEC_GC();
7068 
7069    /* If there are still no entries available (all the table entries are full),
7070       and we hit the threshold point, then do a GC */
7071    Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
7072       && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
7073    if (UNLIKELY (vts_tab_GC))
7074       vts_tab__do_GC( False/*don't show stats*/ );
7075 
7076    /* scan GC the SecMaps when
7077           (1) no SecMap in the freelist
7078       and (2) the current nr of live secmaps exceeds the threshold. */
7079    if (UNLIKELY(SecMap_freelist == NULL
7080                 && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
7081       // If we did a vts tab GC, then no need to flush the cache again.
7082       if (!vts_tab_GC)
7083          zsm_flush_cache();
7084       shmem__SecMap_do_GC(True);
7085    }
7086 
7087    /* Check the reference counts (expensive) */
7088    if (CHECK_CEM)
7089       event_map__check_reference_counts();
7090 }
7091 
7092 
7093 /////////////////////////////////////////////////////////////////
7094 /////////////////////////////////////////////////////////////////
7095 //                                                             //
7096 // SECTION END main library                                    //
7097 //                                                             //
7098 /////////////////////////////////////////////////////////////////
7099 /////////////////////////////////////////////////////////////////
7100 
7101 /*--------------------------------------------------------------------*/
7102 /*--- end                                             libhb_main.c ---*/
7103 /*--------------------------------------------------------------------*/
7104