
/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
31 
32 #include "pub_core_basics.h"
33 #include "pub_core_debuglog.h"
34 #include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
35 #include "pub_core_libcbase.h"
36 #include "pub_core_vki.h"        // to keep pub_core_libproc.h happy, sigh
37 #include "pub_core_libcproc.h"   // VG_(invalidate_icache)
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcprint.h"
40 #include "pub_core_options.h"
41 #include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
42 #include "pub_core_transtab.h"
43 #include "pub_core_aspacemgr.h"
44 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
45 #include "pub_core_xarray.h"
46 #include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses
47 
48 
49 #define DEBUG_TRANSTAB 0
50 
51 
52 /*-------------------------------------------------------------*/
53 /*--- Management of the FIFO-based translation table+cache. ---*/
54 /*-------------------------------------------------------------*/
55 
56 /* Nr of sectors provided via command line parameter. */
57 UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
58 /* Nr of sectors.
59    Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
60 static SECno n_sectors = 0;
61 
62 /* Average size of a transtab code entry. 0 means to use the tool
63    provided default. */
64 UInt VG_(clo_avg_transtab_entry_size) = 0;
65 
66 /*------------------ CONSTANTS ------------------*/
67 /* Number of entries in hash table of each sector.  This needs to be a prime
68    number to work properly, it must be <= 65535 (so that a TTE index
69    fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED, HTT_DELETED)
70    to denote 'deleted') and  0xFFFE (HTT_EMPTY) to denote 'Empty' in the
71    hash table.
72    It is strongly recommended not to change this.
73    65521 is the largest prime <= 65535. */
74 #define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521
75 
76 #define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
77 #define HTT_DELETED     EC2TTE_DELETED
78 #define HTT_EMPTY       0XFFFE
79 
80 // HTTno is the Sector->htt hash table index. Must be the same type as TTEno.
81 typedef UShort HTTno;
82 
83 /* Because each sector contains a hash table of TTEntries, we need to
84    specify the maximum allowable loading, after which the sector is
85    deemed full. */
86 #define SECTOR_TT_LIMIT_PERCENT 65
87 
88 /* The sector is deemed full when this many entries are in it. */
89 #define N_TTES_PER_SECTOR \
90            ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
91 
92 /* Equivalence classes for fast address range deletion.  There are 1 +
93    2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
94    address range which does not fall cleanly within any specific bin.
95    Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
96    ECLASS_N must fit in a EclassNo. */
97 #define ECLASS_SHIFT 11
98 #define ECLASS_WIDTH 8
99 #define ECLASS_MISC  (1 << ECLASS_WIDTH)
100 #define ECLASS_N     (1 + ECLASS_MISC)
101 STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);
102 
103 typedef UShort EClassNo;
104 
105 /*------------------ TYPES ------------------*/
106 
107 /* In edges ("to-me") in the graph created by chaining. */
108 typedef
109    struct {
110       SECno from_sNo;   /* sector number */
111       TTEno from_tteNo; /* TTE number in given sector */
112       UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
113                                                where the patch is */
114       Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
115    }
116    InEdge;
117 
118 
119 /* Out edges ("from-me") in the graph created by chaining. */
120 typedef
121    struct {
122       SECno to_sNo;    /* sector number */
123       TTEno to_tteNo;  /* TTE number in given sector */
124       UInt  from_offs; /* code offset in owning translation where patch is */
125    }
126    OutEdge;
127 
128 
129 #define N_FIXED_IN_EDGE_ARR 3
130 typedef
131    struct {
132       Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
133       UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
134       union {
135          InEdge   fixed[N_FIXED_IN_EDGE_ARR];      /* if !has_var */
136          XArray*  var; /* XArray* of InEdgeArr */  /* if  has_var */
137       } edges;
138    }
139    InEdgeArr;
140 
141 #define N_FIXED_OUT_EDGE_ARR 2
142 typedef
143    struct {
144       Bool    has_var:1; /* True if var is used (then n_fixed must be 0) */
145       UInt    n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
146       union {
147          OutEdge fixed[N_FIXED_OUT_EDGE_ARR];      /* if !has_var */
148          XArray* var; /* XArray* of OutEdgeArr */  /* if  has_var */
149       } edges;
150    }
151    OutEdgeArr;
152 
153 
154 /* A translation-table entry.  This indicates precisely which areas of
155    guest code are included in the translation, and contains all other
156    auxiliary info too.  */
157 typedef
158    struct {
159       union {
160          struct {
161             /* Profiling only: the count and weight (arbitrary meaning) for
162                this translation.  Weight is a property of the translation
163                itself and computed once when the translation is created.
164                Count is an entry count for the translation and is
165                incremented by 1 every time the translation is used, if we
166                are profiling. */
167             ULong    count;
168             UShort   weight;
169          } prof; // if status == InUse
170          TTEno next_empty_tte; // if status != InUse
171       } usage;
172 
173       /* Status of the slot.  Note, we need to be able to do lazy
174          deletion, hence the Deleted state. */
175       enum { InUse, Deleted, Empty } status;
176 
177       /* 64-bit aligned pointer to one or more 64-bit words containing
178          the corresponding host code (must be in the same sector!)
179          This is a pointer into the sector's tc (code) area. */
180       ULong* tcptr;
181 
182       /* This is the original guest address that purportedly is the
183          entry point of the translation.  You might think that .entry
184          should be the same as .vge->base[0], and most of the time it
185          is.  However, when doing redirections, that is not the case.
186          .vge must always correctly describe the guest code sections
187          from which this translation was made.  However, .entry may or
188          may not be a lie, depending on whether or not we're doing
189          redirection. */
190       Addr entry;
191 
192       /* This structure describes precisely what ranges of guest code
193          the translation covers, so we can decide whether or not to
194          delete it when translations of a given address range are
195          invalidated. */
196       VexGuestExtents vge;
197 
198       /* Address range summary info: these are pointers back to
199          eclass[] entries in the containing Sector.  Those entries in
200          turn point back here -- the two structures are mutually
201          redundant but both necessary to make fast deletions work.
202          The eclass info is similar to, and derived from, this entry's
203          'vge' field, but it is not the same */
204       UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
205       EClassNo tte2ec_ec[3];  // for each, the eclass #
206       UInt     tte2ec_ix[3];  // and the index within the eclass.
207       // for i in 0 .. n_tte2ec-1
208       //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
209       // should be the index
210       // of this TTEntry in the containing Sector's tt array.
211 
212       /* Admin information for chaining.  'in_edges' is a set of the
213          patch points which jump to this translation -- hence are
214          predecessors in the control flow graph.  'out_edges' points
215          to successors in the control flow graph -- translations to
216          which this one has a patched jump.  In short these are just
217          backwards and forwards edges in the graph of patched-together
218          blocks.  The 'in_edges' contain slightly more info, enough
219          that we can undo the chaining of each mentioned patch point.
220          The 'out_edges' list exists only so that we can visit the
221          'in_edges' entries of all blocks we're patched through to, in
222          order to remove ourselves from then when we're deleted. */
223 
224       /* A translation can disappear for two reasons:
225           1. erased (as part of the oldest sector cleanup) when the
226              youngest sector is full.
227           2. discarded due to calls to VG_(discard_translations).
228              VG_(discard_translations) sets the status of the
229              translation to 'Deleted'.
230              A.o., the gdbserver discards one or more translations
231              when a breakpoint is inserted or removed at an Addr,
232              or when single stepping mode is enabled/disabled
233              or when a translation is instrumented for gdbserver
234              (all the target jumps of this translation are
235               invalidated).
236 
237          So, it is possible that the translation A to be patched
238          (to obtain a patched jump from A to B) is invalidated
239          after B is translated and before A is patched.
240          In case a translation is erased or discarded, the patching
241          cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
242          are checking the 'from' translation still exists before
243          doing the patching.
244 
245          Is it safe to erase or discard the current translation E being
246          executed ? Amazing, but yes, it is safe.
247          Here is the explanation:
248 
249          The translation E being executed can only be erased if a new
250          translation N is being done. A new translation is done only
251          if the host addr is a not yet patched jump to another
252          translation. In such a case, the guest address of N is
253          assigned to the PC in the VEX state. Control is returned
254          to the scheduler. N will be translated. This can erase the
255          translation E (in case of sector full). VG_(tt_tc_do_chaining)
256          will not do the chaining to a non found translation E.
257          The execution will continue at the current guest PC
258          (i.e. the translation N).
259          => it is safe to erase the current translation being executed.
260 
261          The current translation E being executed can also be discarded
262          (e.g. by gdbserver). VG_(discard_translations) will mark
263          this translation E as Deleted, but the translation itself
264          is not erased. In particular, its host code can only
265          be overwritten or erased in case a new translation is done.
266          A new translation will only be done if a not yet translated
267          jump is to be executed. The execution of the Deleted translation
268          E will continue till a non patched jump is encountered.
269          This situation is then similar to the 'erasing' case above :
270          the current translation E can be erased or overwritten, as the
271          execution will continue at the new translation N.
272 
273       */
274 
275       /* It is possible, although very unlikely, that a block A has
276          more than one patched jump to block B.  This could happen if
277          (eg) A finishes "jcond B; jmp B".
278 
279          This means in turn that B's in_edges set can list A more than
280          once (twice in this example).  However, each such entry must
281          have a different from_offs, since a patched jump can only
282          jump to one place at once (it's meaningless for it to have
283          multiple destinations.)  IOW, the successor and predecessor
284          edges in the graph are not uniquely determined by a
285          TTEntry --> TTEntry pair, but rather by a
286          (TTEntry,offset) --> TTEntry triple.
287 
288          If A has multiple edges to B then B will mention A multiple
289          times in its in_edges.  To make things simpler, we then
290          require that A mentions B exactly the same number of times in
291          its out_edges.  Furthermore, a matching out-in pair must have
292          the same offset (from_offs).  This facilitates sanity
293          checking, and it facilitates establishing the invariant that
294          a out_edges set may not have duplicates when using the
295          equality defined by (TTEntry,offset).  Hence the out_edges
296          and in_edges sets really do have both have set semantics.
297 
298          eg if  A has been patched to B at offsets 42 and 87 (in A)
299          then   A.out_edges = { (B,42), (B,87) }   (in any order)
300          and    B.in_edges  = { (A,42), (A,87) }   (in any order)
301 
302          Hence for each node pair P->Q in the graph, there's a 1:1
303          mapping between P.out_edges and Q.in_edges.
304       */
305       InEdgeArr  in_edges;
306       OutEdgeArr out_edges;
307    }
308    TTEntry;
309 
310 
311 /* A structure used for mapping host code addresses back to the
312    relevant TTEntry.  Used when doing chaining, for finding the
313    TTEntry to which some arbitrary patch address belongs. */
314 typedef
315    struct {
316       UChar* start;
317       UInt   len;
318       TTEno  tteNo;
319    }
320    HostExtent;
321 
322 /* Finally, a sector itself.  Each sector contains an array of
323    TCEntries, which hold code, and an array of TTEntries, containing
324    all required administrative info.  Profiling is supported using the
325    TTEntry usage.prof.count and usage.prof.weight fields, if required.
326 
327    If the sector is not in use, all three pointers are NULL and
328    tt_n_inuse is zero.
329 */
330 typedef
331    struct {
332       /* The TCEntry area.  Size of this depends on the average
333          translation size.  We try and size it so it becomes full
334          precisely when this sector's translation table (tt) reaches
335          its load limit (SECTOR_TT_LIMIT_PERCENT). */
336       ULong* tc;
337 
338       /* An hash table, mapping guest address to an index in the tt array.
339          htt is a fixed size, always containing
340          exactly N_HTTES_PER_SECTOR entries. */
341       TTEno* htt;
342 
343       /* The TTEntry array.  This is a fixed size, always containing
344          exactly N_TTES_PER_SECTOR entries. */
345       TTEntry* tt;
346 
347       /* This points to the current allocation point in tc. */
348       ULong* tc_next;
349 
350       /* The count of tt entries with state InUse. */
351       Int tt_n_inuse;
352 
353       /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
354       TTEno empty_tt_list;
355 
356       /* Expandable arrays of tt indices for each of the ECLASS_N
357          address range equivalence classes.  These hold indices into
358          the containing sector's tt array, which in turn should point
359          back here. */
360       Int     ec2tte_size[ECLASS_N];
361       Int     ec2tte_used[ECLASS_N];
362       TTEno*  ec2tte[ECLASS_N];
363 
364       /* The host extents.  The [start, +len) ranges are constructed
365          in strictly non-overlapping order, so we can binary search
366          them at any time. */
367       XArray* host_extents; /* XArray* of HostExtent */
368    }
369    Sector;
370 
371 
372 /*------------------ DECLS ------------------*/
373 
374 /* The root data structure is an array of sectors.  The index of the
375    youngest sector is recorded, and new translations are put into that
376    sector.  When it fills up, we move along to the next sector and
377    start to fill that up, wrapping around at the end of the array.
378    That way, once all N_TC_SECTORS have been bought into use for the
379    first time, and are full, we then re-use the oldest sector,
380    endlessly.
381 
382    When running, youngest sector should be between >= 0 and <
383    N_TC_SECTORS.  The initial  value indicates the TT/TC system is
384    not yet initialised.
385 */
386 static Sector sectors[MAX_N_SECTORS];
387 static Int    youngest_sector = INV_SNO;
388 
389 /* The number of ULongs in each TCEntry area.  This is computed once
390    at startup and does not change. */
391 static Int    tc_sector_szQ = 0;
392 
393 
394 /* A list of sector numbers, in the order which they should be
395    searched to find translations.  This is an optimisation to be used
396    when searching for translations and should not affect
397    correctness.  INV_SNO denotes "no entry". */
398 static SECno sector_search_order[MAX_N_SECTORS];
399 
400 
401 /* Fast helper for the TC.  A direct-mapped cache which holds a set of
402    recently used (guest address, host address) pairs.  This array is
403    referred to directly from m_dispatch/dispatch-<platform>.S.
404 
405    Entries in tt_fast may refer to any valid TC entry, regardless of
406    which sector it's in.  Consequently we must be very careful to
407    invalidate this cache when TC entries are changed or disappear.
408 
409    A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
410    pointed at to cause that cache entry to miss.  This relies on the
411    assumption that no guest code actually has that address, hence a
412    value 0x1 seems good.  m_translate gives the client a synthetic
413    segfault if it tries to execute at this address.
414 */
415 /*
416 typedef
417    struct {
418       Addr guest;
419       Addr host;
420    }
421    FastCacheEntry;
422 */
423 /*global*/ __attribute__((aligned(16)))
424            FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
425 
426 /* Make sure we're not used before initialisation. */
427 static Bool init_done = False;
428 
429 
430 /*------------------ STATS DECLS ------------------*/
431 
432 /* Number of fast-cache updates and flushes done. */
433 static ULong n_fast_flushes = 0;
434 static ULong n_fast_updates = 0;
435 
436 /* Number of full lookups done. */
437 static ULong n_full_lookups = 0;
438 static ULong n_lookup_probes = 0;
439 
440 /* Number/osize/tsize of translations entered; also the number of
441    those for which self-checking was requested. */
442 static ULong n_in_count    = 0;
443 static ULong n_in_osize    = 0;
444 static ULong n_in_tsize    = 0;
445 static ULong n_in_sc_count = 0;
446 
447 /* Number/osize of translations discarded due to lack of space. */
448 static ULong n_dump_count = 0;
449 static ULong n_dump_osize = 0;
450 static ULong n_sectors_recycled = 0;
451 
452 /* Number/osize of translations discarded due to requests to do so. */
453 static ULong n_disc_count = 0;
454 static ULong n_disc_osize = 0;
455 
456 
457 /*-------------------------------------------------------------*/
458 /*--- Misc                                                  ---*/
459 /*-------------------------------------------------------------*/
460 
/* Allocate 'n' bytes from the TTAUX arena, tagged with 'tag' for
   memory accounting.  Never returns NULL (the arena allocator aborts
   on out-of-memory). */
static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}
465 
ttaux_free(void * p)466 static void ttaux_free ( void* p )
467 {
468    VG_(arena_free)(VG_AR_TTAUX, p);
469 }
470 
471 
472 /*-------------------------------------------------------------*/
473 /*--- Chaining support                                      ---*/
474 /*-------------------------------------------------------------*/
475 
index_tte(SECno sNo,TTEno tteNo)476 static inline TTEntry* index_tte ( SECno sNo, TTEno tteNo )
477 {
478    vg_assert(sNo < n_sectors);
479    vg_assert(tteNo < N_TTES_PER_SECTOR);
480    Sector* s = &sectors[sNo];
481    vg_assert(s->tt);
482    TTEntry* tte = &s->tt[tteNo];
483    vg_assert(tte->status == InUse);
484    return tte;
485 }
486 
/* Initialise an InEdge to an invalid/empty state. */
static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = INV_SNO; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}
494 
/* Initialise an OutEdge to an invalid/empty state. */
static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = INV_SNO; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}
501 
/* Zero-initialise a TTEntry.  Note this leaves status == InUse (0),
   so callers must set the status explicitly if a different state is
   intended. */
static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}
506 
InEdgeArr__size(const InEdgeArr * iea)507 static UWord InEdgeArr__size ( const InEdgeArr* iea )
508 {
509    if (iea->has_var) {
510       vg_assert(iea->n_fixed == 0);
511       return VG_(sizeXA)(iea->edges.var);
512    } else {
513       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
514       return iea->n_fixed;
515    }
516 }
517 
/* Discard all edges in 'iea', freeing the overflow XArray if one was
   in use, and return it to the (empty) fixed-array representation. */
static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->edges.var);
      iea->edges.var = NULL;
      iea->has_var = False;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}
530 
531 static
InEdgeArr__index(InEdgeArr * iea,UWord i)532 InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
533 {
534    if (iea->has_var) {
535       vg_assert(iea->n_fixed == 0);
536       return (InEdge*)VG_(indexXA)(iea->edges.var, i);
537    } else {
538       vg_assert(i < iea->n_fixed);
539       return &iea->edges.fixed[i];
540    }
541 }
542 
543 static
InEdgeArr__deleteIndex(InEdgeArr * iea,UWord i)544 void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
545 {
546    if (iea->has_var) {
547       vg_assert(iea->n_fixed == 0);
548       VG_(removeIndexXA)(iea->edges.var, i);
549    } else {
550       vg_assert(i < iea->n_fixed);
551       for (; i+1 < iea->n_fixed; i++) {
552          iea->edges.fixed[i] = iea->edges.fixed[i+1];
553       }
554       iea->n_fixed--;
555    }
556 }
557 
558 static
InEdgeArr__add(InEdgeArr * iea,InEdge * ie)559 void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
560 {
561    if (iea->has_var) {
562       vg_assert(iea->n_fixed == 0);
563       VG_(addToXA)(iea->edges.var, ie);
564    } else {
565       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
566       if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
567          /* The fixed array is full, so we have to initialise an
568             XArray and copy the fixed array into it. */
569          XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
570                                   ttaux_free,
571                                   sizeof(InEdge));
572          VG_(hintSizeXA) (var, iea->n_fixed + 1);
573          UWord i;
574          for (i = 0; i < iea->n_fixed; i++) {
575             VG_(addToXA)(var, &iea->edges.fixed[i]);
576          }
577          VG_(addToXA)(var, ie);
578          iea->n_fixed = 0;
579          iea->has_var = True;
580          iea->edges.var = var;
581       } else {
582          /* Just add to the fixed array. */
583          iea->edges.fixed[iea->n_fixed++] = *ie;
584       }
585    }
586 }
587 
OutEdgeArr__size(const OutEdgeArr * oea)588 static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
589 {
590    if (oea->has_var) {
591       vg_assert(oea->n_fixed == 0);
592       return VG_(sizeXA)(oea->edges.var);
593    } else {
594       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
595       return oea->n_fixed;
596    }
597 }
598 
/* Discard all edges in 'oea', freeing the overflow XArray if one was
   in use, and return it to the (empty) fixed-array representation. */
static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->edges.var);
      oea->edges.var = NULL;
      oea->has_var = False;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}
611 
612 static
OutEdgeArr__index(OutEdgeArr * oea,UWord i)613 OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
614 {
615    if (oea->has_var) {
616       vg_assert(oea->n_fixed == 0);
617       return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
618    } else {
619       vg_assert(i < oea->n_fixed);
620       return &oea->edges.fixed[i];
621    }
622 }
623 
624 static
OutEdgeArr__deleteIndex(OutEdgeArr * oea,UWord i)625 void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
626 {
627    if (oea->has_var) {
628       vg_assert(oea->n_fixed == 0);
629       VG_(removeIndexXA)(oea->edges.var, i);
630    } else {
631       vg_assert(i < oea->n_fixed);
632       for (; i+1 < oea->n_fixed; i++) {
633          oea->edges.fixed[i] = oea->edges.fixed[i+1];
634       }
635       oea->n_fixed--;
636    }
637 }
638 
639 static
OutEdgeArr__add(OutEdgeArr * oea,OutEdge * oe)640 void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
641 {
642    if (oea->has_var) {
643       vg_assert(oea->n_fixed == 0);
644       VG_(addToXA)(oea->edges.var, oe);
645    } else {
646       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
647       if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
648          /* The fixed array is full, so we have to initialise an
649             XArray and copy the fixed array into it. */
650          XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
651                                   ttaux_free,
652                                   sizeof(OutEdge));
653          VG_(hintSizeXA) (var, oea->n_fixed+1);
654          UWord i;
655          for (i = 0; i < oea->n_fixed; i++) {
656             VG_(addToXA)(var, &oea->edges.fixed[i]);
657          }
658          VG_(addToXA)(var, oe);
659          oea->n_fixed = 0;
660          oea->has_var = True;
661          oea->edges.var = var;
662       } else {
663          /* Just add to the fixed array. */
664          oea->edges.fixed[oea->n_fixed++] = *oe;
665       }
666    }
667 }
668 
669 static
HostExtent__cmpOrd(const void * v1,const void * v2)670 Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
671 {
672    const HostExtent* hx1 = v1;
673    const HostExtent* hx2 = v2;
674    if (hx1->start + hx1->len <= hx2->start) return -1;
675    if (hx2->start + hx2->len <= hx1->start) return 1;
676    return 0; /* partial overlap */
677 }
678 
679 /* True if hx is a dead host extent, i.e. corresponds to host code
680    of an entry that was invalidated. */
681 static
HostExtent__is_dead(const HostExtent * hx,const Sector * sec)682 Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
683 {
684    const TTEno tteNo = hx->tteNo;
685 #define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
686       VG_(printf) (m                                            \
687                    " start 0x%p len %u sector %d ttslot %u"     \
688                    " tt.entry 0x%lu tt.tcptr 0x%p\n",           \
689                    hx->start, hx->len, (int)(sec - sectors),    \
690                    hx->tteNo,                                   \
691                    sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)
692 
693    /* Entry might have been invalidated and not re-used yet.*/
694    if (sec->tt[tteNo].status == Deleted) {
695       LDEBUG("found deleted entry");
696       return True;
697    }
698    /* Maybe we found this entry via a host_extents which was
699       inserted for an entry which was changed to Deleted then
700       re-used after. If this entry was re-used, then its tcptr
701       is >= to host_extents start (i.e. the previous tcptr) + len.
702       This is the case as there is no re-use of host code: a new
703       entry or re-used entry always gets "higher value" host code. */
704    if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
705       LDEBUG("found re-used entry");
706       return True;
707    }
708 
709    return False;
710 #undef LDEBUG
711 }
712 
713 static __attribute__((noinline))
find_TTEntry_from_hcode(SECno * from_sNo,TTEno * from_tteNo,void * hcode)714 Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
715                               /*OUT*/TTEno* from_tteNo,
716                               void* hcode )
717 {
718    SECno i;
719 
720    /* Search order logic copied from VG_(search_transtab). */
721    for (i = 0; i < n_sectors; i++) {
722       SECno sno = sector_search_order[i];
723       if (UNLIKELY(sno == INV_SNO))
724          return False; /* run out of sectors to search */
725 
726       const Sector* sec = &sectors[sno];
727       const XArray* /* of HostExtent */ host_extents = sec->host_extents;
728       vg_assert(host_extents);
729 
730       HostExtent key;
731       VG_(memset)(&key, 0, sizeof(key));
732       key.start = hcode;
733       key.len = 1;
734       Word firstW = -1, lastW = -1;
735       Bool found  = VG_(lookupXA_UNSAFE)(
736                        host_extents, &key, &firstW, &lastW,
737                        HostExtent__cmpOrd );
738       vg_assert(firstW == lastW); // always true, even if not found
739       if (found) {
740          HostExtent* hx = VG_(indexXA)(host_extents, firstW);
741          TTEno tteNo = hx->tteNo;
742          /* Do some additional sanity checks. */
743          vg_assert(tteNo < N_TTES_PER_SECTOR);
744 
745          /* if this hx entry corresponds to dead host code, we must
746             tell this code has not been found, as it cannot be patched. */
747          if (HostExtent__is_dead (hx, sec))
748             return False;
749 
750          vg_assert(sec->tt[tteNo].status == InUse);
751          /* Can only half check that the found TTEntry contains hcode,
752             due to not having a length value for the hcode in the
753             TTEntry. */
754          vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
755          /* Looks plausible */
756          *from_sNo   = sno;
757          *from_tteNo = tteNo;
758          return True;
759       }
760    }
761    return False;
762 }
763 
764 
765 /* Figure out whether or not hcode is jitted code present in the main
766    code cache (but not in the no-redir cache).  Used for sanity
767    checking. */
is_in_the_main_TC(const void * hcode)768 static Bool is_in_the_main_TC ( const void* hcode )
769 {
770    SECno i, sno;
771    for (i = 0; i < n_sectors; i++) {
772       sno = sector_search_order[i];
773       if (sno == INV_SNO)
774          break; /* run out of sectors to search */
775       if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
776           && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
777                               + sizeof(ULong) - 1)
778          return True;
779    }
780    return False;
781 }
782 
783 
/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              SECno to_sNo,
                              TTEno to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account, whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   SECno from_sNo   = INV_SNO;
   TTEno from_tteNo = INV_TTE;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   /* LibVEX_Chain rewrote host instructions in place; flush the
      icache over the modified range so we don't execute stale code. */
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   /* The patch point must lie inside the from_ block's translation;
      a huge offset would mean find_TTEntry_from_hcode returned the
      wrong entry. */
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}
874 
875 
/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   /* The source block: the jump to undo lives at from_offs bytes
      into its translation. */
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   /* Unchaining rewrites the direct jump back into a call to the
      relevant "chain me" request stub, matching the entry-point kind
      that was originally chained. */
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
       = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                         place_to_jump_to_EXPECTED, disp_cp_chain_me );
   /* Flush the icache over whatever LibVEX_UnChain modified. */
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}
911 
912 
913 /* The specified block is about to be deleted.  Update the preds and
914    succs of its associated blocks accordingly.  This includes undoing
915    any chained jumps to this block. */
916 static
unchain_in_preparation_for_deletion(VexArch arch_host,VexEndness endness_host,SECno here_sNo,TTEno here_tteNo)917 void unchain_in_preparation_for_deletion ( VexArch arch_host,
918                                            VexEndness endness_host,
919                                            SECno here_sNo, TTEno here_tteNo )
920 {
921    if (DEBUG_TRANSTAB)
922       VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
923    UWord    i, j, n, m;
924    Int      evCheckSzB = LibVEX_evCheckSzB(arch_host);
925    TTEntry* here_tte   = index_tte(here_sNo, here_tteNo);
926    if (DEBUG_TRANSTAB)
927       VG_(printf)("... QQQ tt.entry 0x%lu tt.tcptr 0x%p\n",
928                   here_tte->entry, here_tte->tcptr);
929    vg_assert(here_tte->status == InUse);
930 
931    /* Visit all InEdges owned by here_tte. */
932    n = InEdgeArr__size(&here_tte->in_edges);
933    for (i = 0; i < n; i++) {
934       InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
935       // Undo the chaining.
936       UChar* here_slow_EP = (UChar*)here_tte->tcptr;
937       UChar* here_fast_EP = here_slow_EP + evCheckSzB;
938       unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
939       // Find the corresponding entry in the "from" node's out_edges,
940       // and remove it.
941       TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
942       m = OutEdgeArr__size(&from_tte->out_edges);
943       vg_assert(m > 0); // it must have at least one entry
944       for (j = 0; j < m; j++) {
945          OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
946          if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
947              && oe->from_offs == ie->from_offs)
948            break;
949       }
950       vg_assert(j < m); // "oe must be findable"
951       OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
952    }
953 
954    /* Visit all OutEdges owned by here_tte. */
955    n = OutEdgeArr__size(&here_tte->out_edges);
956    for (i = 0; i < n; i++) {
957       OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
958       // Find the corresponding entry in the "to" node's in_edges,
959       // and remove it.
960       TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
961       m = InEdgeArr__size(&to_tte->in_edges);
962       vg_assert(m > 0); // it must have at least one entry
963       for (j = 0; j < m; j++) {
964          InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
965          if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
966              && ie->from_offs == oe->from_offs)
967            break;
968       }
969       vg_assert(j < m); // "ie must be findable"
970       InEdgeArr__deleteIndex(&to_tte->in_edges, j);
971    }
972 
973    InEdgeArr__makeEmpty(&here_tte->in_edges);
974    OutEdgeArr__makeEmpty(&here_tte->out_edges);
975 }
976 
977 
978 /*-------------------------------------------------------------*/
979 /*--- Address-range equivalence class stuff                 ---*/
980 /*-------------------------------------------------------------*/
981 
982 /* Return equivalence class number for a range. */
983 
range_to_eclass(Addr start,UInt len)984 static EClassNo range_to_eclass ( Addr start, UInt len )
985 {
986    UInt mask   = (1 << ECLASS_WIDTH) - 1;
987    UInt lo     = (UInt)start;
988    UInt hi     = lo + len - 1;
989    UInt loBits = (lo >> ECLASS_SHIFT) & mask;
990    UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
991    if (loBits == hiBits) {
992       vg_assert(loBits < ECLASS_N-1);
993       return loBits;
994    } else {
995       return ECLASS_MISC;
996    }
997 }
998 
999 
1000 /* Calculates the equivalence class numbers for any VexGuestExtent.
1001    These are written in *eclasses, which must be big enough to hold 3
1002    Ints.  The number written, between 1 and 3, is returned.  The
1003    eclasses are presented in order, and any duplicates are removed.
1004 */
1005 
1006 static
vexGuestExtents_to_eclasses(EClassNo * eclasses,const VexGuestExtents * vge)1007 Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
1008                                   const VexGuestExtents* vge )
1009 {
1010 
1011 #  define SWAP(_lv1,_lv2) \
1012       do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
1013 
1014    Int i, j, n_ec;
1015    EClassNo r;
1016 
1017    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
1018 
1019    n_ec = 0;
1020    for (i = 0; i < vge->n_used; i++) {
1021       r = range_to_eclass( vge->base[i], vge->len[i] );
1022       if (r == ECLASS_MISC)
1023          goto bad;
1024       /* only add if we haven't already seen it */
1025       for (j = 0; j < n_ec; j++)
1026          if (eclasses[j] == r)
1027             break;
1028       if (j == n_ec)
1029          eclasses[n_ec++] = r;
1030    }
1031 
1032    if (n_ec == 1)
1033       return 1;
1034 
1035    if (n_ec == 2) {
1036       /* sort */
1037       if (eclasses[0] > eclasses[1])
1038          SWAP(eclasses[0], eclasses[1]);
1039       return 2;
1040    }
1041 
1042    if (n_ec == 3) {
1043       /* sort */
1044       if (eclasses[0] > eclasses[2])
1045          SWAP(eclasses[0], eclasses[2]);
1046       if (eclasses[0] > eclasses[1])
1047          SWAP(eclasses[0], eclasses[1]);
1048       if (eclasses[1] > eclasses[2])
1049          SWAP(eclasses[1], eclasses[2]);
1050       return 3;
1051    }
1052 
1053    /* NOTREACHED */
1054    vg_assert(0);
1055 
1056   bad:
1057    eclasses[0] = ECLASS_MISC;
1058    return 1;
1059 
1060 #  undef SWAP
1061 }
1062 
1063 
1064 /* Add tteno to the set of entries listed for equivalence class ec in
1065    this sector.  Returns used location in eclass array. */
1066 
1067 static
addEClassNo(Sector * sec,EClassNo ec,TTEno tteno)1068 UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
1069 {
1070    Int    old_sz, new_sz, i, r;
1071    TTEno  *old_ar, *new_ar;
1072 
1073    vg_assert(ec >= 0 && ec < ECLASS_N);
1074    vg_assert(tteno < N_TTES_PER_SECTOR);
1075 
1076    if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
1077 
1078    if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
1079 
1080       vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
1081 
1082       old_sz = sec->ec2tte_size[ec];
1083       old_ar = sec->ec2tte[ec];
1084       new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
1085       new_ar = ttaux_malloc("transtab.aECN.1",
1086                             new_sz * sizeof(TTEno));
1087       for (i = 0; i < old_sz; i++)
1088          new_ar[i] = old_ar[i];
1089       if (old_ar)
1090          ttaux_free(old_ar);
1091       sec->ec2tte_size[ec] = new_sz;
1092       sec->ec2tte[ec] = new_ar;
1093 
1094       if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
1095    }
1096 
1097    /* Common case */
1098    r = sec->ec2tte_used[ec]++;
1099    vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
1100    sec->ec2tte[ec][r] = tteno;
1101    return (UInt)r;
1102 }
1103 
1104 
1105 /* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
1106    eclass entries to 'sec'. */
1107 
1108 static
upd_eclasses_after_add(Sector * sec,TTEno tteno)1109 void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
1110 {
1111    Int i, r;
1112    EClassNo eclasses[3];
1113    TTEntry* tte;
1114    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1115 
1116    tte = &sec->tt[tteno];
1117    r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
1118 
1119    vg_assert(r >= 1 && r <= 3);
1120    tte->n_tte2ec = r;
1121 
1122    for (i = 0; i < r; i++) {
1123       tte->tte2ec_ec[i] = eclasses[i];
1124       tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
1125    }
1126 }
1127 
1128 
/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      j, k, n, ec_idx;
   EClassNo i;
   EClassNo ec_num;
   TTEntry* tte;
   TTEno    tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... check the ec2tte array itself, then every
      TT entry it references. */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i, and its paired tte2ec_ix must point back at
            this slot j.  Inspect tte's eclass info to verify. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {

      tte = &sec->tt[tteno];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != tteno)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

   /* Disabled extra diagnostics, kept for debugging sessions. */
#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
1258 
1259 
1260 /* Sanity check absolutely everything.  True == check passed. */
1261 
1262 /* forwards */
1263 static Bool sanity_check_redir_tt_tc ( void );
1264 
sanity_check_sector_search_order(void)1265 static Bool sanity_check_sector_search_order ( void )
1266 {
1267    SECno i, j, nListed;
1268    /* assert the array is the right size */
1269    vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
1270                                / sizeof(sector_search_order[0])));
1271    /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
1272    for (i = 0; i < n_sectors; i++) {
1273       if (sector_search_order[i] == INV_SNO
1274           || sector_search_order[i] >= n_sectors)
1275          break;
1276    }
1277    nListed = i;
1278    for (/* */; i < n_sectors; i++) {
1279       if (sector_search_order[i] != INV_SNO)
1280          break;
1281    }
1282    if (i != n_sectors)
1283       return False;
1284    /* Check each sector number only appears once */
1285    for (i = 0; i < n_sectors; i++) {
1286       if (sector_search_order[i] == INV_SNO)
1287          continue;
1288       for (j = i+1; j < n_sectors; j++) {
1289          if (sector_search_order[j] == sector_search_order[i])
1290             return False;
1291       }
1292    }
1293    /* Check that the number of listed sectors equals the number
1294       in use, by counting nListed back down. */
1295    for (i = 0; i < n_sectors; i++) {
1296       if (sectors[i].tc != NULL)
1297          nListed--;
1298    }
1299    if (nListed != 0)
1300       return False;
1301    return True;
1302 }
1303 
sanity_check_all_sectors(void)1304 static Bool sanity_check_all_sectors ( void )
1305 {
1306    SECno   sno;
1307    Bool    sane;
1308    Sector* sec;
1309    for (sno = 0; sno < n_sectors; sno++) {
1310       Int i;
1311       Int nr_not_dead_hx = 0;
1312       Int szhxa;
1313       sec = &sectors[sno];
1314       if (sec->tc == NULL)
1315          continue;
1316       sane = sanity_check_eclasses_in_sector( sec );
1317       if (!sane)
1318          return False;
1319       szhxa = VG_(sizeXA)(sec->host_extents);
1320       for (i = 0; i < szhxa; i++) {
1321          const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
1322          if (!HostExtent__is_dead (hx, sec))
1323             nr_not_dead_hx++;
1324       }
1325       if (nr_not_dead_hx != sec->tt_n_inuse) {
1326          VG_(debugLog)(0, "transtab",
1327                        "nr_not_dead_hx %d sanity fail "
1328                        "(expected == in use %d)\n",
1329                        nr_not_dead_hx, sec->tt_n_inuse);
1330          return False;
1331       }
1332    }
1333 
1334    if ( !sanity_check_redir_tt_tc() )
1335       return False;
1336    if ( !sanity_check_sector_search_order() )
1337       return False;
1338    return True;
1339 }
1340 
1341 
1342 
1343 /*-------------------------------------------------------------*/
1344 /*--- Add/find translations                                 ---*/
1345 /*-------------------------------------------------------------*/
1346 
vge_osize(const VexGuestExtents * vge)1347 static UInt vge_osize ( const VexGuestExtents* vge )
1348 {
1349    UInt i, n = 0;
1350    for (i = 0; i < vge->n_used; i++)
1351       n += (UInt)vge->len[i];
1352    return n;
1353 }
1354 
isValidSector(SECno sector)1355 static Bool isValidSector ( SECno sector )
1356 {
1357    if (sector == INV_SNO || sector >= n_sectors)
1358       return False;
1359    return True;
1360 }
1361 
HASH_TT(Addr key)1362 static inline HTTno HASH_TT ( Addr key )
1363 {
1364    UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
1365    UInt kLo = (UInt)key;
1366    UInt k32 = kHi ^ kLo;
1367    UInt ror = 7;
1368    if (ror > 0)
1369       k32 = (k32 >> ror) | (k32 << (32-ror));
1370    return (HTTno)(k32 % N_HTTES_PER_SECTOR);
1371 }
1372 
setFastCacheEntry(Addr key,ULong * tcptr)1373 static void setFastCacheEntry ( Addr key, ULong* tcptr )
1374 {
1375    UInt cno = (UInt)VG_TT_FAST_HASH(key);
1376    VG_(tt_fast)[cno].guest = key;
1377    VG_(tt_fast)[cno].host  = (Addr)tcptr;
1378    n_fast_updates++;
1379    /* This shouldn't fail.  It should be assured by m_translate
1380       which should reject any attempt to make translation of code
1381       starting at TRANSTAB_BOGUS_GUEST_ADDR. */
1382    vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
1383 }
1384 
/* Invalidate the fast cache VG_(tt_fast) by marking every slot with
   the bogus guest address, so no lookup can hit. */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   /* The size assertion above guarantees the unrolled loop lands
      exactly on the end. */
   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}
1402 
1403 
get_empty_tt_slot(SECno sNo)1404 static TTEno get_empty_tt_slot(SECno sNo)
1405 {
1406    TTEno i;
1407 
1408    i = sectors[sNo].empty_tt_list;
1409    sectors[sNo].empty_tt_list = sectors[sNo].tt[i].usage.next_empty_tte;
1410 
1411    vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);
1412 
1413    return i;
1414 }
1415 
add_in_empty_tt_list(SECno sNo,TTEno tteno)1416 static void add_in_empty_tt_list (SECno sNo, TTEno tteno)
1417 {
1418    sectors[sNo].tt[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
1419    sectors[sNo].empty_tt_list = tteno;
1420 }
1421 
/* Prepare sector 'sno' for use: allocate its tt/tc/htt on first use,
   or flush all existing translations (telling the tool and undoing
   chaining first) when recycling it. */
static void initialiseSector ( SECno sno )
{
   UInt i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         vg_assert(sec->ec2tte_size[e] == 0);
         vg_assert(sec->ec2tte_used[e] == 0);
         vg_assert(sec->ec2tte[e] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      /* tc is measured in ULongs (8 bytes each), hence the 8*. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(Addr)sr_Res(sres);
      /* All TT slots start out empty; thread them onto the free list. */
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         sec->tt[ei].status   = Empty;
         sec->tt[ei].n_tte2ec = 0;
         add_in_empty_tt_list(sno, ei);
      }

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
                                     N_HTTES_PER_SECTOR * sizeof(TTEno) );
         /*NOTREACHED*/
      }
      sec->htt = (TTEno*)(Addr)sr_Res(sres);
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == INV_SNO)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "recycle  sector %d\n", sno);
      n_sectors_recycled++;

      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Needed below by unchain_in_preparation_for_deletion. */
      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         if (sec->tt[ei].status == InUse) {
            vg_assert(sec->tt[ei].n_tte2ec >= 1);
            vg_assert(sec->tt[ei].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[ei].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[ei].entry,
                              sec->tt[ei].vge );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, ei);
         } else {
            vg_assert(sec->tt[ei].n_tte2ec == 0);
         }
         sec->tt[ei].status   = Empty;
         sec->tt[ei].n_tte2ec = 0;
         add_in_empty_tt_list(sno, ei);
      }
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         if (sec->ec2tte_size[e] == 0) {
            vg_assert(sec->ec2tte_used[e] == 0);
            vg_assert(sec->ec2tte[e] == NULL);
         } else {
            vg_assert(sec->ec2tte[e] != NULL);
            ttaux_free(sec->ec2tte[e]);
            sec->ec2tte[e] = NULL;
            sec->ec2tte_size[e] = 0;
            sec->ec2tte_used[e] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      SECno ix;
      for (ix = 0; ix < n_sectors; ix++) {
         if (sector_search_order[ix] == sno)
            break;
      }
      vg_assert(ix >= 0 && ix < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   /* Either way, the sector is now empty and ready for use. */
   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   /* All fast-cache entries may point at dumped translations, or may
      belong to a sector whose search position just changed; flush. */
   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
1590 
1591 /* Add a translation of vge to TT/TC.  The translation is temporarily
1592    in code[0 .. code_len-1].
1593 
1594    pre: youngest_sector points to a valid (although possibly full)
1595    sector.
1596 */
void VG_(add_to_transtab)( const VexGuestExtents* vge,
                           Addr             entry,
                           Addr             code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs )
{
   /* Copy the translation in code[0 .. code_len-1] into the youngest
      sector's code cache (tc), fill a translation-table entry (tt),
      hash it into htt, record its host extent, and make it findable
      via the fast cache and the eclass lists.

      entry             guest address of the translation's entry point
      code/code_len     temporary buffer holding the generated host code
      is_self_checking  whether translation is self-checking (stats only)
      offs_profInc      offset in code of the profile-counter increment
                        to patch, or -1 if none
      n_guest_instrs    number of guest insns; used as profiling weight
   */
   Int    tcAvailQ, reqdQ, y;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
                  entry, code_len);

   /* Stats for the incoming translation. */
   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;   /* space required, in 8-byte words */

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_HTTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
         VG_(dmsg)("transtab: "
                   "declare  sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
                   y, tt_loading_pct, tc_loading_pct,
                   8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   TTEno tteix = get_empty_tt_slot(y);
   TTEntry__init(&sectors[y].tt[tteix]);
   sectors[y].tt[tteix].status = InUse;
   sectors[y].tt[tteix].tcptr  = tcptr;
   sectors[y].tt[tteix].usage.prof.count  = 0;
   /* n_guest_instrs can be zero; use weight 1 in that case so the
      entry can still accumulate a nonzero profiling score. */
   sectors[y].tt[tteix].usage.prof.weight =
      n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[tteix].vge    = *vge;
   sectors[y].tt[tteix].entry  = entry;

   // Point an htt entry to the tt slot
   HTTno htti = HASH_TT(entry);
   vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
   /* Linear probing; empty and tombstoned slots are both reusable.
      Termination relies on htt never being completely full. */
   while (True) {
      if (sectors[y].htt[htti] == HTT_EMPTY
          || sectors[y].htt[htti] == HTT_DELETED)
         break;
      htti++;
      if (htti >= N_HTTES_PER_SECTOR)
         htti = 0;
   }
   sectors[y].htt[htti] = tteix;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host, endness_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[tteix].usage.prof.count );
      /* The patch modified host code; make the icache coherent. */
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = tteix;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, tteix);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], tteix );
}
1762 
1763 
1764 /* Search for the translation of the given guest address.  If
1765    requested, a successful search can also cause the fast-caches to be
1766    updated.
1767 */
Bool VG_(search_transtab) ( /*OUT*/Addr*  res_hcode,
                            /*OUT*/SECno* res_sNo,
                            /*OUT*/TTEno* res_tteNo,
                            Addr          guest_addr,
                            Bool          upd_cache )
{
   /* Look guest_addr up in every sector's hash table, visiting
      sectors in the order given by sector_search_order[].  On a hit,
      optionally refresh the fast cache, fill in whichever OUT params
      are non-NULL, promote the sector one step in the search order,
      and return True.  Return False if not present anywhere. */
   SECno i, sno;
   HTTno j, k, kstart;
   TTEno tti;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);

   /* Search in all the sectors,using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      /* Linear-probe this sector's htt starting at kstart. */
      k = kstart;
      for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         tti = sectors[sno].htt[k];
         if (tti < N_TTES_PER_SECTOR
             && sectors[sno].tt[tti].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[tti].tcptr );
            if (res_hcode)
               *res_hcode = (Addr)sectors[sno].tt[tti].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = tti;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
         /* An EMPTY slot ends the probe chain; DELETED (tombstone)
            slots must be probed past. */
         if (sectors[sno].htt[k] == HTT_EMPTY)
            break; /* not found in this sector */
         k++;
         if (k == N_HTTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
1835 
1836 
1837 /*-------------------------------------------------------------*/
1838 /*--- Delete translations.                                  ---*/
1839 /*-------------------------------------------------------------*/
1840 
1841 /* forward */
1842 static void unredir_discard_translations( Addr, ULong );
1843 
1844 /* Stuff for deleting translations which intersect with a given
1845    address range.  Unfortunately, to make this run at a reasonable
1846    speed, it is complex. */
1847 
1848 static inline
overlap1(Addr s1,ULong r1,Addr s2,ULong r2)1849 Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
1850 {
1851    Addr e1 = s1 + r1 - 1;
1852    Addr e2 = s2 + r2 - 1;
1853    if (e1 < s2 || e2 < s1)
1854       return False;
1855    return True;
1856 }
1857 
1858 static inline
overlaps(Addr start,ULong range,const VexGuestExtents * vge)1859 Bool overlaps ( Addr start, ULong range, const VexGuestExtents* vge )
1860 {
1861    if (overlap1(start, range, vge->base[0], vge->len[0]))
1862       return True;
1863    if (vge->n_used < 2)
1864       return False;
1865    if (overlap1(start, range, vge->base[1], vge->len[1]))
1866       return True;
1867    if (vge->n_used < 3)
1868       return False;
1869    if (overlap1(start, range, vge->base[2], vge->len[2]))
1870       return True;
1871    return False;
1872 }
1873 
1874 
1875 /* Delete a tt entry, and update all the eclass data accordingly. */
1876 
static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   /* Remove translation 'tteno' from sector sec/secNo: unchain it,
      sever its eclass back-links, tombstone its htt slot, mark the
      TTEntry Deleted, put the slot on the sector's free list, update
      stats, and notify the tool if it requested discard events. */
   Int      i, ec_idx;
   EClassNo ec_num;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   /* Mark the entry as deleted in htt.
      Note: we could avoid the below hash table search by
      adding a reference from tte to its hash position in tt. */
   HTTno j;
   HTTno k = HASH_TT(tte->entry);
   vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
   for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
      if (sec->htt[k] == tteno)
         break;
      k++;
      if (k == N_HTTES_PER_SECTOR)
         k = 0;
   }
   vg_assert(j < N_HTTES_PER_SECTOR);
   /* Tombstone rather than HTT_EMPTY, so probe chains through this
      slot stay intact (see VG_(search_transtab)). */
   sec->htt[k]   = HTT_DELETED;
   tte->status   = Deleted;
   tte->n_tte2ec = 0;
   add_in_empty_tt_list(secNo, tteno);

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
1940 
1941 
1942 /* Delete translations from sec which intersect specified range, but
1943    only consider translations in the specified eclass. */
1944 
1945 static
delete_translations_in_sector_eclass(Sector * sec,SECno secNo,Addr guest_start,ULong range,EClassNo ec,VexArch arch_host,VexEndness endness_host)1946 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo,
1947                                             Addr guest_start, ULong range,
1948                                             EClassNo ec,
1949                                             VexArch arch_host,
1950                                             VexEndness endness_host )
1951 {
1952    Int      i;
1953    TTEno    tteno;
1954    Bool     anyDeld = False;
1955    TTEntry* tte;
1956 
1957    vg_assert(ec >= 0 && ec < ECLASS_N);
1958 
1959    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1960 
1961       tteno = sec->ec2tte[ec][i];
1962       if (tteno == EC2TTE_DELETED) {
1963          /* already deleted */
1964          continue;
1965       }
1966 
1967       vg_assert(tteno < N_TTES_PER_SECTOR);
1968 
1969       tte = &sec->tt[tteno];
1970       vg_assert(tte->status == InUse);
1971 
1972       if (overlaps( guest_start, range, &tte->vge )) {
1973          anyDeld = True;
1974          delete_tte( sec, secNo, tteno, arch_host, endness_host );
1975       }
1976 
1977    }
1978 
1979    return anyDeld;
1980 }
1981 
1982 
1983 /* Delete translations from sec which intersect specified range, the
1984    slow way, by inspecting all translations in sec. */
1985 
1986 static
delete_translations_in_sector(Sector * sec,SECno secNo,Addr guest_start,ULong range,VexArch arch_host,VexEndness endness_host)1987 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo,
1988                                      Addr guest_start, ULong range,
1989                                      VexArch arch_host,
1990                                      VexEndness endness_host )
1991 {
1992    TTEno i;
1993    Bool  anyDeld = False;
1994 
1995    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1996       if (sec->tt[i].status == InUse
1997           && overlaps( guest_start, range, &sec->tt[i].vge )) {
1998          anyDeld = True;
1999          delete_tte( sec, secNo, i, arch_host, endness_host );
2000       }
2001    }
2002 
2003    return anyDeld;
2004 }
2005 
2006 
VG_(discard_translations)2007 void VG_(discard_translations) ( Addr guest_start, ULong range,
2008                                  const HChar* who )
2009 {
2010    Sector* sec;
2011    SECno   sno;
2012    EClassNo ec;
2013    Bool    anyDeleted = False;
2014 
2015    vg_assert(init_done);
2016 
2017    VG_(debugLog)(2, "transtab",
2018                     "discard_translations(0x%lx, %llu) req by %s\n",
2019                     guest_start, range, who );
2020 
2021    /* Pre-deletion sanity check */
2022    if (VG_(clo_sanity_level >= 4)) {
2023       Bool sane = sanity_check_all_sectors();
2024       vg_assert(sane);
2025    }
2026 
2027    if (range == 0)
2028       return;
2029 
2030    VexArch     arch_host = VexArch_INVALID;
2031    VexArchInfo archinfo_host;
2032    VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
2033    VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
2034    VexEndness endness_host = archinfo_host.endness;
2035 
2036    /* There are two different ways to do this.
2037 
2038       If the range fits within a single address-range equivalence
2039       class, as will be the case for a cache line sized invalidation,
2040       then we only have to inspect the set of translations listed in
2041       that equivalence class, and also in the "sin-bin" equivalence
2042       class ECLASS_MISC.
2043 
2044       Otherwise, the invalidation is of a larger range and probably
2045       results from munmap.  In this case it's (probably!) faster just
2046       to inspect all translations, dump those we don't want, and
2047       regenerate the equivalence class information (since modifying it
2048       in-situ is even more expensive).
2049    */
2050 
2051    /* First off, figure out if the range falls within a single class,
2052       and if so which one. */
2053 
2054    ec = ECLASS_MISC;
2055    if (range < (1ULL << ECLASS_SHIFT))
2056       ec = range_to_eclass( guest_start, (UInt)range );
2057 
2058    /* if ec is ECLASS_MISC then we aren't looking at just a single
2059       class, so use the slow scheme.  Else use the fast scheme,
2060       examining 'ec' and ECLASS_MISC. */
2061 
2062    if (ec != ECLASS_MISC) {
2063 
2064       VG_(debugLog)(2, "transtab",
2065                        "                    FAST, ec = %d\n", ec);
2066 
2067       /* Fast scheme */
2068       vg_assert(ec >= 0 && ec < ECLASS_MISC);
2069 
2070       for (sno = 0; sno < n_sectors; sno++) {
2071          sec = &sectors[sno];
2072          if (sec->tc == NULL)
2073             continue;
2074          anyDeleted |= delete_translations_in_sector_eclass(
2075                           sec, sno, guest_start, range, ec,
2076                           arch_host, endness_host
2077                        );
2078          anyDeleted |= delete_translations_in_sector_eclass(
2079                           sec, sno, guest_start, range, ECLASS_MISC,
2080                           arch_host, endness_host
2081                        );
2082       }
2083 
2084    } else {
2085 
2086       /* slow scheme */
2087 
2088       VG_(debugLog)(2, "transtab",
2089                        "                    SLOW, ec = %d\n", ec);
2090 
2091       for (sno = 0; sno < n_sectors; sno++) {
2092          sec = &sectors[sno];
2093          if (sec->tc == NULL)
2094             continue;
2095          anyDeleted |= delete_translations_in_sector(
2096                           sec, sno, guest_start, range,
2097                           arch_host, endness_host
2098                        );
2099       }
2100 
2101    }
2102 
2103    if (anyDeleted)
2104       invalidateFastCache();
2105 
2106    /* don't forget the no-redir cache */
2107    unredir_discard_translations( guest_start, range );
2108 
2109    /* Post-deletion sanity check */
2110    if (VG_(clo_sanity_level >= 4)) {
2111       TTEno    i;
2112       TTEntry* tte;
2113       Bool     sane = sanity_check_all_sectors();
2114       vg_assert(sane);
2115       /* But now, also check the requested address range isn't
2116          present anywhere. */
2117       for (sno = 0; sno < n_sectors; sno++) {
2118          sec = &sectors[sno];
2119          if (sec->tc == NULL)
2120             continue;
2121          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2122             tte = &sec->tt[i];
2123             if (tte->status != InUse)
2124                continue;
2125             vg_assert(!overlaps( guest_start, range, &tte->vge ));
2126          }
2127       }
2128    }
2129 }
2130 
/* Whether or not tools may discard translations.  Remains False until
   the core flags that it is safe; checked by
   VG_(discard_translations_safely). */
Bool  VG_(ok_to_discard_translations) = False;
2133 
2134 /* This function is exported to tools which can use it to discard
2135    translations, provided it is safe to do so. */
void VG_(discard_translations_safely) ( Addr  start, SizeT len,
                                        const HChar* who )
{
   /* Tool-facing wrapper around VG_(discard_translations): legal only
      once the core has set VG_(ok_to_discard_translations). */
   vg_assert(VG_(ok_to_discard_translations));
   VG_(discard_translations)(start, len, who);
}
2142 
2143 /*------------------------------------------------------------*/
2144 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
2145 /*------------------------------------------------------------*/
2146 
2147 /* A very simple translation cache which holds a small number of
2148    unredirected translations.  This is completely independent of the
2149    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
2150    both structures are simply dumped and we start over.
2151 
2152    Since these translations are unredirected, the search key is (by
2153    definition) the first address entry in the .vge field. */
2154 
2155 /* Sized to hold 500 translations of average size 1000 bytes. */
2156 
#define UNREDIR_SZB   1000   /* assumed average translation size, bytes */

#define N_UNREDIR_TT  500    /* number of unredir_tt slots */
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
                             /* unredir_tc capacity, in ULong words */

typedef
   struct {
      VexGuestExtents vge;    /* guest extents; base[0] is the search key */
      Addr            hcode;  /* host code address within unredir_tc */
      Bool            inUse;  /* does this slot hold a translation? */
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong    *unredir_tc;
static Int      unredir_tc_used = N_UNREDIR_TCQ; /* starts "full", so no
                                   entry can be added before
                                   init_unredir_tt_tc has run */

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;
2181 
2182 
init_unredir_tt_tc(void)2183 static void init_unredir_tt_tc ( void )
2184 {
2185    Int i;
2186    if (unredir_tc == NULL) {
2187       SysRes sres = VG_(am_mmap_anon_float_valgrind)
2188                        ( N_UNREDIR_TT * UNREDIR_SZB );
2189       if (sr_isError(sres)) {
2190          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
2191                                      N_UNREDIR_TT * UNREDIR_SZB);
2192          /*NOTREACHED*/
2193       }
2194       unredir_tc = (ULong *)(Addr)sr_Res(sres);
2195    }
2196    unredir_tc_used = 0;
2197    for (i = 0; i < N_UNREDIR_TT; i++)
2198       unredir_tt[i].inUse = False;
2199    unredir_tt_highwater = -1;
2200 }
2201 
2202 /* Do a sanity check; return False on failure. */
sanity_check_redir_tt_tc(void)2203 static Bool sanity_check_redir_tt_tc ( void )
2204 {
2205    Int i;
2206    if (unredir_tt_highwater < -1) return False;
2207    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2208 
2209    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2210       if (unredir_tt[i].inUse)
2211          return False;
2212 
2213    if (unredir_tc_used < 0) return False;
2214    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2215 
2216    return True;
2217 }
2218 
2219 
2220 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
2221    is temporarily in code[0 .. code_len-1].
2222 */
VG_(add_to_unredir_transtab)2223 void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
2224                                    Addr             entry,
2225                                    Addr             code,
2226                                    UInt             code_len )
2227 {
2228    Int   i, j, code_szQ;
2229    HChar *srcP, *dstP;
2230 
2231    vg_assert(sanity_check_redir_tt_tc());
2232 
2233    /* This is the whole point: it's not redirected! */
2234    vg_assert(entry == vge->base[0]);
2235 
2236    /* How many unredir_tt slots are needed */
2237    code_szQ = (code_len + 7) / 8;
2238 
2239    /* Look for an empty unredir_tc slot */
2240    for (i = 0; i < N_UNREDIR_TT; i++)
2241       if (!unredir_tt[i].inUse)
2242          break;
2243 
2244    if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2245       /* It's full; dump everything we currently have */
2246       init_unredir_tt_tc();
2247       i = 0;
2248    }
2249 
2250    vg_assert(unredir_tc_used >= 0);
2251    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2252    vg_assert(code_szQ > 0);
2253    vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2254    vg_assert(i >= 0 && i < N_UNREDIR_TT);
2255    vg_assert(unredir_tt[i].inUse == False);
2256 
2257    if (i > unredir_tt_highwater)
2258       unredir_tt_highwater = i;
2259 
2260    dstP = (HChar*)&unredir_tc[unredir_tc_used];
2261    srcP = (HChar*)code;
2262    for (j = 0; j < code_len; j++)
2263       dstP[j] = srcP[j];
2264 
2265    VG_(invalidate_icache)( dstP, code_len );
2266 
2267    unredir_tt[i].inUse = True;
2268    unredir_tt[i].vge   = *vge;
2269    unredir_tt[i].hcode = (Addr)dstP;
2270 
2271    unredir_tc_used += code_szQ;
2272    vg_assert(unredir_tc_used >= 0);
2273    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2274 
2275    vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2276 }
2277 
VG_(search_unredir_transtab)2278 Bool VG_(search_unredir_transtab) ( /*OUT*/Addr*  result,
2279                                     Addr          guest_addr )
2280 {
2281    Int i;
2282    for (i = 0; i < N_UNREDIR_TT; i++) {
2283       if (!unredir_tt[i].inUse)
2284          continue;
2285       if (unredir_tt[i].vge.base[0] == guest_addr) {
2286          *result = unredir_tt[i].hcode;
2287          return True;
2288       }
2289    }
2290    return False;
2291 }
2292 
static void unredir_discard_translations( Addr guest_start, ULong range )
{
   /* Invalidate every in-use entry whose guest extents intersect the
      given range.  Only slots up to the highwater mark can be in
      use, so the scan stops there. */
   Int slot;

   vg_assert(sanity_check_redir_tt_tc());

   for (slot = 0; slot <= unredir_tt_highwater; slot++) {
      if (!unredir_tt[slot].inUse)
         continue;
      if (overlaps( guest_start, range, &unredir_tt[slot].vge))
         unredir_tt[slot].inUse = False;
   }
}
2305 
2306 
2307 /*------------------------------------------------------------*/
2308 /*--- Initialisation.                                      ---*/
2309 /*------------------------------------------------------------*/
2310 
void VG_(init_tt_tc) ( void )
{
   /* One-time startup of the translation table/cache machinery:
      check build-time invariants, size the per-sector code cache from
      the tool- or user-supplied average translation size, zero all
      sectors and the search-order table, and initialise the fast
      cache and the unredirected tt/tc. */
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(TTEno) == sizeof(HTTno));
   vg_assert(sizeof(TTEno) == 2);
   vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
   /* The sentinel values must not collide with real tt indices. */
   vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
   vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
   vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  */
   if (VG_(clo_avg_transtab_entry_size) == 0)
      avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   else
      avg_codeszQ   = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);

   /* Sector count is user-configurable within fixed bounds. */
   n_sectors = VG_(clo_num_transtab_sectors);
   vg_assert(n_sectors >= MIN_N_SECTORS);
   vg_assert(n_sectors <= MAX_N_SECTORS);

   /* Initialise the sectors, even the ones we aren't going to use.
      Set all fields to zero. */
   youngest_sector = 0;
   for (i = 0; i < MAX_N_SECTORS; i++)
      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));

   /* Initialise the sector_search_order hint table, including the
      entries we aren't going to use. */
   for (i = 0; i < MAX_N_SECTORS; i++)
      sector_search_order[i] = INV_SNO;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
       || VG_(debugLog_getLevel) () >= 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
         "%stool provided default %u\n",
         VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
         VG_(clo_avg_transtab_entry_size),
         VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
         VG_(details).avg_translation_sizeB);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total TC\n",
          n_sectors, 8 * tc_sector_szQ,
          n_sectors * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tables[%d]  of %d bytes each = %d total TT\n",
          n_sectors, N_TTES_PER_SECTOR,
          (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
          (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tt entries each = %d total tt entries\n",
         N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d htt[%d] of %d bytes each = %d total HTT"
                       " (htt[%d] %d%% max occup)\n",
         n_sectors, N_HTTES_PER_SECTOR,
         (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
         (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
         N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
   }
}
2403 
2404 
2405 /*------------------------------------------------------------*/
2406 /*--- Printing out statistics.                             ---*/
2407 /*------------------------------------------------------------*/
2408 
safe_idiv(ULong a,ULong b)2409 static Double safe_idiv( ULong a, ULong b )
2410 {
2411    return (b == 0 ? 0 : (Double)a / (Double)b);
2412 }
2413 
UInt VG_(get_bbs_translated) ( void )
{
   /* Number of translations added so far; n_in_count is incremented
      once per VG_(add_to_transtab) call. */
   return n_in_count;
}
2418 
void VG_(print_tt_tc_stats) ( void )
{
   /* Dump cumulative TT/TC statistics: lookup/probe counts,
      fast-cache activity, and translation in/dump/discard volumes.
      With DEBUG_TRANSTAB, also dump sector 0's eclass occupancy. */
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'llu "
                "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
                "avg tce size %llu\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(n_in_tsize, n_in_osize),
                n_in_sc_count,
                n_in_tsize / (n_in_count ? n_in_count : 1));
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?) "
                "(sectors recycled %'llu)\n",
                n_dump_count, n_dump_osize, n_sectors_recycled );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (DEBUG_TRANSTAB) {
      VG_(printf)("\n");
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
         if (e % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}
2454 
2455 /*------------------------------------------------------------*/
2456 /*--- Printing out of profiling results.                   ---*/
2457 /*------------------------------------------------------------*/
2458 
score(const TTEntry * tte)2459 static ULong score ( const TTEntry* tte )
2460 {
2461    return ((ULong)tte->usage.prof.weight) * ((ULong)tte->usage.prof.count);
2462 }
2463 
/* Fill tops[0 .. n_tops-1] with the highest-scoring InUse
   translations across all sectors, in descending score order
   (tops[0] is the top scorer; unused slots keep addr == 0).
   Returns the total score summed over every InUse translation.
   Side effect: zeroes every translation's profile count, so each
   call reports activity since the previous call rather than
   whole-run totals. */
ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   SECno sno;
   Int   r, s;
   ULong score_total;
   TTEno i;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (s = 0; s < n_tops; s++) {
      tops[s].addr  = 0;
      tops[s].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;   /* sector not in use */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i].  Scan upwards from
            the bottom of the table; stop at the first occupied slot
            whose score is at least as big (or at the top, r == -1). */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
             if (tops[r].addr == 0) {
               /* empty slot: keep scanning upwards */
               r--;
               continue;
             }
             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
                /* current entry outscores the occupant: keep going */
                r--;
                continue;
             }
             break;
         }
         /* r now sits one slot above the insertion point. */
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         sectors[sno].tt[i].usage.prof.count = 0;
      }
   }

   return score_total;
}
2531 
2532 /*--------------------------------------------------------------------*/
2533 /*--- end                                                          ---*/
2534 /*--------------------------------------------------------------------*/
2535