/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache. ---*/
/*--- m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static SECno n_sectors = 0;

/* Average size of a transtab code entry. 0 means to use the tool
   provided default. */
UInt VG_(clo_avg_transtab_entry_size) = 0;

/*------------------ CONSTANTS ------------------*/
/* Number of entries in the hash table of each sector.  This needs to
   be a prime number to work properly, and it must be small enough
   that a TTE index fits in a UShort while leaving room for the
   special values 0xFFFF (EC2TTE_DELETED, HTT_DELETED), denoting
   'deleted', and 0xFFFE (HTT_EMPTY), denoting 'empty', in the hash
   table.
   It is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
#define HTT_DELETED    EC2TTE_DELETED
#define HTT_EMPTY      0xFFFE

// HTTno is the Sector->htt hash table index. Must be the same type as TTEno.
typedef UShort HTTno;

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR \
           ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
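
/* For illustration (a note added for clarity, not from the original
   source): with the default constants above this evaluates to
   (65521 * 65) / 100 == 42588 usable TT entries per sector, so the
   hash table can never become completely full. */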

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
   ECLASS_N must fit in an EClassNo. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);
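
/* Illustrative note (not from the original source): with these
   values, each of the 2^8 = 256 specific bins covers 2^11 = 2048
   bytes, and the bin pattern repeats every 2^19 = 512KB of address
   space.  In practice a guest range gets a specific bin only if it
   lies entirely within one 2KB-aligned chunk; anything else lands in
   ECLASS_MISC. */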

typedef UShort EClassNo;

/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      SECno from_sNo;   /* sector number */
      TTEno from_tteNo; /* TTE number in given sector */
      UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
                                               where the patch is */
      Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      SECno to_sNo;    /* sector number */
      TTEno to_tteNo;  /* TTE number in given sector */
      UInt  from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      Bool    has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt    n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
      union {
         InEdge  fixed[N_FIXED_IN_EDGE_ARR]; /* if !has_var */
         XArray* var; /* XArray* of InEdge */ /* if has_var */
      } edges;
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      Bool    has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt    n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      union {
         OutEdge fixed[N_FIXED_OUT_EDGE_ARR]; /* if !has_var */
         XArray* var; /* XArray* of OutEdge */ /* if has_var */
      } edges;
   }
   OutEdgeArr;
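
/* Illustrative note (not from the original source): the intent
   appears to be a small-array optimisation -- most blocks have only
   a few chain edges, so up to N_FIXED_IN_EDGE_ARR / N_FIXED_OUT_EDGE_ARR
   edges are stored inline, and the array only switches ('has_var') to
   a heap-allocated XArray when the fixed part overflows; see
   InEdgeArr__add / OutEdgeArr__add below.  The common case therefore
   never allocates. */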


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too. */
typedef
   struct {
      union {
         struct {
            /* Profiling only: the count and weight (arbitrary meaning) for
               this translation.  Weight is a property of the translation
               itself and computed once when the translation is created.
               Count is an entry count for the translation and is
               incremented by 1 every time the translation is used, if we
               are profiling. */
            ULong  count;
            UShort weight;
         } prof; // if status == InUse
         TTEno next_empty_tte; // if status != InUse
      } usage;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!).
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort   n_tte2ec;     // # tte2ec pointers (1 to 3)
      EClassNo tte2ec_ec[3]; // for each, the eclass #
      UInt     tte2ec_ix[3]; // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             Among others, the gdbserver discards one or more translations
             when a breakpoint is inserted or removed at an Addr,
             or when single stepping mode is enabled/disabled
             or when a translation is instrumented for gdbserver
             (all the target jumps of this translation are
             invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
         check that the 'from' translation still exists before
         doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a not-found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not-yet-translated
         jump is to be executed.  The execution of the Deleted translation
         E will continue until a non-patched jump is encountered.
         This situation is then similar to the 'erasing' case above:
         the current translation E can be erased or overwritten, as the
         execution will continue at the new translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      TTEno  tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry usage.prof.count and usage.prof.weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try to size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* A hash table, mapping guest address to an index in the tt array.
         htt is a fixed size, always containing
         exactly N_HTTES_PER_SECTOR entries. */
      TTEno* htt;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
      TTEno empty_tt_list;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int    ec2tte_size[ECLASS_N];
      Int    ec2tte_used[ECLASS_N];
      TTEno* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all the sectors have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and < n_sectors.
   The initial value indicates the TT/TC system is not yet
   initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = INV_SNO;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  INV_SNO denotes "no entry". */
static SECno sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
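
/* Illustrative sketch (not the actual dispatcher code, which is
   hand-written assembly in m_dispatch): conceptually, the fast-path
   lookup done on every block-to-block transition is

      UWord cno = VG_TT_FAST_HASH(guest_pc);
      if (VG_(tt_fast)[cno].guest == guest_pc)
         jump to VG_(tt_fast)[cno].host;        // hit: run translation
      else
         fall back to VG_(search_transtab)(..); // miss: full lookup

   A flush simply sets every .guest to TRANSTAB_BOGUS_GUEST_ADDR so
   that every probe misses; see invalidateFastCache below. */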

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;
static ULong n_sectors_recycled = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = INV_SNO; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = INV_SNO; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( const InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->edges.var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->edges.var);
      iea->edges.var = NULL;
      iea->has_var = False;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->edges.fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->edges.fixed[i] = iea->edges.fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->edges.var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                                  ttaux_free,
                                  sizeof(InEdge));
         VG_(hintSizeXA) (var, iea->n_fixed + 1);
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(var, &iea->edges.fixed[i]);
         }
         VG_(addToXA)(var, ie);
         iea->n_fixed = 0;
         iea->has_var = True;
         iea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         iea->edges.fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->edges.var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->edges.var);
      oea->edges.var = NULL;
      oea->has_var = False;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->edges.fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->edges.fixed[i] = oea->edges.fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->edges.var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                                  ttaux_free,
                                  sizeof(OutEdge));
         VG_(hintSizeXA) (var, oea->n_fixed+1);
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(var, &oea->edges.fixed[i]);
         }
         VG_(addToXA)(var, oe);
         oea->n_fixed = 0;
         oea->has_var = True;
         oea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         oea->edges.fixed[oea->n_fixed++] = *oe;
      }
   }
}

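/* Illustrative note (not from the original source): because host
   extents never overlap, "entirely below" / "entirely above" gives a
   total order and any overlap compares equal.  Hence probing with a
   1-byte key finds the extent containing that byte, which is how
   find_TTEntry_from_hcode below uses this comparator. */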
static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const TTEno tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%lx tt.tcptr 0x%p\n",           \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->tt[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents which was
      inserted for an entry which was changed to Deleted then
      re-used after.  If this entry was re-used, then its tcptr
      is >= the host_extents start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      entry or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
                              /*OUT*/TTEno* from_tteNo,
                              void* hcode )
{
   SECno i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      SECno sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      const Sector* sec = &sectors[sno];
      const XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         TTEno tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that the code has not been found, as it cannot be
            patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( const void* hcode )
{
   SECno i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == INV_SNO)
         break; /* run out of sectors to search */
      if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
          && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
                                    + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              SECno to_sNo,
                              TTEno to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   SECno from_sNo   = INV_SNO;
   TTEno from_tteNo = INV_TTE;
   Bool  from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           SECno here_sNo, TTEno here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(arch_host);
   TTEntry* here_tte = index_tte(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%lx tt.tcptr 0x%p\n",
                  here_tte->entry, here_tte->tcptr);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static EClassNo range_to_eclass ( Addr start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
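
/* Worked example (illustrative, not from the original source): with
   ECLASS_SHIFT == 11 and ECLASS_WIDTH == 8, the range
   [0x1000, 0x10FF] has loBits == hiBits == (0x1000 >> 11) & 0xFF == 2,
   so it lands in eclass 2; the range [0x1F00, 0x20FF] has loBits == 3
   but hiBits == 4, so it straddles a 2KB boundary and lands in
   ECLASS_MISC. */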


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   EClassNos.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
                                  const VexGuestExtents* vge )
{

#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec;
   EClassNo r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns the used location in the eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
{
   Int   old_sz, new_sz, i, r;
   TTEno *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(TTEno));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}
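
/* Illustrative note (not from the original source): the growth rule
   above doubles small arrays and grows larger ones by 1.5x, so the
   capacities run 8, 16, 32, 64, 96, 144, 216, ... -- fast growth
   while growth is cheap, then gentler growth to limit wasted space. */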


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
{
   Int i, r;
   EClassNo eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      j, k, n, ec_idx;
   EClassNo i;
   EClassNo ec_num;
   TTEntry* tte;
   TTEno    tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {

      tte = &sec->tt[tteno];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != tteno)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   SECno i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO
          || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != INV_SNO)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   SECno   sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail "
                       "(expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( const VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( SECno sector )
{
   if (sector == INV_SNO || sector >= n_sectors)
      return False;
   return True;
}

static inline HTTno HASH_TT ( Addr key )
{
   UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return (HTTno)(k32 % N_HTTES_PER_SECTOR);
}
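
/* Illustrative note (not from the original source): guest code
   addresses are usually aligned, so their low bits carry little
   information; the rotate-right by 7 mixes higher-order bits into
   the low end before the value is reduced modulo the (prime) table
   size, which helps spread entries evenly across the hash table. */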

static void setFastCacheEntry ( Addr key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

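/* Note added for clarity (illustrative): the two helpers below
   maintain a simple singly-linked free list of unused tt slots,
   threaded through usage.next_empty_tte and headed by
   sec->empty_tt_list, with HTT_EMPTY acting as the list terminator.
   The list is rebuilt whenever a sector is initialised or recycled. */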
static TTEno get_empty_tt_slot(SECno sNo)
{
   TTEno i;

   i = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = sectors[sNo].tt[i].usage.next_empty_tte;

   vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);

   return i;
}

static void add_in_empty_tt_list (SECno sNo, TTEno tteno)
{
   sectors[sNo].tt[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = tteno;
}

static void initialiseSector ( SECno sno )
{
   UInt i;
   SysRes sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         vg_assert(sec->ec2tte_size[e] == 0);
         vg_assert(sec->ec2tte_used[e] == 0);
         vg_assert(sec->ec2tte[e] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(Addr)sr_Res(sres);
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         sec->tt[ei].status   = Empty;
         sec->tt[ei].n_tte2ec = 0;
         add_in_empty_tt_list(sno, ei);
      }

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
                                     N_HTTES_PER_SECTOR * sizeof(TTEno) );
         /*NOTREACHED*/
      }
      sec->htt = (TTEno*)(Addr)sr_Res(sres);
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == INV_SNO)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
      n_sectors_recycled++;

      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         if (sec->tt[ei].status == InUse) {
            vg_assert(sec->tt[ei].n_tte2ec >= 1);
            vg_assert(sec->tt[ei].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[ei].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[ei].entry,
                              sec->tt[ei].vge );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, ei);
         } else {
            vg_assert(sec->tt[ei].n_tte2ec == 0);
         }
         sec->tt[ei].status   = Empty;
         sec->tt[ei].n_tte2ec = 0;
         add_in_empty_tt_list(sno, ei);
      }
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         if (sec->ec2tte_size[e] == 0) {
            vg_assert(sec->ec2tte_used[e] == 0);
            vg_assert(sec->ec2tte[e] == NULL);
         } else {
            vg_assert(sec->ec2tte[e] != NULL);
            ttaux_free(sec->ec2tte[e]);
            sec->ec2tte[e] = NULL;
            sec->ec2tte_size[e] = 0;
            sec->ec2tte_used[e] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      SECno ix;
      for (ix = 0; ix < n_sectors; ix++) {
         if (sector_search_order[ix] == sno)
            break;
      }
      vg_assert(ix >= 0 && ix < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
1590
1591 /* Add a translation of vge to TT/TC. The translation is temporarily
1592 in code[0 .. code_len-1].
1593
1594 pre: youngest_sector points to a valid (although possibly full)
1595 sector.
1596 */
VG_(add_to_transtab)1597 void VG_(add_to_transtab)( const VexGuestExtents* vge,
1598 Addr entry,
1599 Addr code,
1600 UInt code_len,
1601 Bool is_self_checking,
1602 Int offs_profInc,
1603 UInt n_guest_instrs )
1604 {
1605 Int tcAvailQ, reqdQ, y;
1606 ULong *tcptr, *tcptr2;
1607 UChar* srcP;
1608 UChar* dstP;
1609
1610 vg_assert(init_done);
1611 vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
1612
1613 /* 60000: should agree with N_TMPBUF in m_translate.c. */
1614 vg_assert(code_len > 0 && code_len < 60000);
1615
1616 /* Generally stay sane */
1617 vg_assert(n_guest_instrs < 200); /* it can be zero, tho */
1618
1619 if (DEBUG_TRANSTAB)
1620 VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
1621 entry, code_len);
1622
1623 n_in_count++;
1624 n_in_tsize += code_len;
1625 n_in_osize += vge_osize(vge);
1626 if (is_self_checking)
1627 n_in_sc_count++;
1628
1629 y = youngest_sector;
1630 vg_assert(isValidSector(y));
1631
1632 if (sectors[y].tc == NULL)
1633 initialiseSector(y);
1634
1635 /* Try putting the translation in this sector. */
1636 reqdQ = (code_len + 7) >> 3;
1637
1638 /* Will it fit in tc? */
1639 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ]))
1640 - ((ULong*)(sectors[y].tc_next));
1641 vg_assert(tcAvailQ >= 0);
1642 vg_assert(tcAvailQ <= tc_sector_szQ);
1643
1644 if (tcAvailQ < reqdQ
1645 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
1646 /* No. So move on to the next sector. Either it's never been
1647 used before, in which case it will get its tt/tc allocated
1648 now, or it has been used before, in which case it is set to be
1649 empty, hence throwing out the oldest sector. */
1650 vg_assert(tc_sector_szQ > 0);
1651 Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
1652 / N_HTTES_PER_SECTOR;
1653 Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
1654 / tc_sector_szQ;
1655 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
1656 VG_(dmsg)("transtab: "
1657 "declare sector %d full "
1658 "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
1659 y, tt_loading_pct, tc_loading_pct,
1660 8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
1661 }
1662 youngest_sector++;
1663 if (youngest_sector >= n_sectors)
1664 youngest_sector = 0;
1665 y = youngest_sector;
1666 initialiseSector(y);
1667 }
1668
1669 /* Be sure ... */
1670    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1671 - ((ULong*)(sectors[y].tc_next));
1672 vg_assert(tcAvailQ >= 0);
1673 vg_assert(tcAvailQ <= tc_sector_szQ);
1674 vg_assert(tcAvailQ >= reqdQ);
1675 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
1676 vg_assert(sectors[y].tt_n_inuse >= 0);
1677
1678 /* Copy into tc. */
1679 tcptr = sectors[y].tc_next;
1680    vg_assert(tcptr >= &sectors[y].tc[0]);
1681    vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
1682
1683 dstP = (UChar*)tcptr;
1684 srcP = (UChar*)code;
1685 VG_(memcpy)(dstP, srcP, code_len);
1686 sectors[y].tc_next += reqdQ;
1687 sectors[y].tt_n_inuse++;
1688
1689 /* more paranoia */
1690 tcptr2 = sectors[y].tc_next;
1691    vg_assert(tcptr2 >= &sectors[y].tc[0]);
1692    vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1693
1694 /* Find an empty tt slot, and use it. There must be such a slot
1695 since tt is never allowed to get completely full. */
1696 TTEno tteix = get_empty_tt_slot(y);
1697    TTEntry__init(&sectors[y].tt[tteix]);
1698 sectors[y].tt[tteix].status = InUse;
1699 sectors[y].tt[tteix].tcptr = tcptr;
1700 sectors[y].tt[tteix].usage.prof.count = 0;
1701 sectors[y].tt[tteix].usage.prof.weight =
1702 n_guest_instrs == 0 ? 1 : n_guest_instrs;
1703 sectors[y].tt[tteix].vge = *vge;
1704 sectors[y].tt[tteix].entry = entry;
1705
1706 // Point an htt entry to the tt slot
1707 HTTno htti = HASH_TT(entry);
1708 vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
1709 while (True) {
1710 if (sectors[y].htt[htti] == HTT_EMPTY
1711 || sectors[y].htt[htti] == HTT_DELETED)
1712 break;
1713 htti++;
1714 if (htti >= N_HTTES_PER_SECTOR)
1715 htti = 0;
1716 }
1717 sectors[y].htt[htti] = tteix;
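   /* This is plain linear probing: start at HASH_TT(entry) and scan
      forwards, wrapping at the end of the table, until the first
      EMPTY or DELETED slot.  The loop must terminate, since tt is
      never allowed to fill completely (SECTOR_TT_LIMIT_PERCENT). */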
1718
1719 /* Patch in the profile counter location, if necessary. */
1720 if (offs_profInc != -1) {
1721 vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
1722 VexArch arch_host = VexArch_INVALID;
1723 VexArchInfo archinfo_host;
1724 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
1725 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
1726 VexEndness endness_host = archinfo_host.endness;
1727 VexInvalRange vir
1728 = LibVEX_PatchProfInc( arch_host, endness_host,
1729 dstP + offs_profInc,
1730                              &sectors[y].tt[tteix].usage.prof.count );
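      /* The patch rewrites the increment code at dstP + offs_profInc
         so that it bumps this entry's .usage.prof.count in place;
         vir describes the range of host code bytes modified, which
         must now be flushed from the icache. */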
1731 VG_(invalidate_icache)( (void*)vir.start, vir.len );
1732 }
1733
1734 VG_(invalidate_icache)( dstP, code_len );
1735
1736 /* Add this entry to the host_extents map, checking that we're
1737 adding in order. */
1738 { HostExtent hx;
1739 hx.start = (UChar*)tcptr;
1740 hx.len = code_len;
1741 hx.tteNo = tteix;
1742 vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
1743 XArray* hx_array = sectors[y].host_extents;
1744 vg_assert(hx_array);
1745 Word n = VG_(sizeXA)(hx_array);
1746 if (n > 0) {
1747 HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
1748 vg_assert(hx_prev->start + hx_prev->len <= hx.start);
1749 }
1750 VG_(addToXA)(hx_array, &hx);
1751 if (DEBUG_TRANSTAB)
1752 VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
1753 hx.start, hx.len, y, tteix);
1754 }
1755
1756 /* Update the fast-cache. */
1757 setFastCacheEntry( entry, tcptr );
1758
1759 /* Note the eclass numbers for this translation. */
1760    upd_eclasses_after_add( &sectors[y], tteix );
1761 }
1762
1763
1764 /* Search for the translation of the given guest address. If
1765 requested, a successful search can also cause the fast-caches to be
1766 updated.
1767 */
1768 Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode,
1769 /*OUT*/SECno* res_sNo,
1770 /*OUT*/TTEno* res_tteNo,
1771 Addr guest_addr,
1772 Bool upd_cache )
1773 {
1774 SECno i, sno;
1775 HTTno j, k, kstart;
1776 TTEno tti;
1777
1778 vg_assert(init_done);
1779 /* Find the initial probe point just once. It will be the same in
1780 all sectors and avoids multiple expensive % operations. */
1781 n_full_lookups++;
1782 kstart = HASH_TT(guest_addr);
1783 vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);
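   /* kstart depends only on guest_addr, so a single HASH_TT
      evaluation (ultimately a modulo by N_HTTES_PER_SECTOR) serves
      for every sector visited below. */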
1784
1785    /* Search in all the sectors, using sector_search_order[] as a
1786 heuristic guide as to what order to visit the sectors. */
1787 for (i = 0; i < n_sectors; i++) {
1788
1789 sno = sector_search_order[i];
1790 if (UNLIKELY(sno == INV_SNO))
1791 return False; /* run out of sectors to search */
1792
1793 k = kstart;
1794 for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
1795 n_lookup_probes++;
1796 tti = sectors[sno].htt[k];
1797 if (tti < N_TTES_PER_SECTOR
1798 && sectors[sno].tt[tti].entry == guest_addr) {
1799 /* found it */
1800 if (upd_cache)
1801 setFastCacheEntry(
1802 guest_addr, sectors[sno].tt[tti].tcptr );
1803 if (res_hcode)
1804 *res_hcode = (Addr)sectors[sno].tt[tti].tcptr;
1805 if (res_sNo)
1806 *res_sNo = sno;
1807 if (res_tteNo)
1808 *res_tteNo = tti;
1809 /* pull this one one step closer to the front. For large
1810 apps this more or less halves the number of required
1811 probes. */
1812 if (i > 0) {
1813 Int tmp = sector_search_order[i-1];
1814 sector_search_order[i-1] = sector_search_order[i];
1815 sector_search_order[i] = tmp;
1816 }
1817 return True;
1818 }
1819 // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
1820 if (sectors[sno].htt[k] == HTT_EMPTY)
1821 break; /* not found in this sector */
1822 k++;
1823 if (k == N_HTTES_PER_SECTOR)
1824 k = 0;
1825 }
1826
1827 /* If we fall off the end, all entries are InUse and not
1828 matching, or Deleted. In any case we did not find it in this
1829 sector. */
1830 }
1831
1832 /* Not found in any sector. */
1833 return False;
1834 }
1835
1836
1837 /*-------------------------------------------------------------*/
1838 /*--- Delete translations. ---*/
1839 /*-------------------------------------------------------------*/
1840
1841 /* forward */
1842 static void unredir_discard_translations( Addr, ULong );
1843
1844 /* Stuff for deleting translations which intersect with a given
1845 address range. Unfortunately, to make this run at a reasonable
1846 speed, it is complex. */
1847
1848 static inline
1849 Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
1850 {
1851 Addr e1 = s1 + r1 - 1;
1852 Addr e2 = s2 + r2 - 1;
1853 if (e1 < s2 || e2 < s1)
1854 return False;
1855 return True;
1856 }
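
/* overlap1 compares the closed intervals [s1, s1+r1-1] and
   [s2, s2+r2-1]: they are disjoint exactly when one ends before the
   other starts.  For example, overlap1(0x1000, 0x100, 0x10FF, 0x10)
   is True, since byte 0x10FF belongs to both ranges, whereas
   overlap1(0x1000, 0x100, 0x1100, 0x10) is False. */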
1857
1858 static inline
1859 Bool overlaps ( Addr start, ULong range, const VexGuestExtents* vge )
1860 {
1861 if (overlap1(start, range, vge->base[0], vge->len[0]))
1862 return True;
1863 if (vge->n_used < 2)
1864 return False;
1865 if (overlap1(start, range, vge->base[1], vge->len[1]))
1866 return True;
1867 if (vge->n_used < 3)
1868 return False;
1869 if (overlap1(start, range, vge->base[2], vge->len[2]))
1870 return True;
1871 return False;
1872 }
1873
1874
1875 /* Delete a tt entry, and update all the eclass data accordingly. */
1876
1877 static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
1878 VexArch arch_host, VexEndness endness_host )
1879 {
1880 Int i, ec_idx;
1881 EClassNo ec_num;
1882 TTEntry* tte;
1883
1884 /* sec and secNo are mutually redundant; cross-check. */
1885    vg_assert(sec == &sectors[secNo]);
1886
1887 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1888 tte = &sec->tt[tteno];
1889 vg_assert(tte->status == InUse);
1890 vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1891
1892 /* Unchain .. */
1893 unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);
1894
1895 /* Deal with the ec-to-tte links first. */
1896 for (i = 0; i < tte->n_tte2ec; i++) {
1897 ec_num = tte->tte2ec_ec[i];
1898 ec_idx = tte->tte2ec_ix[i];
1899 vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1900 vg_assert(ec_idx >= 0);
1901 vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1902 /* Assert that the two links point at each other. */
1903 vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
1904 /* "delete" the pointer back to here. */
1905 sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1906 }
1907
1908 /* Now fix up this TTEntry. */
1909 /* Mark the entry as deleted in htt.
1910 Note: we could avoid the below hash table search by
1911 adding a reference from tte to its hash position in tt. */
1912 HTTno j;
1913 HTTno k = HASH_TT(tte->entry);
1914 vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
1915 for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
1916 if (sec->htt[k] == tteno)
1917 break;
1918 k++;
1919 if (k == N_HTTES_PER_SECTOR)
1920 k = 0;
1921 }
1922 vg_assert(j < N_HTTES_PER_SECTOR);
1923 sec->htt[k] = HTT_DELETED;
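   /* HTT_DELETED, not HTT_EMPTY: probe chains running through this
      slot must stay intact, because a lookup stops early only when
      it reaches an EMPTY slot. */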
1924 tte->status = Deleted;
1925 tte->n_tte2ec = 0;
1926 add_in_empty_tt_list(secNo, tteno);
1927
1928 /* Stats .. */
1929 sec->tt_n_inuse--;
1930 n_disc_count++;
1931 n_disc_osize += vge_osize(&tte->vge);
1932
1933 /* Tell the tool too. */
1934 if (VG_(needs).superblock_discards) {
1935 VG_TDICT_CALL( tool_discard_superblock_info,
1936 tte->entry,
1937 tte->vge );
1938 }
1939 }
1940
1941
1942 /* Delete translations from sec which intersect specified range, but
1943 only consider translations in the specified eclass. */
1944
1945 static
1946 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo,
1947 Addr guest_start, ULong range,
1948 EClassNo ec,
1949 VexArch arch_host,
1950 VexEndness endness_host )
1951 {
1952 Int i;
1953 TTEno tteno;
1954 Bool anyDeld = False;
1955 TTEntry* tte;
1956
1957 vg_assert(ec >= 0 && ec < ECLASS_N);
1958
1959 for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1960
1961 tteno = sec->ec2tte[ec][i];
1962 if (tteno == EC2TTE_DELETED) {
1963 /* already deleted */
1964 continue;
1965 }
1966
1967 vg_assert(tteno < N_TTES_PER_SECTOR);
1968
1969 tte = &sec->tt[tteno];
1970 vg_assert(tte->status == InUse);
1971
1972 if (overlaps( guest_start, range, &tte->vge )) {
1973 anyDeld = True;
1974 delete_tte( sec, secNo, tteno, arch_host, endness_host );
1975 }
1976
1977 }
1978
1979 return anyDeld;
1980 }
1981
1982
1983 /* Delete translations from sec which intersect specified range, the
1984 slow way, by inspecting all translations in sec. */
1985
1986 static
1987 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo,
1988 Addr guest_start, ULong range,
1989 VexArch arch_host,
1990 VexEndness endness_host )
1991 {
1992 TTEno i;
1993 Bool anyDeld = False;
1994
1995 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1996 if (sec->tt[i].status == InUse
1997 && overlaps( guest_start, range, &sec->tt[i].vge )) {
1998 anyDeld = True;
1999 delete_tte( sec, secNo, i, arch_host, endness_host );
2000 }
2001 }
2002
2003 return anyDeld;
2004 }
2005
2006
2007 void VG_(discard_translations) ( Addr guest_start, ULong range,
2008 const HChar* who )
2009 {
2010 Sector* sec;
2011 SECno sno;
2012 EClassNo ec;
2013 Bool anyDeleted = False;
2014
2015 vg_assert(init_done);
2016
2017 VG_(debugLog)(2, "transtab",
2018 "discard_translations(0x%lx, %llu) req by %s\n",
2019 guest_start, range, who );
2020
2021 /* Pre-deletion sanity check */
2022    if (VG_(clo_sanity_level) >= 4) {
2023 Bool sane = sanity_check_all_sectors();
2024 vg_assert(sane);
2025 }
2026
2027 if (range == 0)
2028 return;
2029
2030 VexArch arch_host = VexArch_INVALID;
2031 VexArchInfo archinfo_host;
2032 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
2033 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
2034 VexEndness endness_host = archinfo_host.endness;
2035
2036 /* There are two different ways to do this.
2037
2038 If the range fits within a single address-range equivalence
2039 class, as will be the case for a cache line sized invalidation,
2040 then we only have to inspect the set of translations listed in
2041 that equivalence class, and also in the "sin-bin" equivalence
2042 class ECLASS_MISC.
2043
2044 Otherwise, the invalidation is of a larger range and probably
2045 results from munmap. In this case it's (probably!) faster just
2046 to inspect all translations, dump those we don't want, and
2047 regenerate the equivalence class information (since modifying it
2048 in-situ is even more expensive).
2049 */
2050
2051 /* First off, figure out if the range falls within a single class,
2052 and if so which one. */
2053
2054 ec = ECLASS_MISC;
2055 if (range < (1ULL << ECLASS_SHIFT))
2056 ec = range_to_eclass( guest_start, (UInt)range );
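   /* range_to_eclass may itself return ECLASS_MISC, if the (small)
      range happens to straddle an equivalence-class boundary; in
      that case we drop into the slow scheme below anyway. */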
2057
2058 /* if ec is ECLASS_MISC then we aren't looking at just a single
2059 class, so use the slow scheme. Else use the fast scheme,
2060 examining 'ec' and ECLASS_MISC. */
2061
2062 if (ec != ECLASS_MISC) {
2063
2064 VG_(debugLog)(2, "transtab",
2065 " FAST, ec = %d\n", ec);
2066
2067 /* Fast scheme */
2068 vg_assert(ec >= 0 && ec < ECLASS_MISC);
2069
2070 for (sno = 0; sno < n_sectors; sno++) {
2071       sec = &sectors[sno];
2072 if (sec->tc == NULL)
2073 continue;
2074 anyDeleted |= delete_translations_in_sector_eclass(
2075 sec, sno, guest_start, range, ec,
2076 arch_host, endness_host
2077 );
2078 anyDeleted |= delete_translations_in_sector_eclass(
2079 sec, sno, guest_start, range, ECLASS_MISC,
2080 arch_host, endness_host
2081 );
2082 }
2083
2084 } else {
2085
2086 /* slow scheme */
2087
2088 VG_(debugLog)(2, "transtab",
2089 " SLOW, ec = %d\n", ec);
2090
2091 for (sno = 0; sno < n_sectors; sno++) {
2092       sec = &sectors[sno];
2093 if (sec->tc == NULL)
2094 continue;
2095 anyDeleted |= delete_translations_in_sector(
2096 sec, sno, guest_start, range,
2097 arch_host, endness_host
2098 );
2099 }
2100
2101 }
2102
2103 if (anyDeleted)
2104 invalidateFastCache();
2105
2106 /* don't forget the no-redir cache */
2107 unredir_discard_translations( guest_start, range );
2108
2109 /* Post-deletion sanity check */
2110    if (VG_(clo_sanity_level) >= 4) {
2111 TTEno i;
2112 TTEntry* tte;
2113 Bool sane = sanity_check_all_sectors();
2114 vg_assert(sane);
2115 /* But now, also check the requested address range isn't
2116 present anywhere. */
2117 for (sno = 0; sno < n_sectors; sno++) {
2118          sec = &sectors[sno];
2119 if (sec->tc == NULL)
2120 continue;
2121 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2122 tte = &sec->tt[i];
2123 if (tte->status != InUse)
2124 continue;
2125 vg_assert(!overlaps( guest_start, range, &tte->vge ));
2126 }
2127 }
2128 }
2129 }
2130
2131 /* Whether or not tools may discard translations. */
2132 Bool VG_(ok_to_discard_translations) = False;
2133
2134 /* This function is exported to tools which can use it to discard
2135 translations, provided it is safe to do so. */
2136 void VG_(discard_translations_safely) ( Addr start, SizeT len,
2137 const HChar* who )
2138 {
2139 vg_assert(VG_(ok_to_discard_translations));
2140 VG_(discard_translations)(start, len, who);
2141 }
2142
2143 /*------------------------------------------------------------*/
2144 /*--- AUXILIARY: the unredirected TT/TC ---*/
2145 /*------------------------------------------------------------*/
2146
2147 /* A very simple translation cache which holds a small number of
2148 unredirected translations. This is completely independent of the
2149 main tt/tc structures. When unredir_tc or unredir_tt becomes full,
2150 both structures are simply dumped and we start over.
2151
2152 Since these translations are unredirected, the search key is (by
2153 definition) the first address entry in the .vge field. */
2154
2155 /* Sized to hold 500 translations of average size 1000 bytes. */
2156
2157 #define UNREDIR_SZB 1000
2158
2159 #define N_UNREDIR_TT 500
2160 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
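
/* Given sizeof(ULong) == 8 (asserted in VG_(init_tt_tc)), that is
   500 * 1000 / 8 == 62500 quadword slots in unredir_tc. */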
2161
2162 typedef
2163 struct {
2164 VexGuestExtents vge;
2165 Addr hcode;
2166 Bool inUse;
2167 }
2168 UTCEntry;
2169
2170 /* We just allocate forwards in _tc, never deleting. */
2171 static ULong *unredir_tc;
2172 static Int unredir_tc_used = N_UNREDIR_TCQ;
2173
2174 /* Slots in _tt can come into use and out again (.inUse).
2175 Nevertheless _tt_highwater is maintained so that invalidations
2176 don't have to scan all the slots when only a few are in use.
2177 _tt_highwater holds the index of the highest ever allocated
2178 slot. */
2179 static UTCEntry unredir_tt[N_UNREDIR_TT];
2180 static Int unredir_tt_highwater;
2181
2182
2183 static void init_unredir_tt_tc ( void )
2184 {
2185 Int i;
2186 if (unredir_tc == NULL) {
2187 SysRes sres = VG_(am_mmap_anon_float_valgrind)
2188 ( N_UNREDIR_TT * UNREDIR_SZB );
2189 if (sr_isError(sres)) {
2190 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
2191 N_UNREDIR_TT * UNREDIR_SZB);
2192 /*NOTREACHED*/
2193 }
2194 unredir_tc = (ULong *)(Addr)sr_Res(sres);
2195 }
2196 unredir_tc_used = 0;
2197 for (i = 0; i < N_UNREDIR_TT; i++)
2198 unredir_tt[i].inUse = False;
2199 unredir_tt_highwater = -1;
2200 }
2201
2202 /* Do a sanity check; return False on failure. */
2203 static Bool sanity_check_redir_tt_tc ( void )
2204 {
2205 Int i;
2206 if (unredir_tt_highwater < -1) return False;
2207 if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2208
2209 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2210 if (unredir_tt[i].inUse)
2211 return False;
2212
2213 if (unredir_tc_used < 0) return False;
2214 if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2215
2216 return True;
2217 }
2218
2219
2220 /* Add an UNREDIRECTED translation of vge to TT/TC. The translation
2221 is temporarily in code[0 .. code_len-1].
2222 */
2223 void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
2224 Addr entry,
2225 Addr code,
2226 UInt code_len )
2227 {
2228 Int i, j, code_szQ;
2229 HChar *srcP, *dstP;
2230
2231 vg_assert(sanity_check_redir_tt_tc());
2232
2233 /* This is the whole point: it's not redirected! */
2234 vg_assert(entry == vge->base[0]);
2235
2236    /* How many unredir_tc slots are needed */
2237 code_szQ = (code_len + 7) / 8;
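   /* As in the main tc, round up to whole quadwords: a translation
      of the assumed average size, 1000 bytes, takes 125 of the
      N_UNREDIR_TCQ available slots. */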
2238
2239 /* Look for an empty unredir_tc slot */
2240 for (i = 0; i < N_UNREDIR_TT; i++)
2241 if (!unredir_tt[i].inUse)
2242 break;
2243
2244 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2245 /* It's full; dump everything we currently have */
2246 init_unredir_tt_tc();
2247 i = 0;
2248 }
2249
2250 vg_assert(unredir_tc_used >= 0);
2251 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2252 vg_assert(code_szQ > 0);
2253 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2254 vg_assert(i >= 0 && i < N_UNREDIR_TT);
2255 vg_assert(unredir_tt[i].inUse == False);
2256
2257 if (i > unredir_tt_highwater)
2258 unredir_tt_highwater = i;
2259
2260 dstP = (HChar*)&unredir_tc[unredir_tc_used];
2261 srcP = (HChar*)code;
2262 for (j = 0; j < code_len; j++)
2263 dstP[j] = srcP[j];
2264
2265 VG_(invalidate_icache)( dstP, code_len );
2266
2267 unredir_tt[i].inUse = True;
2268 unredir_tt[i].vge = *vge;
2269 unredir_tt[i].hcode = (Addr)dstP;
2270
2271 unredir_tc_used += code_szQ;
2272 vg_assert(unredir_tc_used >= 0);
2273 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2274
2275 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2276 }
2277
2278 Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result,
2279 Addr guest_addr )
2280 {
2281 Int i;
2282 for (i = 0; i < N_UNREDIR_TT; i++) {
2283 if (!unredir_tt[i].inUse)
2284 continue;
2285 if (unredir_tt[i].vge.base[0] == guest_addr) {
2286 *result = unredir_tt[i].hcode;
2287 return True;
2288 }
2289 }
2290 return False;
2291 }
2292
2293 static void unredir_discard_translations( Addr guest_start, ULong range )
2294 {
2295 Int i;
2296
2297 vg_assert(sanity_check_redir_tt_tc());
2298
2299 for (i = 0; i <= unredir_tt_highwater; i++) {
2300 if (unredir_tt[i].inUse
2301 && overlaps( guest_start, range, &unredir_tt[i].vge))
2302 unredir_tt[i].inUse = False;
2303 }
2304 }
2305
2306
2307 /*------------------------------------------------------------*/
2308 /*--- Initialisation. ---*/
2309 /*------------------------------------------------------------*/
2310
2311 void VG_(init_tt_tc) ( void )
2312 {
2313 Int i, avg_codeszQ;
2314
2315 vg_assert(!init_done);
2316 init_done = True;
2317
2318 /* Otherwise lots of things go wrong... */
2319 vg_assert(sizeof(ULong) == 8);
2320 vg_assert(sizeof(TTEno) == sizeof(HTTno));
2321 vg_assert(sizeof(TTEno) == 2);
2322 vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
2323 vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
2324 vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
2325 vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);
2326 /* check fast cache entries really are 2 words long */
2327 vg_assert(sizeof(Addr) == sizeof(void*));
2328 vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
2329 /* check fast cache entries are packed back-to-back with no spaces */
2330 vg_assert(sizeof( VG_(tt_fast) )
2331 == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
2332 /* check fast cache is aligned as we requested. Not fatal if it
2333 isn't, but we might as well make sure. */
2334 vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
2335
2336 if (VG_(clo_verbosity) > 2)
2337 VG_(message)(Vg_DebugMsg,
2338 "TT/TC: VG_(init_tt_tc) "
2339 "(startup of code management)\n");
2340
2341 /* Figure out how big each tc area should be. */
2342 if (VG_(clo_avg_transtab_entry_size) == 0)
2343 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8;
2344 else
2345 avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
2346 tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);
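   /* For example, an average translation size of 72 bytes gives
      avg_codeszQ == 9, so each sector's tc is sized at
      N_TTES_PER_SECTOR * 10 quadwords: one quadword of headroom per
      tt entry on top of the estimated code size. */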
2347
2348 /* Ensure the calculated value is not way crazy. */
2349 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
2350 vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);
2351
2352 n_sectors = VG_(clo_num_transtab_sectors);
2353 vg_assert(n_sectors >= MIN_N_SECTORS);
2354 vg_assert(n_sectors <= MAX_N_SECTORS);
2355
2356 /* Initialise the sectors, even the ones we aren't going to use.
2357 Set all fields to zero. */
2358 youngest_sector = 0;
2359 for (i = 0; i < MAX_N_SECTORS; i++)
2360       VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));
2361
2362 /* Initialise the sector_search_order hint table, including the
2363 entries we aren't going to use. */
2364 for (i = 0; i < MAX_N_SECTORS; i++)
2365 sector_search_order[i] = INV_SNO;
2366
2367 /* Initialise the fast cache. */
2368 invalidateFastCache();
2369
2370 /* and the unredir tt/tc */
2371 init_unredir_tt_tc();
2372
2373 if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
2374 || VG_(debugLog_getLevel) () >= 2) {
2375 VG_(message)(Vg_DebugMsg,
2376 "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
2377 "%stool provided default %u\n",
2378 VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
2379 VG_(clo_avg_transtab_entry_size),
2380 VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
2381 VG_(details).avg_translation_sizeB);
2382 VG_(message)(Vg_DebugMsg,
2383 "TT/TC: cache: %d sectors of %d bytes each = %d total TC\n",
2384 n_sectors, 8 * tc_sector_szQ,
2385 n_sectors * 8 * tc_sector_szQ );
2386 VG_(message)(Vg_DebugMsg,
2387 "TT/TC: table: %d tables[%d] of %d bytes each = %d total TT\n",
2388 n_sectors, N_TTES_PER_SECTOR,
2389 (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
2390 (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
2391 VG_(message)(Vg_DebugMsg,
2392 "TT/TC: table: %d tt entries each = %d total tt entries\n",
2393 N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
2394 VG_(message)(Vg_DebugMsg,
2395 "TT/TC: table: %d htt[%d] of %d bytes each = %d total HTT"
2396 " (htt[%d] %d%% max occup)\n",
2397 n_sectors, N_HTTES_PER_SECTOR,
2398 (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
2399 (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
2400 N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
2401 }
2402 }
2403
2404
2405 /*------------------------------------------------------------*/
2406 /*--- Printing out statistics. ---*/
2407 /*------------------------------------------------------------*/
2408
2409 static Double safe_idiv( ULong a, ULong b )
2410 {
2411 return (b == 0 ? 0 : (Double)a / (Double)b);
2412 }
2413
2414 UInt VG_(get_bbs_translated) ( void )
2415 {
2416 return n_in_count;
2417 }
2418
2419 void VG_(print_tt_tc_stats) ( void )
2420 {
2421 VG_(message)(Vg_DebugMsg,
2422 " tt/tc: %'llu tt lookups requiring %'llu probes\n",
2423 n_full_lookups, n_lookup_probes );
2424 VG_(message)(Vg_DebugMsg,
2425 " tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2426 n_fast_updates, n_fast_flushes );
2427
2428 VG_(message)(Vg_DebugMsg,
2429 " transtab: new %'llu "
2430 "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
2431 "avg tce size %llu\n",
2432 n_in_count, n_in_osize, n_in_tsize,
2433 safe_idiv(n_in_tsize, n_in_osize),
2434 n_in_sc_count,
2435 n_in_tsize / (n_in_count ? n_in_count : 1));
2436 VG_(message)(Vg_DebugMsg,
2437 " transtab: dumped %'llu (%'llu -> ?" "?) "
2438 "(sectors recycled %'llu)\n",
2439 n_dump_count, n_dump_osize, n_sectors_recycled );
2440 VG_(message)(Vg_DebugMsg,
2441 " transtab: discarded %'llu (%'llu -> ?" "?)\n",
2442 n_disc_count, n_disc_osize );
2443
2444 if (DEBUG_TRANSTAB) {
2445 VG_(printf)("\n");
2446 for (EClassNo e = 0; e < ECLASS_N; e++) {
2447 VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
2448 if (e % 16 == 15)
2449 VG_(printf)("\n");
2450 }
2451 VG_(printf)("\n\n");
2452 }
2453 }
2454
2455 /*------------------------------------------------------------*/
2456 /*--- Printing out of profiling results. ---*/
2457 /*------------------------------------------------------------*/
2458
2459 static ULong score ( const TTEntry* tte )
2460 {
2461 return ((ULong)tte->usage.prof.weight) * ((ULong)tte->usage.prof.count);
2462 }
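
/* A block's score hence approximates the number of guest
   instructions it has executed: .weight was set to the block's
   static guest instruction count (minimum 1) in
   VG_(add_to_transtab), and .count is bumped at run time by the
   patched-in profile increment. */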
2463
2464 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
2465 {
2466 SECno sno;
2467 Int r, s;
2468 ULong score_total;
2469 TTEno i;
2470
2471 /* First, compute the total weighted count, and find the top N
2472 ttes. tops contains pointers to the most-used n_tops blocks, in
2473 descending order (viz, tops[0] is the highest scorer). */
2474 for (s = 0; s < n_tops; s++) {
2475 tops[s].addr = 0;
2476 tops[s].score = 0;
2477 }
2478
2479 score_total = 0;
2480
2481 for (sno = 0; sno < n_sectors; sno++) {
2482 if (sectors[sno].tc == NULL)
2483 continue;
2484 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2485 if (sectors[sno].tt[i].status != InUse)
2486 continue;
2487          score_total += score(&sectors[sno].tt[i]);
2488 /* Find the rank for sectors[sno].tt[i]. */
2489 r = n_tops-1;
2490 while (True) {
2491 if (r == -1)
2492 break;
2493 if (tops[r].addr == 0) {
2494 r--;
2495 continue;
2496 }
2497             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
2498 r--;
2499 continue;
2500 }
2501 break;
2502 }
2503 r++;
2504 vg_assert(r >= 0 && r <= n_tops);
2505 /* This bb should be placed at r, and bbs above it shifted
2506 upwards one slot. */
2507 if (r < n_tops) {
2508 for (s = n_tops-1; s > r; s--)
2509 tops[s] = tops[s-1];
2510 tops[r].addr = sectors[sno].tt[i].entry;
2511             tops[r].score = score( &sectors[sno].tt[i] );
2512 }
2513 }
2514 }
2515
2516 /* Now zero out all the counter fields, so that we can make
2517 multiple calls here and just get the values since the last call,
2518 each time, rather than values accumulated for the whole run. */
2519 for (sno = 0; sno < n_sectors; sno++) {
2520 if (sectors[sno].tc == NULL)
2521 continue;
2522 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2523 if (sectors[sno].tt[i].status != InUse)
2524 continue;
2525 sectors[sno].tt[i].usage.prof.count = 0;
2526 }
2527 }
2528
2529 return score_total;
2530 }
2531
2532 /*--------------------------------------------------------------------*/
2533 /*--- end ---*/
2534 /*--------------------------------------------------------------------*/
2535