1 /*--------------------------------------------------------------------*/
2 /*--- Cachegrind: cache configuration.                   cg-arch.c ---*/
3 /*--------------------------------------------------------------------*/
4 
5 /*
6    This file is part of Cachegrind, a Valgrind tool for cache
7    profiling programs.
8 
9    Copyright (C) 2011-2013 Nicholas Nethercote
10       njn@valgrind.org
11 
12    This program is free software; you can redistribute it and/or
13    modify it under the terms of the GNU General Public License as
14    published by the Free Software Foundation; either version 2 of the
15    License, or (at your option) any later version.
16 
17    This program is distributed in the hope that it will be useful, but
18    WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20    General Public License for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program; if not, write to the Free Software
24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25    02111-1307, USA.
26 
27    The GNU General Public License is contained in the file COPYING.
28 */
29 
30 #include "pub_tool_basics.h"
31 #include "pub_tool_libcassert.h"
32 #include "pub_tool_libcbase.h"
33 #include "pub_tool_libcprint.h"
34 #include "pub_tool_options.h"
35 #include "pub_tool_machine.h"
36 
37 #include "cg_arch.h"
38 
// Forward declaration: configure_caches() is defined at the end of this
// file but is called earlier, from VG_(post_clo_init_configure_caches)().
static void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc,
                             Bool all_caches_clo_defined);
41 
42 // Checks cache config is ok.  Returns NULL if ok, or a pointer to an error
43 // string otherwise.
check_cache(cache_t * cache)44 static const HChar* check_cache(cache_t* cache)
45 {
46    // Simulator requires set count to be a power of two.
47    if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
48        (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
49    {
50       return "Cache set count is not a power of two.\n";
51    }
52 
53    // Simulator requires line size to be a power of two.
54    if (-1 == VG_(log2)(cache->line_size)) {
55       return "Cache line size is not a power of two.\n";
56    }
57 
58    // Then check line size >= 16 -- any smaller and a single instruction could
59    // straddle three cache lines, which breaks a simulation assertion and is
60    // stupid anyway.
61    if (cache->line_size < MIN_LINE_SIZE) {
62       return "Cache line size is too small.\n";
63    }
64 
65    /* Then check cache size > line size (causes seg faults if not). */
66    if (cache->size <= cache->line_size) {
67       return "Cache size <= line size.\n";
68    }
69 
70    /* Then check assoc <= (size / line size) (seg faults otherwise). */
71    if (cache->assoc > (cache->size / cache->line_size)) {
72       return "Cache associativity > (size / line size).\n";
73    }
74 
75    return NULL;
76 }
77 
78 
// Parses the value of a cache option into |cache| and validates it.
//
// cache:  receives the parsed size, associativity and line size.
// opt:    the option text, used only when reporting an error.
// optval: the option's value, expected to look like "65536,2,64"
//         (size,assoc,line_size).
//
// Every failure is reported through VG_(fmsg_bad_option)().
// NOTE(review): that call is presumably fatal (does not return) -- confirm.
// If it did return, the "bad argument" report would be followed by the
// "overflow" report, as the label order below implies.
static void parse_cache_opt ( cache_t* cache, const HChar* opt,
                              const HChar* optval )
{
   Long         size64, assoc64, line64;
   HChar*       cursor;
   const HChar* problem;

   // Pull out the three comma-separated decimal fields; each one must be
   // followed by the expected delimiter.
   size64 = VG_(strtoll10)(optval, &cursor);
   if (',' != *cursor)
      goto bad;

   assoc64 = VG_(strtoll10)(cursor + 1, &cursor);
   if (',' != *cursor)
      goto bad;

   line64 = VG_(strtoll10)(cursor + 1, &cursor);
   if ('\0' != *cursor)
      goto bad;

   // Narrow to Int, then detect truncation by comparing against the wide
   // values that were parsed.
   cache->size      = (Int)size64;
   cache->assoc     = (Int)assoc64;
   cache->line_size = (Int)line64;
   if (cache->size      != size64  ||
       cache->assoc     != assoc64 ||
       cache->line_size != line64)
      goto overflow;

   problem = check_cache(cache);
   if (NULL == problem)
      return;

   // Say what is wrong with the configuration, then continue into the
   // generic bad-argument report.
   VG_(fmsg)("%s", problem);

  bad:
   VG_(fmsg_bad_option)(opt, "Bad argument '%s'\n", optval);

  overflow:
   VG_(fmsg_bad_option)(opt,
      "One of the cache parameters was too large and overflowed.\n");
}
114 
115 
VG_(str_clo_cache_opt)116 Bool VG_(str_clo_cache_opt)(const HChar *arg,
117                             cache_t* clo_I1c,
118                             cache_t* clo_D1c,
119                             cache_t* clo_LLc)
120 {
121    const HChar* tmp_str;
122 
123    if      VG_STR_CLO(arg, "--I1", tmp_str) {
124       parse_cache_opt(clo_I1c, arg, tmp_str);
125       return True;
126    } else if VG_STR_CLO(arg, "--D1", tmp_str) {
127       parse_cache_opt(clo_D1c, arg, tmp_str);
128       return True;
129    } else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
130               VG_STR_CLO(arg, "--LL", tmp_str)) {
131       parse_cache_opt(clo_LLc, arg, tmp_str);
132       return True;
133    } else
134       return False;
135 }
136 
// Emits a single user message line describing cache |c| (its size,
// associativity and line size), labelled with |desc|.
static void umsg_cache_img(const HChar* desc, cache_t* c)
{
   VG_(umsg)("  %s: %'d B, %d-way, %d B lines\n",
             desc,
             c->size,
             c->assoc,
             c->line_size);
}
142 
143 // Verifies if c is a valid cache.
144 // An invalid value causes an assert, unless clo_redefined is True.
check_cache_or_override(const HChar * desc,cache_t * c,Bool clo_redefined)145 static void check_cache_or_override(const HChar* desc, cache_t* c, Bool clo_redefined)
146 {
147    const HChar* checkRes;
148 
149    checkRes = check_cache(c);
150    if (checkRes) {
151       VG_(umsg)("Auto-detected %s cache configuration not supported: %s",
152                 desc, checkRes);
153       umsg_cache_img(desc, c);
154       if (!clo_redefined) {
155          VG_(umsg)("As it probably should be supported, please report a bug!\n");
156          VG_(umsg)("Bypass this message by using option --%s=...\n", desc);
157          tl_assert(0);
158       }
159    }
160 }
161 
162 
163 /* If the LL cache config isn't something the simulation functions
164    can handle, try to adjust it so it is.  Caches are characterised
165    by (total size T, line size L, associativity A), and then we
166    have
167 
168      number of sets S = T / (L * A)
169 
170    The required constraints are:
171 
172    * L must be a power of 2, but it always is in practice, so
173      no problem there
174 
175    * A can be any value >= 1
176 
177    * T can be any value, but ..
178 
179    * S must be a power of 2.
180 
   That sometimes gives a problem.  For example, some Core iX based
   Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
   sets.  Some AMD CPUs have T = 5MB, A = 48, L = 64, which gives
   1706.667 sets (!).
185 
186    The "fix" is to force S down to the nearest power of two below its
187    original value, and increase A proportionately, so as to keep the
188    total cache size the same.  In fact to be safe we recalculate the
189    cache size afterwards anyway, to guarantee that it divides exactly
190    between the new number of sets.
191 
192    The "fix" is "justified" (cough, cough) by alleging that
193    increases of associativity above about 4 have very little effect
194    on the actual miss rate.  It would be far more inaccurate to
195    fudge this by changing the size of the simulated cache --
196    changing the associativity is a much better option.
197 */
198 
199 /* (Helper function) Returns the largest power of 2 that is <= |x|.
200    Even works when |x| == 0. */
floor_power_of_2(UInt x)201 static UInt floor_power_of_2 ( UInt x )
202 {
203    x = x | (x >> 1);
204    x = x | (x >> 2);
205    x = x | (x >> 4);
206    x = x | (x >> 8);
207    x = x | (x >> 16);
208    return x - (x >> 1);
209 }
210 
211 static void
maybe_tweak_LLc(cache_t * LLc)212 maybe_tweak_LLc(cache_t *LLc)
213 {
214   if (LLc->size == 0 || LLc->assoc == 0 || LLc->line_size == 0)
215      return;
216 
217   tl_assert(LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0);
218 
219   UInt old_size      = (UInt)LLc->size;
220   UInt old_assoc     = (UInt)LLc->assoc;
221   UInt old_line_size = (UInt)LLc->line_size;
222 
223   UInt new_size      = old_size;
224   UInt new_assoc     = old_assoc;
225   UInt new_line_size = old_line_size;
226 
227   UInt old_nSets = old_size / (old_assoc * old_line_size);
228   if (old_nSets == 0) {
229      /* This surely can't happen; but would cause chaos with the maths
230       * below if it did.  Just give up if it does. */
231      return;
232   }
233 
234   if (-1 != VG_(log2_64)(old_nSets)) {
235      /* The number of sets is already a power of 2.  Make sure that
236         the size divides exactly between the sets.  Almost all of the
237         time this will have no effect. */
238      new_size = old_line_size * old_assoc * old_nSets;
239   } else {
240      /* The number of sets isn't a power of two.  Calculate some
241         scale-down factor which causes the number of sets to become a
242         power of two.  Then, increase the associativity by that
243         factor.  Finally, re-calculate the total size so as to make
244         sure it divides exactly between the sets. */
245      tl_assert(old_nSets >= 0);
246      UInt new_nSets = floor_power_of_2 ( old_nSets );
247      tl_assert(new_nSets > 0 && new_nSets < old_nSets);
248      Double factor = (Double)old_nSets / (Double)new_nSets;
249      tl_assert(factor >= 1.0);
250 
251      new_assoc = (UInt)(0.5 + factor * (Double)old_assoc);
252      tl_assert(new_assoc >= old_assoc);
253 
254      new_size = old_line_size * new_assoc * new_nSets;
255   }
256 
257   tl_assert(new_line_size == old_line_size); /* we never change this */
258   if (new_size == old_size && new_assoc == old_assoc)
259      return;
260 
261   VG_(dmsg)("warning: "
262             "specified LL cache: line_size %u  assoc %u  total_size %'u\n",
263             old_line_size, old_assoc, old_size);
264   VG_(dmsg)("warning: "
265             "simulated LL cache: line_size %u  assoc %u  total_size %'u\n",\
266             new_line_size, new_assoc, new_size);
267 
268   LLc->size      = new_size;
269   LLc->assoc     = new_assoc;
270   LLc->line_size = new_line_size;
271 }
272 
VG_(post_clo_init_configure_caches)273 void VG_(post_clo_init_configure_caches)(cache_t* I1c,
274                                          cache_t* D1c,
275                                          cache_t* LLc,
276                                          cache_t* clo_I1c,
277                                          cache_t* clo_D1c,
278                                          cache_t* clo_LLc)
279 {
280 #define DEFINED(L)   (-1 != L->size  || -1 != L->assoc || -1 != L->line_size)
281 
282    // Count how many were defined on the command line.
283    Bool all_caches_clo_defined =
284       (DEFINED(clo_I1c) &&
285        DEFINED(clo_D1c) &&
286        DEFINED(clo_LLc));
287 
288    // Set the cache config (using auto-detection, if supported by the
289    // architecture).
290    configure_caches( I1c, D1c, LLc, all_caches_clo_defined );
291 
292    maybe_tweak_LLc( LLc );
293 
294    // Check the default/auto-detected values.
295    // Allow the user to override invalid auto-detected caches
296    // with command line.
297    check_cache_or_override ("I1", I1c, DEFINED(clo_I1c));
298    check_cache_or_override ("D1", D1c, DEFINED(clo_D1c));
299    check_cache_or_override ("LL", LLc, DEFINED(clo_LLc));
300 
301    // Then replace with any defined on the command line.  (Already checked in
302    // VG(parse_clo_cache_opt)().)
303    if (DEFINED(clo_I1c)) { *I1c = *clo_I1c; }
304    if (DEFINED(clo_D1c)) { *D1c = *clo_D1c; }
305    if (DEFINED(clo_LLc)) { *LLc = *clo_LLc; }
306 
307    if (VG_(clo_verbosity) >= 2) {
308       VG_(umsg)("Cache configuration used:\n");
309       umsg_cache_img ("I1", I1c);
310       umsg_cache_img ("D1", D1c);
311       umsg_cache_img ("LL", LLc);
312    }
313 #undef DEFINED
314 }
315 
VG_(print_cache_clo_opts)316 void VG_(print_cache_clo_opts)()
317 {
318    VG_(printf)(
319 "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
320 "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
321 "    --LL=<size>,<assoc>,<line_size>  set LL cache manually\n"
322                );
323 }
324 
325 
326 // Traverse the cache info and return a cache of the given kind and level.
327 // Return NULL if no such cache exists.
328 static const VexCache *
locate_cache(const VexCacheInfo * ci,VexCacheKind kind,UInt level)329 locate_cache(const VexCacheInfo *ci, VexCacheKind kind, UInt level)
330 {
331    const VexCache *c;
332 
333    for (c = ci->caches; c != ci->caches + ci->num_caches; ++c) {
334       if (c->level == level && c->kind == kind) {
335          return c;
336       }
337    }
338    return NULL;  // not found
339 }
340 
341 
342 // Gives the auto-detected configuration of I1, D1 and LL caches.  They get
343 // overridden by any cache configurations specified on the command line.
344 static void
configure_caches(cache_t * I1c,cache_t * D1c,cache_t * LLc,Bool all_caches_clo_defined)345 configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
346                  Bool all_caches_clo_defined)
347 {
348    VexArchInfo vai;
349    const VexCacheInfo *ci;
350    const VexCache *i1, *d1, *ll;
351 
352    VG_(machine_get_VexArchInfo)(NULL, &vai);
353    ci = &vai.hwcache_info;
354 
355    // Extract what we need
356    i1 = locate_cache(ci, INSN_CACHE, 1);
357    d1 = locate_cache(ci, DATA_CACHE, 1);
358    ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);
359 
360    if (ci->num_caches > 0 && ll == NULL) {
361       VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
362    }
363 
364    if (ll && ci->num_levels > 2) {
365       VG_(dmsg)("warning: L%u cache found, using its data for the "
366                 "LL simulation.\n", ci->num_levels);
367    }
368 
369    if (i1 && d1 && ll) {
370       if (i1->is_trace_cache) {
371          /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
372           * conversion to byte size is a total guess;  treat the 12K and 16K
373           * cases the same since the cache byte size must be a power of two for
374           * everything to work!.  Also guessing 32 bytes for the line size...
375           */
376          UInt adjusted_size, guessed_line_size = 32;
377 
378          if (i1->sizeB == 12 * 1024 || i1->sizeB == 16 * 1024) {
379             adjusted_size = 16 * 1024;
380          } else {
381             adjusted_size = 32 * 1024;
382          }
383          VG_(dmsg)("warning: Pentium 4 with %u KB micro-op instruction trace cache\n",
384                    i1->sizeB / 1024);
385          VG_(dmsg)("         Simulating a %d KB I-cache with %d B lines\n",
386                    adjusted_size / 1024, guessed_line_size);
387 
388          *I1c = (cache_t) { adjusted_size, i1->assoc, guessed_line_size };
389       } else {
390          *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
391       }
392       *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
393       *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };
394 
395       return;
396    }
397 
398    // Cache information could not be queried; choose some default
399    // architecture specific default setting.
400 
401 #if defined(VGA_ppc32)
402 
403    // Default cache configuration
404    *I1c = (cache_t) {  65536, 2, 64 };
405    *D1c = (cache_t) {  65536, 2, 64 };
406    *LLc = (cache_t) { 262144, 8, 64 };
407 
408 #elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
409 
410    // Default cache configuration
411    *I1c = (cache_t) {  65536, 2, 64 };
412    *D1c = (cache_t) {  65536, 2, 64 };
413    *LLc = (cache_t) { 262144, 8, 64 };
414 
415 #elif defined(VGA_arm)
416 
417    // Set caches to default (for Cortex-A8 ?)
418    *I1c = (cache_t) {  16384, 4, 64 };
419    *D1c = (cache_t) {  16384, 4, 64 };
420    *LLc = (cache_t) { 262144, 8, 64 };
421 
422 #elif defined(VGA_arm64)
423 
424    // Copy the 32-bit ARM version until such time as we have
425    // some real hardware to run on
426    *I1c = (cache_t) {  16384, 4, 64 };
427    *D1c = (cache_t) {  16384, 4, 64 };
428    *LLc = (cache_t) { 262144, 8, 64 };
429 
430 #elif defined(VGA_s390x)
431    //
432    // Here is the cache data from older machine models:
433    //
434    //           I1            D1      I/D L2
435    // z900  256k/256/4    256k/256/4   16MB
436    // z800  256k/256/4    256k/256/4    8MB
437    // z990  256k/256/4    256k/256/4   32MB
438    // z890  256k/256/4    256k/256/4   32MB
439    // z9    256k/256/4    256k/256/4   40MB
440    //
441    // Sources:
442    // (1) IBM System z9 109 Technical Introduction
443    //     www.redbooks.ibm.com/redbooks/pdfs/sg246669.pdf
444    // (2) The microarchitecture of the IBM eServer z900 processor
445    //     IBM Journal of Research and Development
446    //     Volume 46, Number 4/5, pp 381-395, July/September 2002
447    // (3) The IBM eServer z990 microprocessor
448    //     IBM Journal of Research and Development
449    //     Volume 48, Number 3/4, pp 295-309, May/July 2004
450    // (4) Charles Webb, IBM
451    //
452    // L2 data is unfortunately incomplete. Otherwise, we could support
453    // machines without the ECAG insn by looking at VEX_S390X_MODEL(hwcaps).
454 
455    // Default cache configuration is z10-EC  (Source: ECAG insn)
456    *I1c = (cache_t) {    65536,  4, 256 };
457    *D1c = (cache_t) {   131072,  8, 256 };
458    *LLc = (cache_t) { 50331648, 24, 256 };
459 
460 #elif defined(VGA_mips32)
461 
462    // Set caches to default (for MIPS32-r2(mips 74kc))
463    *I1c = (cache_t) {  32768, 4, 32 };
464    *D1c = (cache_t) {  32768, 4, 32 };
465    *LLc = (cache_t) { 524288, 8, 32 };
466 
467 #elif defined(VGA_mips64)
468 
469    // Set caches to default (for MIPS64 - 5kc)
470    *I1c = (cache_t) {  32768, 4, 32 };
471    *D1c = (cache_t) {  32768, 4, 32 };
472    *LLc = (cache_t) { 524288, 8, 32 };
473 
474 #elif defined(VGA_x86) || defined(VGA_amd64)
475 
476    *I1c = (cache_t) {  65536, 2, 64 };
477    *D1c = (cache_t) {  65536, 2, 64 };
478    *LLc = (cache_t) { 262144, 8, 64 };
479 
480 #elif defined(VGA_tilegx)
481 
482    // Set caches to default for Tilegx.
483    *I1c = (cache_t) { 0x8000,  2, 64 };
484    *D1c = (cache_t) { 0x8000,  2, 64 };
485    *LLc = (cache_t) { 0x40000, 8, 64 };
486 
487 #else
488 
489 #error "Unknown arch"
490 
491 #endif
492 
493    if (!all_caches_clo_defined) {
494       const HChar warning[] =
495         "Warning: Cannot auto-detect cache config, using defaults.\n"
496         "         Run with -v to see.\n";
497       VG_(dmsg)("%s", warning);
498    }
499 }
500 
501 /*--------------------------------------------------------------------*/
502 /*--- end                                                          ---*/
503 /*--------------------------------------------------------------------*/
504