1 
2 /*--------------------------------------------------------------------*/
3 /*--- Cache simulation                                    cg_sim.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Cachegrind, a Valgrind tool for cache
8    profiling programs.
9 
10    Copyright (C) 2002-2013 Nicholas Nethercote
11       njn@valgrind.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /* Notes:
32   - simulates a write-allocate cache
33   - (block --> set) hash function uses simple bit selection
34   - handling of references straddling two cache blocks:
35       - counts as only one cache access (not two)
36       - both blocks hit                  --> one hit
37       - one block hits, the other misses --> one miss
38       - both blocks miss                 --> one miss (not two)
39 */
40 
41 typedef struct {
42    Int          size;                   /* bytes */
43    Int          assoc;
44    Int          line_size;              /* bytes */
45    Int          sets;
46    Int          sets_min_1;
47    Int          line_size_bits;
48    Int          tag_shift;
49    HChar        desc_line[128];         /* large enough */
50    UWord*       tags;
51 } cache_t2;
52 
53 /* By this point, the size/assoc/line_size has been checked. */
cachesim_initcache(cache_t config,cache_t2 * c)54 static void cachesim_initcache(cache_t config, cache_t2* c)
55 {
56    Int i;
57 
58    c->size      = config.size;
59    c->assoc     = config.assoc;
60    c->line_size = config.line_size;
61 
62    c->sets           = (c->size / c->line_size) / c->assoc;
63    c->sets_min_1     = c->sets - 1;
64    c->line_size_bits = VG_(log2)(c->line_size);
65    c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
66 
67    if (c->assoc == 1) {
68       VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
69                                  c->size, c->line_size);
70    } else {
71       VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
72                                  c->size, c->line_size, c->assoc);
73    }
74 
75    c->tags = VG_(malloc)("cg.sim.ci.1",
76                          sizeof(UWord) * c->sets * c->assoc);
77 
78    for (i = 0; i < c->sets * c->assoc; i++)
79       c->tags[i] = 0;
80 }
81 
82 /* This attribute forces GCC to inline the function, getting rid of a
83  * lot of indirection around the cache_t2 pointer, if it is known to be
84  * constant in the caller (the caller is inlined itself).
85  * Without inlining of simulator functions, cachegrind can get 40% slower.
86  */
87 __attribute__((always_inline))
88 static __inline__
cachesim_setref_is_miss(cache_t2 * c,UInt set_no,UWord tag)89 Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
90 {
91    int i, j;
92    UWord *set;
93 
94    set = &(c->tags[set_no * c->assoc]);
95 
96    /* This loop is unrolled for just the first case, which is the most */
97    /* common.  We can't unroll any further because it would screw up   */
98    /* if we have a direct-mapped (1-way) cache.                        */
99    if (tag == set[0])
100       return False;
101 
102    /* If the tag is one other than the MRU, move it into the MRU spot  */
103    /* and shuffle the rest down.                                       */
104    for (i = 1; i < c->assoc; i++) {
105       if (tag == set[i]) {
106          for (j = i; j > 0; j--) {
107             set[j] = set[j - 1];
108          }
109          set[0] = tag;
110 
111          return False;
112       }
113    }
114 
115    /* A miss;  install this tag as MRU, shuffle rest down. */
116    for (j = c->assoc - 1; j > 0; j--) {
117       set[j] = set[j - 1];
118    }
119    set[0] = tag;
120 
121    return True;
122 }
123 
124 __attribute__((always_inline))
125 static __inline__
cachesim_ref_is_miss(cache_t2 * c,Addr a,UChar size)126 Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
127 {
128    /* A memory block has the size of a cache line */
129    UWord block1 =  a         >> c->line_size_bits;
130    UWord block2 = (a+size-1) >> c->line_size_bits;
131    UInt  set1   = block1 & c->sets_min_1;
132 
133    /* Tags used in real caches are minimal to save space.
134     * As the last bits of the block number of addresses mapping
135     * into one cache set are the same, real caches use as tag
136     *   tag = block >> log2(#sets)
137     * But using the memory block as more specific tag is fine,
138     * and saves instructions.
139     */
140    UWord tag1   = block1;
141 
142    /* Access entirely within line. */
143    if (block1 == block2)
144       return cachesim_setref_is_miss(c, set1, tag1);
145 
146    /* Access straddles two lines. */
147    else if (block1 + 1 == block2) {
148       UInt  set2 = block2 & c->sets_min_1;
149       UWord tag2 = block2;
150 
151       /* always do both, as state is updated as side effect */
152       if (cachesim_setref_is_miss(c, set1, tag1)) {
153          cachesim_setref_is_miss(c, set2, tag2);
154          return True;
155       }
156       return cachesim_setref_is_miss(c, set2, tag2);
157    }
158    VG_(printf)("addr: %lx  size: %u  blocks: %ld %ld",
159                a, size, block1, block2);
160    VG_(tool_panic)("item straddles more than two cache sets");
161    /* not reached */
162    return True;
163 }
164 
165 
166 static cache_t2 LL;
167 static cache_t2 I1;
168 static cache_t2 D1;
169 
cachesim_initcaches(cache_t I1c,cache_t D1c,cache_t LLc)170 static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
171 {
172    cachesim_initcache(I1c, &I1);
173    cachesim_initcache(D1c, &D1);
174    cachesim_initcache(LLc, &LL);
175 }
176 
177 __attribute__((always_inline))
178 static __inline__
cachesim_I1_doref_Gen(Addr a,UChar size,ULong * m1,ULong * mL)179 void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL)
180 {
181    if (cachesim_ref_is_miss(&I1, a, size)) {
182       (*m1)++;
183       if (cachesim_ref_is_miss(&LL, a, size))
184          (*mL)++;
185    }
186 }
187 
188 // common special case IrNoX
189 __attribute__((always_inline))
190 static __inline__
cachesim_I1_doref_NoX(Addr a,UChar size,ULong * m1,ULong * mL)191 void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL)
192 {
193    UWord block  = a >> I1.line_size_bits;
194    UInt  I1_set = block & I1.sets_min_1;
195 
196    // use block as tag
197    if (cachesim_setref_is_miss(&I1, I1_set, block)) {
198       UInt  LL_set = block & LL.sets_min_1;
199       (*m1)++;
200       // can use block as tag as L1I and LL cache line sizes are equal
201       if (cachesim_setref_is_miss(&LL, LL_set, block))
202          (*mL)++;
203    }
204 }
205 
206 __attribute__((always_inline))
207 static __inline__
cachesim_D1_doref(Addr a,UChar size,ULong * m1,ULong * mL)208 void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
209 {
210    if (cachesim_ref_is_miss(&D1, a, size)) {
211       (*m1)++;
212       if (cachesim_ref_is_miss(&LL, a, size))
213          (*mL)++;
214    }
215 }
216 
217 /* Check for special case IrNoX. Called at instrumentation time.
218  *
219  * Does this Ir only touch one cache line, and are L1I/LL cache
220  * line sizes the same? This allows to get rid of a runtime check.
221  *
222  * Returning false is always fine, as this calls the generic case
223  */
cachesim_is_IrNoX(Addr a,UChar size)224 static Bool cachesim_is_IrNoX(Addr a, UChar size)
225 {
226    UWord block1, block2;
227 
228    if (I1.line_size_bits != LL.line_size_bits) return False;
229    block1 =  a         >> I1.line_size_bits;
230    block2 = (a+size-1) >> I1.line_size_bits;
231    if (block1 != block2) return False;
232 
233    return True;
234 }
235 
236 /*--------------------------------------------------------------------*/
237 /*--- end                                                 cg_sim.c ---*/
238 /*--------------------------------------------------------------------*/
239 
240