// This artificial program runs a lot of code.  The exact amount depends on
// the command line.  With no argument it runs a baseline amount of code,
// repeated four times.  If an arg "0" is given, it does exactly the same
// amount of work as the baseline, but using four times as much code.
// If an arg >= 1 is given, the amount of code is multiplied by this arg.
5 //
6 // It's a stress test for Valgrind's translation speed;  natively the two
7 // modes run in about the same time (the I-cache effects aren't big enough
8 // to make a difference), but under Valgrind the one running more code is
9 // significantly slower due to the extra translation time.
10 
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(__mips__)
#include <asm/cachectl.h>
#include <sys/syscall.h>
#elif defined(__tilegx__)
#include <asm/cachectl.h>
#endif
#include "tests/sys_mman.h"
22 
23 #define FN_SIZE   1280     // Must be big enough to hold the compiled f()
24                            // and any literal pool that might be used
25 #define N_LOOPS   20000    // Should be divisible by four
26 #define RATIO     4        // Ratio of code sizes between the two modes
27 
// Code template: main() memcpy()s FN_SIZE bytes starting at this function
// and then calls the copies.  Keep it small and free of calls to other
// functions (presumably required so that a copy still works when executed
// from a different address -- confirm per target).
//
// Runs 5000 iterations of a switch on (x % 8) whose cases deliberately
// fall through into one another, and returns the updated y.
//
// The arithmetic is done on unsigned values: the doubling in case 3 is
// applied thousands of times and would overflow a signed int, which is
// undefined behaviour.  Unsigned arithmetic wraps instead.  Converting the
// result back to int is implementation-defined rather than undefined when
// the value is out of range.
int f(int x, int y)
{
   unsigned int acc = (unsigned int)y;
   const unsigned int ux = (unsigned int)x;
   int i;
   for (i = 0; i < 5000; i++) {
      // The selector stays signed so negative x picks the same case as before.
      switch (x % 8) {
       case 1:  acc += 3;   /* fallthrough */
       case 2:  acc += ux;  /* fallthrough */
       case 3:  acc *= 2;   /* fallthrough */
       default: acc--;
      }
   }
   return (int)acc;
}
41 
42 int main(int argc, char* argv[])
43 {
44    int h, i, sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0;
45    int n_fns, n_reps;
46 
47    if (argc <= 1) {
48       // Mode 1: not so much code
49       n_fns  = N_LOOPS / RATIO;
50       n_reps = RATIO;
51       printf("mode 1: ");
52    } else {
53       // Mode 2: lots of code
54       const int mul = atoi(argv[1]);
55       if (mul == 0)
56          n_fns = N_LOOPS;
57       else
58          n_fns = N_LOOPS * mul;
59       n_reps = 1;
60       printf("mode 1: ");
61    }
62    printf("%d copies of f(), %d reps\n", n_fns, n_reps);
63 
64    char* a = mmap(0, FN_SIZE * n_fns,
65                      PROT_EXEC|PROT_WRITE,
66                      MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
67    assert(a != (char*)MAP_FAILED);
68 
69    // Make a whole lot of copies of f().  FN_SIZE is much bigger than f()
70    // will ever be (we hope).
71    for (i = 0; i < n_fns; i++) {
72       memcpy(&a[FN_SIZE*i], f, FN_SIZE);
73    }
74 
75 #if defined(__mips__)
76    syscall(__NR_cacheflush, a, FN_SIZE * n_fns, ICACHE);
77 #elif defined(__tilegx__)
78    cacheflush(a, FN_SIZE * n_fns, ICACHE);
79 #endif
80 
81    for (h = 0; h < n_reps; h += 1) {
82       for (i = 0; i < n_fns; i += 4) {
83          int(*f1)(int,int) = (void*)&a[FN_SIZE*(i+0)];
84          int(*f2)(int,int) = (void*)&a[FN_SIZE*(i+1)];
85          int(*f3)(int,int) = (void*)&a[FN_SIZE*(i+2)];
86          int(*f4)(int,int) = (void*)&a[FN_SIZE*(i+3)];
87          sum1 += f1(i+0, n_fns-i+0);
88          sum2 += f2(i+1, n_fns-i+1);
89          sum3 += f3(i+2, n_fns-i+2);
90          sum4 += f4(i+3, n_fns-i+3);
91          if (i % 1000 == 0)
92             printf(".");
93       }
94    }
95    printf("result = %d\n", sum1 + sum2 + sum3 + sum4);
96    return 0;
97 }
98