1 /******************************************************************************/
2 /* */
3 /* Paul Mackerras <paulus@samba.org>, 2009 */
4 /* */
5 /* This program is free software; you can redistribute it and/or modify */
6 /* it under the terms of the GNU General Public License as published by */
7 /* the Free Software Foundation; either version 2 of the License, or */
8 /* (at your option) any later version. */
9 /* */
10 /* This program is distributed in the hope that it will be useful, */
11 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
12 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See */
13 /* the GNU General Public License for more details. */
14 /* */
15 /* You should have received a copy of the GNU General Public License */
16 /* along with this program; if not, write to the Free Software */
17 /* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
18 /* */
19 /******************************************************************************/
20 /*
21 Here's a little test program that checks whether software counters
22 (specifically, the task clock counter) work correctly when they're in
23 a group with hardware counters.
24
What it does is to create several groups, each with one hardware
counter, counting instructions, plus a task clock counter. It first
determines N, the number of hardware counters that can count
instructions at the same time, by opening instruction-counting events
one by one until they start being multiplexed, and then creates N+4
groups to force them to be multiplexed. It also creates an overall
task clock counter.
30
31 Then it spins for a while, and then stops all the counters and reads
32 them. It takes the total of the task clock counters in the groups and
33 computes the ratio of that total to the overall execution time from
34 the overall task clock counter.
35
36 That ratio should be equal to the number of actual hardware counters
37 that can count instructions. If the task clock counters in the groups
38 don't stop when their group gets taken off the PMU, the ratio will
39 instead be close to N+4. The program will declare that the test fails
40 if the ratio is greater than N (actually, N + 0.0001 to allow for FP
41 rounding errors).
42
43 Could someone run this on x86 on the latest PCL tree and let me know
44 what happens? I don't have an x86 crash box easily to hand. On
45 powerpc, it passes, but I think that is because I am missing setting
46 counter->prev_count in arch/powerpc/kernel/perf_counter.c, and I think
47 that means that enabling/disabling a group with a task clock counter
48 in it won't work correctly (I'll do a test program for that next).
49
Usage is: ./perf_event_open02 [-v]
51
52 The -v flag makes it print out the values of each counter.
53 */
54
55 #include <stdio.h>
56 #include <stddef.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <fcntl.h>
60 #include <poll.h>
61 #include <unistd.h>
62 #include <errno.h>
63 #include "config.h"
64 #include <sys/prctl.h>
65 #include <sys/types.h>
66 #include <linux/types.h>
67
68 #if HAVE_PERF_EVENT_ATTR
69 # include <linux/perf_event.h>
70 #endif
71
72 #include "test.h"
73 #include "safe_macros.h"
74 #include "linux_syscall_numbers.h"
75
/*
 * Test identification: the number of test cases in this program and the
 * test case name (presumably consumed by the library declared in test.h).
 */
int TST_TOTAL = 1;
char *TCID = "perf_event_open02";
78
79 #if HAVE_PERF_EVENT_ATTR
80
81 #define MAX_CTRS 1000
82 #define LOOPS 1000000000
83
84 static int count_hardware_counters(void);
85 static void setup(void);
86 static void verify(void);
87 static void cleanup(void);
88 static void help(void);
89
90 static int n, nhw;
91 static int verbose;
92 static option_t options[] = {
93 {"v", &verbose, NULL},
94 {NULL, NULL, NULL},
95 };
96
97 static int tsk0;
98 static int hwfd[MAX_CTRS], tskfd[MAX_CTRS];
99
main(int ac,char ** av)100 int main(int ac, char **av)
101 {
102 int lc;
103
104 tst_parse_opts(ac, av, options, help);
105
106 setup();
107
108 for (lc = 0; TEST_LOOPING(lc); lc++) {
109 tst_count = 0;
110 verify();
111 }
112
113 cleanup();
114 tst_exit();
115 }
116
perf_event_open(struct perf_event_attr * hw_event,pid_t pid,int cpu,int group_fd,unsigned long flags)117 static int perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
118 int cpu, int group_fd, unsigned long flags)
119 {
120 int ret;
121
122 ret = ltp_syscall(__NR_perf_event_open, hw_event, pid, cpu,
123 group_fd, flags);
124 return ret;
125 }
126
127
do_work(void)128 static void do_work(void)
129 {
130 int i;
131
132 for (i = 0; i < LOOPS; ++i)
133 asm volatile (""::"g" (i));
134 }
135
/*
 * Buffer layout used when reading an event that was opened with
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING in
 * its read_format.
 */
struct read_format {
	/* the counter value itself */
	unsigned long long value;
	/* present if PERF_FORMAT_TOTAL_TIME_ENABLED */
	unsigned long long time_enabled;
	/* present if PERF_FORMAT_TOTAL_TIME_RUNNING */
	unsigned long long time_running;
};
143
count_hardware_counters(void)144 static int count_hardware_counters(void)
145 {
146 struct perf_event_attr hw_event;
147 int i, hwctrs = 0;
148 int fdarry[MAX_CTRS];
149 struct read_format buf;
150
151 memset(&hw_event, 0, sizeof(struct perf_event_attr));
152
153 hw_event.type = PERF_TYPE_HARDWARE;
154 hw_event.size = sizeof(struct perf_event_attr);
155 hw_event.disabled = 1;
156 hw_event.config = PERF_COUNT_HW_INSTRUCTIONS;
157 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
158 PERF_FORMAT_TOTAL_TIME_RUNNING;
159
160 for (i = 0; i < MAX_CTRS; i++) {
161 fdarry[i] = perf_event_open(&hw_event, 0, -1, -1, 0);
162 if (fdarry[i] == -1) {
163 if (errno == ENOENT) {
164 tst_brkm(TCONF, cleanup,
165 "PERF_COUNT_HW_INSTRUCTIONS not supported");
166 }
167 tst_brkm(TBROK | TERRNO, cleanup,
168 "perf_event_open failed at iteration:%d", i);
169 }
170
171 if (prctl(PR_TASK_PERF_EVENTS_ENABLE) == -1) {
172 tst_brkm(TBROK | TERRNO, cleanup,
173 "prctl(PR_TASK_PERF_EVENTS_ENABLE) failed");
174 }
175
176 do_work();
177
178 if (prctl(PR_TASK_PERF_EVENTS_DISABLE) == -1) {
179 tst_brkm(TBROK | TERRNO, cleanup,
180 "prctl(PR_TASK_PERF_EVENTS_DISABLE) failed");
181 }
182
183 if (read(fdarry[i], &buf, sizeof(buf)) != sizeof(buf)) {
184 tst_brkm(TBROK | TERRNO, cleanup,
185 "error reading counter(s)");
186 }
187
188 if (verbose == 1) {
189 printf("at iteration:%d value:%lld time_enabled:%lld "
190 "time_running:%lld\n", i, buf.value,
191 buf.time_enabled, buf.time_running);
192 }
193
194 /*
195 * Normally time_enabled and time_running are the same value.
196 * But if more events are started than available counter slots
197 * on the PMU, then multiplexing happens and events run only
198 * part of the time. Time_enabled and time_running's values
199 * will be different. In this case the time_enabled and time_
200 * running values can be used to scale an estimated value for
201 * the count. So if buf.time_enabled and buf.time_running are
202 * not equal, we can think that PMU hardware counters
203 * multiplexing happens and the number of the opened events
204 * are the number of max available hardware counters.
205 */
206 if (buf.time_enabled != buf.time_running) {
207 hwctrs = i;
208 break;
209 }
210 }
211
212 for (i = 0; i <= hwctrs; i++)
213 SAFE_CLOSE(cleanup, fdarry[i]);
214
215 return hwctrs;
216 }
217
setup(void)218 static void setup(void)
219 {
220 int i;
221 struct perf_event_attr tsk_event, hw_event;
222
223 /*
224 * According to perf_event_open's manpage, the official way of
225 * knowing if perf_event_open() support is enabled is checking for
226 * the existence of the file /proc/sys/kernel/perf_event_paranoid.
227 */
228 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1)
229 tst_brkm(TCONF, NULL, "Kernel doesn't have perf_event support");
230
231 tst_sig(NOFORK, DEF_HANDLER, cleanup);
232
233 TEST_PAUSE;
234
235 nhw = count_hardware_counters();
236 n = nhw + 4;
237
238 memset(&hw_event, 0, sizeof(struct perf_event_attr));
239 memset(&tsk_event, 0, sizeof(struct perf_event_attr));
240
241 tsk_event.type = PERF_TYPE_SOFTWARE;
242 tsk_event.size = sizeof(struct perf_event_attr);
243 tsk_event.disabled = 1;
244 tsk_event.config = PERF_COUNT_SW_TASK_CLOCK;
245
246 hw_event.type = PERF_TYPE_HARDWARE;
247 hw_event.size = sizeof(struct perf_event_attr);
248 hw_event.disabled = 1;
249 hw_event.config = PERF_COUNT_HW_INSTRUCTIONS;
250
251 tsk0 = perf_event_open(&tsk_event, 0, -1, -1, 0);
252 if (tsk0 == -1) {
253 tst_brkm(TBROK | TERRNO, cleanup, "perf_event_open failed");
254 } else {
255 tsk_event.disabled = 0;
256 for (i = 0; i < n; ++i) {
257 hwfd[i] = perf_event_open(&hw_event, 0, -1, -1, 0);
258 tskfd[i] = perf_event_open(&tsk_event, 0, -1,
259 hwfd[i], 0);
260 if (tskfd[i] == -1 || hwfd[i] == -1) {
261 tst_brkm(TBROK | TERRNO, cleanup,
262 "perf_event_open failed");
263 }
264 }
265 }
266 }
267
cleanup(void)268 static void cleanup(void)
269 {
270 int i;
271
272 for (i = 0; i < n; i++) {
273 if (hwfd[i] > 0 && close(hwfd[i]) == -1)
274 tst_resm(TWARN | TERRNO, "close(%d) failed", hwfd[i]);
275 if (tskfd[i] > 0 && close(tskfd[i]) == -1)
276 tst_resm(TWARN | TERRNO, "close(%d) failed", tskfd[i]);
277 }
278
279 if (tsk0 > 0 && close(tsk0) == -1)
280 tst_resm(TWARN | TERRNO, "close(%d) failed", tsk0);
281 }
282
verify(void)283 static void verify(void)
284 {
285 unsigned long long vt0, vt[MAX_CTRS], vh[MAX_CTRS];
286 unsigned long long vtsum = 0, vhsum = 0;
287 int i;
288 double ratio;
289
290 if (prctl(PR_TASK_PERF_EVENTS_ENABLE) == -1) {
291 tst_brkm(TBROK | TERRNO, cleanup,
292 "prctl(PR_TASK_PERF_EVENTS_ENABLE) failed");
293 }
294
295 do_work();
296
297 if (prctl(PR_TASK_PERF_EVENTS_DISABLE) == -1) {
298 tst_brkm(TBROK | TERRNO, cleanup,
299 "prctl(PR_TASK_PERF_EVENTS_DISABLE) failed");
300 }
301
302 if (read(tsk0, &vt0, sizeof(vt0)) != sizeof(vt0)) {
303 tst_brkm(TBROK | TERRNO, cleanup,
304 "error reading task clock counter");
305 }
306
307 for (i = 0; i < n; ++i) {
308 if (read(tskfd[i], &vt[i], sizeof(vt[i])) != sizeof(vt[i]) ||
309 read(hwfd[i], &vh[i], sizeof(vh[i])) != sizeof(vh[i])) {
310 tst_brkm(TBROK | TERRNO, cleanup,
311 "error reading counter(s)");
312 }
313 vtsum += vt[i];
314 vhsum += vh[i];
315 }
316
317 tst_resm(TINFO, "overall task clock: %llu", vt0);
318 tst_resm(TINFO, "hw sum: %llu, task clock sum: %llu", vhsum, vtsum);
319
320 if (verbose == 1) {
321 printf("hw counters:");
322 for (i = 0; i < n; ++i)
323 printf(" %llu", vh[i]);
324 printf("\ntask clock counters:");
325 for (i = 0; i < n; ++i)
326 printf(" %llu", vt[i]);
327 printf("\n");
328 }
329
330 ratio = (double)vtsum / vt0;
331 tst_resm(TINFO, "ratio: %.2f", ratio);
332 if (ratio > nhw + 0.0001) {
333 tst_resm(TFAIL, "test failed (ratio was greater than )");
334 } else {
335 tst_resm(TPASS, "test passed");
336 }
337 }
338
/*
 * Print the description of the test-specific -v option; passed to
 * tst_parse_opts() as the help callback.
 */
static void help(void)
{
	printf("-v print verbose information\n");
}
343
344 #else
345
main(void)346 int main(void)
347 {
348 tst_brkm(TCONF, NULL, "This system doesn't have "
349 "header file:<linux/perf_event.h> or "
350 "no struct perf_event_attr defined");
351 }
352 #endif
353