1 #define _GNU_SOURCE
2
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <stdint.h>
6 #include <stdarg.h>
7 #include <string.h>
8 #include <getopt.h>
9 #include <pthread.h>
10 #include <errno.h>
11 #include "sched.h"
12
13
/* CPU numbers 0 .. MAX_CPUS-1 are the ones considered for testing. */
#define MAX_CPUS 32
/* Default maximum TSC skew, in cycles. */
#define DEFAULT_THRESHOLD 500


/* Basename of argv[0]; prefixes usage and error messages. */
char *program;
/* Largest absolute TSC delta that is still accepted (-t). */
long threshold = DEFAULT_THRESHOLD;
/* Non-zero: suppress the per-pair results and the PASS/FAIL line (-s). */
int silent = 0;
/* Non-zero: also print the roundtrip time of every iteration (-v). */
int verbose = 0;


/*
 * Long options accepted by getopt_long(); each one maps onto the
 * short option of the same meaning. The all-zero entry ends the table.
 */
struct option options[] = {
	{ .name = "cpus",      .has_arg = required_argument, .flag = NULL, .val = 'c' },
	{ .name = "help",      .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "silent",    .has_arg = no_argument,       .flag = NULL, .val = 's' },
	{ .name = "threshold", .has_arg = required_argument, .flag = NULL, .val = 't' },
	{ .name = "verbose",   .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = NULL,        .has_arg = 0,                 .flag = NULL, .val = 0 }
};
32
33
usage(void)34 void usage(void)
35 {
36 printf("usage: %s [-hsv] [-c <cpu_set>] [-t threshold]\n", program);
37 }
38
39
help(void)40 void help(void)
41 {
42 usage();
43 printf("check TSC synchronization between CPUs\n");
44 printf(" -c,--cpus set of cpus to test (default: all)\n");
45 printf(" -h,--help show this message\n");
46 printf(" -s,--silent no output if test is successful\n");
47 printf(" -t,--threshold TSC skew threshold (default: %d cycles)\n",
48 DEFAULT_THRESHOLD);
49 printf(" -v,--verbose verbose output\n");
50 }
51
52
error(int err,const char * fmt,...)53 void error(int err, const char *fmt, ...)
54 {
55 va_list ap;
56
57 fprintf(stderr, "%s: ", program);
58 va_start(ap, fmt);
59 vfprintf(stderr, fmt, ap);
60 va_end(ap);
61
62 if (err)
63 fprintf(stderr, ": %s\n", strerror(err));
64 putc('\n', stderr);
65 }
66
67
/*
 * parse a string containing a comma separated list of cpu numbers
 * and ranges of cpu numbers such as: "0,2,4-7" into a cpu_set_t
 *
 * s    - the string to parse; numbers are read with strtol() base 0
 * cpus - receives the result; it is cleared first, so on failure it
 *        holds only the entries parsed before the error
 *
 * Returns 0 on success. Returns 1, after reporting the problem with
 * error(), if a number is outside 0 .. CPU_SETSIZE-1, a range runs
 * backwards, a range has no end, or an unexpected character is found.
 */
int parse_cpu_set(const char *s, cpu_set_t *cpus)
{
	CPU_ZERO(cpus);

	while (*s) {
		char *next;
		long cpu;
		long start, end;

		/*
		 * Keep the values as long until they are range checked:
		 * narrowing to int first could turn a huge number into
		 * a valid looking cpu number.
		 */
		start = end = strtol(s, &next, 0);
		if (s == next)
			break;
		s = next;

		if (*s == '-') {
			++s;
			end = strtol(s, &next, 0);
			if (s == next) {
				/*
				 * "N-" with nothing usable after it; do not
				 * fall out of the loop, since at end of string
				 * that would be taken for success.
				 */
				error(0, "missing end of cpu range in cpu set");
				return 1;
			}
			s = next;
		}

		if (*s == ',')
			++s;

		if (start < 0 || start >= CPU_SETSIZE) {
			error(0, "bad cpu number '%ld' in cpu set", start);
			return 1;
		}

		if (end < 0 || end >= CPU_SETSIZE) {
			error(0, "bad cpu number '%ld' in cpu set", end);
			return 1;
		}

		if (end < start) {
			error(0, "bad cpu range '%ld-%ld' in cpu set",
			      start, end);
			return 1;
		}

		/* both bounds are now known to fit in an int */
		for (cpu = start; cpu <= end; ++cpu)
			CPU_SET((int)cpu, cpus);
	}

	/* the loop stops early at the first thing that is not a number */
	if (*s) {
		error(0, "unexpected character '%c' in cpu set", *s);
		return 1;
	}

	return 0;
}
125
126
/*
 * A state word padded out to CACHE_LINE_SIZE bytes.
 * NOTE(review): the padding presumably keeps two adjacent state_t
 * objects from sharing a cache line -- confirm the intent.
 */
#define CACHE_LINE_SIZE 256

typedef union state state_t;

union state {
	int state;			/* the value that is polled */
	char pad[CACHE_LINE_SIZE];	/* fixes the size of the union */
};

/*
 * Compiler barrier: an empty asm with a "memory" clobber, so the
 * compiler may not cache memory values across it or move memory
 * accesses over it. It emits no instruction.
 */
#define barrier() __asm__ __volatile__("" ::: "memory")
134
/*
 * Store v into the state word of *s.
 *
 * The store is bracketed by compiler barriers and done through a
 * volatile lvalue, so the compiler can neither drop it nor move it
 * across the surrounding memory accesses: data written before the
 * call is emitted before the state change, and the state change is
 * emitted before anything that follows the call. barrier() constrains
 * the compiler only; no fence instruction is issued.
 */
static inline void set_state(state_t *s, int v)
{
	barrier();
	*(volatile int *)&s->state = v;
	barrier();
}
139
/*
 * Busy-wait until the state word of *s equals v.
 *
 * The word is re-read from memory on every pass. The barrier after
 * the loop keeps the compiler from moving later memory reads ahead of
 * the read that observed v, including when the very first check
 * already succeeds. barrier() constrains the compiler only; no fence
 * instruction is issued.
 */
static inline void wait_for_state(state_t *s, int v)
{
	while (*(volatile int *)&s->state != v)
		barrier();
	barrier();
}
145
/*
 * Read the time stamp counter of the CPU executing the instruction.
 * Returns the 64-bit counter value.
 */
#if defined(__x86_64__)
static inline uint64_t rdtsc(void)
{
	uint32_t low, high;
	uint64_t value;

	/* rdtsc delivers the counter split across eax (low) and edx (high) */
	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));

	value = high;
	value <<= 32;
	value |= low;
	return value;
}
#else
static inline uint64_t rdtsc(void)
{
	uint64_t value;

	/* "=A" asks for the edx:eax pair as a single 64-bit operand */
	__asm__ __volatile__("rdtsc" : "=A" (value));

	return value;
}
#endif
165
166 #define READY 1
167 #define DONE 2
168 #define ERROR 3
169
170 state_t master;
171 state_t slave;
172
173 int64_t slave_tsc;
174 int slave_cpu;
175
176
/*
 * Restrict the calling thread to a single CPU.
 *
 * cpu - number of the CPU to run on
 *
 * Returns 0 on success; on failure reports the errno of
 * sched_setaffinity() through error() and returns -1.
 */
int set_cpu_affinity(int cpu)
{
	cpu_set_t mask;
	int rc;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	rc = sched_setaffinity(0, sizeof(mask), &mask);
	if (rc >= 0)
		return 0;

	error(errno, "sched_setaffinity() failed for CPU %d", cpu);
	return -1;
}
189
190 #define NUM_ITERS 10
191
192 int64_t
tsc_delta(int cpu_a,int cpu_b)193 tsc_delta(int cpu_a, int cpu_b)
194 {
195 uint64_t best_t0 = 0;
196 uint64_t best_t1 = ~0ULL;
197 uint64_t best_tm = 0;
198 int64_t delta;
199 uint64_t t0, t1, tm;
200 int i;
201
202 if (verbose)
203 printf("CPU %d - CPU %d\n", cpu_a, cpu_b);
204
205 if (set_cpu_affinity(cpu_a) < 0)
206 return -1;
207
208 slave_cpu = cpu_b;
209
210 for (i = 0; i < NUM_ITERS; i++) {
211
212 set_state(&master, READY);
213
214 wait_for_state(&slave, READY);
215
216 t0 = rdtsc();
217 set_state(&master, DONE);
218 wait_for_state(&slave, DONE);
219 t1 = rdtsc();
220
221 if ((t1 - t0) < (best_t1 - best_t0)) {
222 best_t0 = t0;
223 best_t1 = t1;
224 best_tm = slave_tsc;
225 }
226 if (verbose)
227 printf("loop %2d: roundtrip = %5Ld\n", i, t1 - t0);
228 }
229
230 delta = (best_t0/2 + best_t1/2 + (best_t0 & best_t1 & 1)) - best_tm;
231
232 if (!silent)
233 printf("CPU %d - CPU %d = % 5Ld\n", cpu_a, cpu_b, delta);
234
235 return delta;
236 }
237
238
239 void *
slave_thread(void * arg)240 slave_thread(void *arg)
241 {
242 int current_cpu = -1;
243
244 for(;;) {
245
246 wait_for_state(&master, READY);
247
248 if (slave_cpu < 0) {
249 return NULL;
250 }
251
252 if (slave_cpu != current_cpu) {
253
254 if (set_cpu_affinity(slave_cpu) < 0) {
255 set_state(&slave, ERROR);
256 return NULL;
257 }
258
259 current_cpu = slave_cpu;
260 }
261
262 set_state(&slave, READY);
263
264 wait_for_state(&master, DONE);
265
266 slave_tsc = rdtsc();
267
268 set_state(&slave, DONE);
269 }
270 return NULL;
271 }
272
273
274 int
check_tsc(cpu_set_t * cpus)275 check_tsc(cpu_set_t *cpus)
276 {
277 int cpu_a, cpu_b;
278 int64_t delta;
279 int err = 0;
280 pthread_t thread;
281
282 if ((err = pthread_create(&thread, NULL, slave_thread, NULL))) {
283 error(err, "pthread_create_failed");
284 return -1;
285 }
286
287
288 for (cpu_a = 0; cpu_a < MAX_CPUS; cpu_a++) {
289 if (!CPU_ISSET(cpu_a, cpus))
290 continue;
291
292 for (cpu_b = 0; cpu_b < MAX_CPUS; cpu_b++) {
293 if (!CPU_ISSET(cpu_b, cpus) || cpu_a == cpu_b)
294 continue;
295
296 delta = tsc_delta(cpu_a, cpu_b);
297
298 if (llabs(delta) > threshold) {
299 ++err;
300 }
301 }
302 }
303
304 /*
305 * tell the slave thread to exit
306 */
307 slave_cpu = -1;
308 set_state(&master, READY);
309
310 pthread_join(thread, NULL);
311
312 return err;
313 }
314
315
316 int
main(int argc,char * argv[])317 main(int argc, char *argv[])
318 {
319 int c;
320 cpu_set_t cpus;
321 int errs = 0;
322 extern int optind;
323 extern char *optarg;
324
325 if ((program = strrchr(argv[0], '/')) != NULL)
326 ++program;
327 else
328 program = argv[0];
329
330 /*
331 * default to checking all cpus
332 */
333 for (c = 0; c < MAX_CPUS; c++) {
334 CPU_SET(c, &cpus);
335 }
336
337 while ((c = getopt_long(argc, argv, "c:hst:v", options, NULL)) != EOF) {
338 switch (c) {
339 case 'c':
340 if (parse_cpu_set(optarg, &cpus) != 0)
341 ++errs;
342 break;
343 case 'h':
344 help();
345 exit(0);
346 case 's':
347 ++silent;
348 break;
349 case 't':
350 threshold = strtol(optarg, NULL, 0);
351 break;
352 case 'v':
353 ++verbose;
354 break;
355 default:
356 ++errs;
357 break;
358 }
359 }
360
361 if (errs || optind < argc) {
362 usage();
363 exit(1);
364 }
365
366 /*
367 * limit the set of CPUs to the ones that are currently available
368 * (Note that on some kernel versions sched_setaffinity() will fail
369 * if you specify CPUs that are not currently online so we ignore
370 * the return value and hope for the best)
371 */
372 sched_setaffinity(0, sizeof cpus, &cpus);
373 if (sched_getaffinity(0, sizeof cpus, &cpus) < 0) {
374 error(errno, "sched_getaffinity() failed");
375 exit(1);
376 }
377
378 errs = check_tsc(&cpus);
379
380 if (!silent) {
381 printf("%s\n", errs ? "FAIL" : "PASS");
382 }
383
384 return errs ? EXIT_FAILURE : EXIT_SUCCESS;
385 }
386