1 #define _GNU_SOURCE
2 
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <stdint.h>
6 #include <stdarg.h>
7 #include <string.h>
8 #include <getopt.h>
9 #include <pthread.h>
10 #include <errno.h>
11 #include "sched.h"
12 
13 
/* Upper bound (exclusive) on the CPU indices visited by the test loops. */
#define MAX_CPUS		32
/* Default maximum TSC skew, in cycles, before a CPU pair is a failure. */
#define	DEFAULT_THRESHOLD	500


/* Basename of argv[0]; used as a prefix in usage and error messages. */
char	*program;
/* Maximum accepted TSC skew; replaced by the -t/--threshold argument. */
long	threshold = DEFAULT_THRESHOLD;
/* Non-zero (-s/--silent) suppresses the per-pair and PASS/FAIL output. */
int	silent;
/* Non-zero (-v/--verbose) enables per-iteration round-trip output. */
int	verbose;
22 
23 
/*
 * Long-option table handed to getopt_long().  Every long option maps
 * onto its single-character equivalent; the all-zero entry terminates
 * the table.
 */
struct option options[] = {
	{ .name = "cpus",      .has_arg = required_argument, .flag = NULL, .val = 'c' },
	{ .name = "help",      .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "silent",    .has_arg = no_argument,       .flag = NULL, .val = 's' },
	{ .name = "threshold", .has_arg = required_argument, .flag = NULL, .val = 't' },
	{ .name = "verbose",   .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = NULL,        .has_arg = 0,                 .flag = NULL, .val = 0   }
};
32 
33 
usage(void)34 void usage(void)
35 {
36 	printf("usage: %s [-hsv] [-c <cpu_set>] [-t threshold]\n", program);
37 }
38 
39 
help(void)40 void help(void)
41 {
42 	usage();
43 	printf("check TSC synchronization between CPUs\n");
44 	printf("  -c,--cpus        set of cpus to test (default: all)\n");
45 	printf("  -h,--help        show this message\n");
46 	printf("  -s,--silent      no output if test is successful\n");
47 	printf("  -t,--threshold   TSC skew threshold (default: %d cycles)\n",
48 		DEFAULT_THRESHOLD);
49 	printf("  -v,--verbose     verbose output\n");
50 }
51 
52 
error(int err,const char * fmt,...)53 void error(int err, const char *fmt, ...)
54 {
55 	va_list	ap;
56 
57 	fprintf(stderr, "%s: ", program);
58 	va_start(ap, fmt);
59 	vfprintf(stderr, fmt, ap);
60 	va_end(ap);
61 
62 	if (err)
63 		fprintf(stderr, ": %s\n", strerror(err));
64 	putc('\n', stderr);
65 }
66 
67 
/*
 * parse a string containing a comma separated list of ranges
 * of cpu numbers such as: "0,2,4-7" into a cpu_set_t
 *
 * s     - the list to parse; numbers are read with strtol() base 0
 * cpus  - result set; it is cleared first, so an empty string
 *         yields an empty set
 *
 * Returns 0 on success.  Returns 1, after reporting the problem with
 * error(), when a number lies outside 0..CPU_SETSIZE-1, a range is
 * reversed or has no end ("3-"), or entries are not separated by a
 * comma.  A single trailing comma is tolerated.
 */
int parse_cpu_set(const char *s, cpu_set_t *cpus)
{
	CPU_ZERO(cpus);

	while (*s) {
		char	*next;
		int	cpu;
		/*
		 * keep the full strtol() result: narrowing to int before
		 * the range checks would let huge values wrap onto valid
		 * cpu numbers
		 */
		long	start, end;

		start = end = strtol(s, &next, 0);
		if (s == next)
			break;
		s = next;

		if (*s == '-') {
			++s;
			end = strtol(s, &next, 0);
			if (s == next) {
				/* "N-" must not be silently dropped */
				error(0, "missing end of cpu range '%ld-' in cpu set",
					start);
				return 1;
			}
			s = next;
		}

		if (start < 0 || start >= CPU_SETSIZE) {
			error(0, "bad cpu number '%ld' in cpu set", start);
			return 1;
		}

		if (end < 0 || end >= CPU_SETSIZE) {
			error(0, "bad cpu number '%ld' in cpu set", end);
			return 1;
		}

		if (end < start) {
			error(0, "bad cpu range '%ld-%ld' in cpu set",
				start, end);
			return 1;
		}

		/* both bounds are now known to fit in an int */
		for (cpu = (int)start; cpu <= (int)end; ++cpu)
			CPU_SET(cpu, cpus);

		/*
		 * an entry must be followed by ',' or the end of the
		 * string; anything else is reported below
		 */
		if (*s == ',')
			++s;
		else if (*s)
			break;
	}

	if (*s) {
		error(0, "unexpected character '%c' in cpu set", *s);
		return 1;
	}

	return 0;
}
125 
126 
/*
 * A handshake word padded out to CACHE_LINE_SIZE bytes, so the 'state'
 * members of two adjacent state_t objects are at least that far apart
 * (presumably to keep them on separate cache lines).
 */
#define	CACHE_LINE_SIZE	256

union state {
	int	state;
	char	pad[CACHE_LINE_SIZE];
};

typedef union state	state_t;

/*
 * Compiler-only barrier: an empty asm with a "memory" clobber, which
 * stops the compiler from caching or reordering memory accesses
 * across it.
 */
#define barrier()	__asm__ __volatile__("" : : : "memory")
134 
/*
 * Publish the value v in the handshake word *s.
 *
 * The store has release ordering: data this thread wrote beforehand
 * (e.g. slave_tsc or slave_cpu) cannot be moved after it by the
 * compiler, so a thread that observes the new state also observes
 * that data.  A plain assignment gives no such guarantee.
 */
static inline void set_state(state_t *s, int v)
{
	__atomic_store_n(&s->state, v, __ATOMIC_RELEASE);
}
139 
/*
 * Spin until the handshake word *s holds the value v.
 *
 * Each poll is an acquire load: the word is re-read from memory on
 * every iteration, and reads issued after the loop cannot be hoisted
 * above the load that finally observed v.
 */
static inline void wait_for_state(state_t *s, int v)
{
	while (__atomic_load_n(&s->state, __ATOMIC_ACQUIRE) != v)
		;
}
145 
/*
 * Read the processor's time stamp counter with the RDTSC instruction
 * and return it as a 64-bit value.
 *
 * Only x86 is supported; any other target stops the build with a
 * clear message instead of falling into the i386-only asm constraint.
 */
#if defined(__x86_64__)
static inline uint64_t rdtsc(void)
{
	uint32_t	lo, hi;

	/* the two 32-bit halves are taken from the a and d registers */
	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

	return ((uint64_t)hi << 32) | lo;
}
#elif defined(__i386__)
static inline uint64_t rdtsc(void)
{
	uint64_t	tsc;

	/* on 32-bit x86 the "A" constraint names the edx:eax pair */
	__asm__ __volatile__("rdtsc" : "=A" (tsc));

	return tsc;
}
#else
#error "rdtsc() is only implemented for x86 (i386 and x86_64)"
#endif
165 
166 #define	READY	1
167 #define	DONE	2
168 #define	ERROR	3
169 
170 state_t		master;
171 state_t		slave;
172 
173 int64_t		slave_tsc;
174 int		slave_cpu;
175 
176 
/*
 * Restrict the caller (pid 0 in the sched_setaffinity() call) to the
 * single CPU 'cpu'.
 *
 * Returns 0 on success; on failure reports the errno value through
 * error() and returns -1.
 */
int set_cpu_affinity(int cpu)
{
	cpu_set_t only;

	CPU_ZERO(&only);
	CPU_SET(cpu, &only);

	if (sched_setaffinity(0, sizeof only, &only) >= 0)
		return 0;

	error(errno, "sched_setaffinity() failed for CPU %d", cpu);
	return -1;
}
189 
190 #define NUM_ITERS	10
191 
192 int64_t
tsc_delta(int cpu_a,int cpu_b)193 tsc_delta(int cpu_a, int cpu_b)
194 {
195 	uint64_t	best_t0	= 0;
196 	uint64_t	best_t1	= ~0ULL;
197 	uint64_t	best_tm	= 0;
198 	int64_t		delta;
199 	uint64_t	t0, t1, tm;
200 	int		i;
201 
202 	if (verbose)
203 		printf("CPU %d - CPU %d\n", cpu_a, cpu_b);
204 
205 	if (set_cpu_affinity(cpu_a) < 0)
206 		return -1;
207 
208 	slave_cpu = cpu_b;
209 
210 	for (i = 0; i < NUM_ITERS; i++) {
211 
212 		set_state(&master, READY);
213 
214 		wait_for_state(&slave, READY);
215 
216 		t0 = rdtsc();
217 		set_state(&master, DONE);
218 		wait_for_state(&slave, DONE);
219 		t1 = rdtsc();
220 
221 		if ((t1 - t0) < (best_t1 - best_t0)) {
222 			best_t0 = t0;
223 			best_t1 = t1;
224 			best_tm = slave_tsc;
225 		}
226 		if (verbose)
227 			printf("loop %2d: roundtrip = %5Ld\n", i, t1 - t0);
228 	}
229 
230 	delta = (best_t0/2 + best_t1/2 + (best_t0 & best_t1 & 1)) - best_tm;
231 
232 	if (!silent)
233 		printf("CPU %d - CPU %d = % 5Ld\n", cpu_a, cpu_b, delta);
234 
235 	return delta;
236 }
237 
238 
239 void *
slave_thread(void * arg)240 slave_thread(void *arg)
241 {
242 	int	current_cpu = -1;
243 
244 	for(;;) {
245 
246 		wait_for_state(&master, READY);
247 
248 		if (slave_cpu < 0) {
249 			return NULL;
250 		}
251 
252 		if (slave_cpu != current_cpu) {
253 
254 			if (set_cpu_affinity(slave_cpu) < 0) {
255 				set_state(&slave, ERROR);
256 				return NULL;
257 			}
258 
259 			current_cpu = slave_cpu;
260 		}
261 
262 		set_state(&slave, READY);
263 
264 		wait_for_state(&master, DONE);
265 
266 		slave_tsc = rdtsc();
267 
268 		set_state(&slave, DONE);
269 	}
270 	return NULL;
271 }
272 
273 
274 int
check_tsc(cpu_set_t * cpus)275 check_tsc(cpu_set_t *cpus)
276 {
277 	int		cpu_a, cpu_b;
278 	int64_t		delta;
279 	int		err	= 0;
280 	pthread_t	thread;
281 
282 	if ((err = pthread_create(&thread, NULL, slave_thread, NULL))) {
283 		error(err, "pthread_create_failed");
284 		return -1;
285 	}
286 
287 
288 	for (cpu_a = 0; cpu_a < MAX_CPUS; cpu_a++) {
289 		if (!CPU_ISSET(cpu_a, cpus))
290 			continue;
291 
292 		for (cpu_b = 0; cpu_b < MAX_CPUS; cpu_b++) {
293 			if (!CPU_ISSET(cpu_b, cpus) || cpu_a == cpu_b)
294 				continue;
295 
296 			delta = tsc_delta(cpu_a, cpu_b);
297 
298 			if (llabs(delta) > threshold) {
299 				++err;
300 			}
301 		}
302 	}
303 
304 	/*
305 	 * tell the slave thread to exit
306 	 */
307 	slave_cpu = -1;
308 	set_state(&master, READY);
309 
310 	pthread_join(thread, NULL);
311 
312 	return err;
313 }
314 
315 
316 int
main(int argc,char * argv[])317 main(int argc, char *argv[])
318 {
319 	int		c;
320 	cpu_set_t	cpus;
321 	int		errs	= 0;
322 	extern int	optind;
323 	extern char	*optarg;
324 
325 	if ((program = strrchr(argv[0], '/')) != NULL)
326 		++program;
327 	else
328 		program = argv[0];
329 
330 	/*
331 	 * default to checking all cpus
332 	 */
333 	for (c = 0; c < MAX_CPUS; c++) {
334 		CPU_SET(c, &cpus);
335 	}
336 
337 	while ((c = getopt_long(argc, argv, "c:hst:v", options, NULL)) != EOF) {
338 		switch (c) {
339 			case 'c':
340 				if (parse_cpu_set(optarg, &cpus) != 0)
341 					++errs;
342 				break;
343 			case 'h':
344 				help();
345 				exit(0);
346 			case 's':
347 				++silent;
348 				break;
349 			case 't':
350 				threshold = strtol(optarg, NULL, 0);
351 				break;
352 			case 'v':
353 				++verbose;
354 				break;
355 			default:
356 				++errs;
357 				break;
358 		}
359 	}
360 
361 	if (errs || optind < argc) {
362 		usage();
363 		exit(1);
364 	}
365 
366 	/*
367 	 * limit the set of CPUs to the ones that are currently available
368 	 * (Note that on some kernel versions sched_setaffinity() will fail
369 	 * if you specify CPUs that are not currently online so we ignore
370 	 * the return value and hope for the best)
371 	 */
372 	sched_setaffinity(0, sizeof cpus, &cpus);
373 	if (sched_getaffinity(0, sizeof cpus, &cpus) < 0) {
374 		error(errno, "sched_getaffinity() failed");
375 		exit(1);
376 	}
377 
378 	errs = check_tsc(&cpus);
379 
380 	if (!silent) {
381 		printf("%s\n", errs ? "FAIL" : "PASS");
382 	}
383 
384 	return errs ? EXIT_FAILURE : EXIT_SUCCESS;
385 }
386