1 /*  time-schedule.c
2 
3     Programme to test how long a context switch takes.
4 
5     Copyright (C) 1998  Richard Gooch
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program; if not, write to the Free Software
19     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 
21     Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
22     The postal address is:
23       Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
24 */
25 
26 /*
27     This programme will determine the context switch (scheduling) overhead on
28     a system. It takes into account SMP machines. True context switches are
29     measured.
30 
31     Written by      Richard Gooch   15-SEP-1998
32 
33     Last updated by Richard Gooch   25-SEP-1998
34 
35 */
36 #include <unistd.h>
37 #ifdef _POSIX_THREADS
38 #ifndef _REENTRANT
39 #define _REENTRANT
40 #endif
41 #ifndef _POSIX_THREAD_SAFE_FUNCTIONS
42 #define _POSIX_THREAD_SAFE_FUNCTIONS
43 #endif
44 #include <pthread.h>
45 #endif
46 #include <stdlib.h>
47 #include <stdio.h>
48 #include <math.h>
49 #include <string.h>
50 #include <ctype.h>
51 #include <signal.h>
52 #include <sched.h>
53 #include <sys/time.h>
54 #include <sys/mman.h>
55 #include <sys/types.h>
56 #include <dirent.h>
57 #include <errno.h>
58 
59 #ifndef __KARMA__
60 #define mt_num_processors() 1	/*  Set to the number of processors   */
61 #define ERRSTRING strerror(errno)
62 #define FALSE 0
63 #define TRUE  1
64 #else
65 #include <karma.h>
66 #include <karma_mt.h>
67 #endif
68 
69 #define MAX_ITERATIONS      1000
70 
71 static unsigned int hog_other_cpus();
72 static void run_yielder(int use_threads, int read_fd);
73 static void *yielder_main(void *arg);
74 static void s_term_handler();
75 static void run_low_priority(unsigned int num, int read_fd);
76 static unsigned long compute_median(unsigned long values[MAX_ITERATIONS],
77 				    unsigned long max_value);
78 static unsigned int get_run_queue_size();
79 static unsigned long get_num_switches();
80 static void use_fpu_value(double val);
81 
82 static volatile unsigned int sched_count = 0;
83 /*  For yielder  */
84 static int pipe_read_fd = -1;
85 static int pipe_write_fd = -1;
86 static pid_t child = -1;
87 
main(int argc,char ** argv)88 int main(int argc, char **argv)
89 {
90 	int use_threads = FALSE;
91 	int use_pipe = FALSE;
92 	int no_test = FALSE;
93 	int frob_fpu = FALSE;
94 	int read_fd = -1;
95 	int write_fd = -1;
96 	int num_low_priority = -1;
97 	int i, j;
98 	int fds[2];
99 	unsigned int count, num_yields, run_queue_size1, run_queue_size2,
100 	    num_hogs;
101 	unsigned long median, switches1, num_switches, num_overhead_switches;
102 	signed long overhead, total_diffs;
103 	signed long min_diff = 1000000000;
104 	signed long max_diff = -1000000000;
105 	double dcount = 0.0;
106 	unsigned long diffs[MAX_ITERATIONS];
107 	struct timeval before, after;
108 	sigset_t set;
109 	static char *usage =
110 	    "time-schedule [-h] [-thread] [-notest] [-pipe] [-fpu] [num_running]";
111 
112 	setpgrp();
113 	/*  First create pipe used to sychronise low priority processes  */
114 	if (pipe(fds) != 0) {
115 		fprintf(stderr, "Error creating pipe\t%s\n", ERRSTRING);
116 		exit(1);
117 	}
118 	read_fd = fds[0];
119 	pipe_write_fd = fds[1];
120 	for (count = 1; count < argc; ++count) {
121 		if (strcmp(argv[count], "-thread") == 0) {
122 #ifdef _POSIX_THREADS
123 			use_threads = TRUE;
124 #else
125 			fprintf(stderr, "POSIX threads not available\n");
126 #endif
127 		} else if (strcmp(argv[count], "-pipe") == 0)
128 			use_pipe = TRUE;
129 		else if (strcmp(argv[count], "-notest") == 0)
130 			no_test = TRUE;
131 		else if (strcmp(argv[count], "-fpu") == 0)
132 			frob_fpu = TRUE;
133 		else if (isdigit(argv[count][0]))
134 			num_low_priority = atoi(argv[count]);
135 		else {
136 			fprintf(stderr,
137 				"Programme to time context switches (schedules)\n");
138 			fprintf(stderr,
139 				"(C) 1998  Richard Gooch <rgooch@atnf.csiro.au>\n");
140 			fprintf(stderr, "Usage:\t%s\n", usage);
141 			fprintf(stderr,
142 				"\t-thread\t\tswitch threads not processes\n");
143 			fprintf(stderr,
144 				"\t-pipe\t\tuse pipes not sched_yield()\n");
145 			fprintf(stderr,
146 				"\t-fpu\t\tpollute the FPU after each switch in main\n");
147 			fprintf(stderr,
148 				"\tnum_running\tnumber of extra processes\n");
149 			exit(0);
150 		}
151 	}
152 	if (no_test) {
153 		if (num_low_priority > 0)
154 			run_low_priority(num_low_priority, read_fd);
155 		while (TRUE)
156 			pause();
157 	}
158 	if (geteuid() == 0) {
159 		struct sched_param sp;
160 
161 		memset(&sp, 0, sizeof sp);
162 		sp.sched_priority = 10;
163 		if (sched_setscheduler(0, SCHED_FIFO, &sp) != 0) {
164 			fprintf(stderr, "Error changing to RT class\t%s\n",
165 				ERRSTRING);
166 			exit(1);
167 		}
168 		if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
169 			fprintf(stderr, "Error locking pages\t%s\n", ERRSTRING);
170 			exit(1);
171 		}
172 	} else
173 		fprintf(stderr, "Not running with RT priority!\n");
174 	/*  Give shell and login programme time to finish up and get off the run
175 	   queue  */
176 	usleep(200000);
177 	if (use_pipe) {
178 		if (pipe(fds) != 0) {
179 			fprintf(stderr, "Error creating pipe\t%s\n", ERRSTRING);
180 			exit(1);
181 		}
182 		pipe_read_fd = fds[0];
183 		write_fd = fds[1];
184 	}
185 	num_hogs = hog_other_cpus();
186 	/*  Determine overhead. Do it in a loop=2. The first iteration should warm
187 	   the cache, the second will compute the overhead  */
188 	for (j = 0; j < 2; ++j) {
189 		switches1 = get_num_switches();
190 		gettimeofday(&before, NULL);
191 		for (i = 0; i < 20; ++i) {
192 			if (use_pipe) {
193 				char ch = 0;
194 
195 				write(pipe_write_fd, &ch, 1);
196 				read(read_fd, &ch, 1);
197 			} else
198 				sched_yield();
199 			if (frob_fpu)
200 				++dcount;
201 		}
202 		gettimeofday(&after, NULL);
203 		num_overhead_switches = get_num_switches() - switches1;
204 		overhead = 1000000 * (after.tv_sec - before.tv_sec);
205 		overhead += after.tv_usec - before.tv_usec;
206 	}
207 	use_fpu_value(dcount);
208 	if (num_low_priority > 0)
209 		run_low_priority(num_low_priority, read_fd);
210 	/*  Set up for the benchmark  */
211 	run_yielder(use_threads, read_fd);
212 	memset(diffs, 0, sizeof diffs);
213 	run_queue_size1 = get_run_queue_size();
214 	total_diffs = 0;
215 	switches1 = get_num_switches();
216 	/*  Benchmark!  */
217 	for (count = 0; count < MAX_ITERATIONS; ++count) {
218 		signed long diff;
219 
220 		gettimeofday(&before, NULL);
221 		/*  Generate 20 context switches  */
222 		for (i = 0; i < 10; ++i) {
223 			if (use_pipe) {
224 				char ch = 0;
225 
226 				write(write_fd, &ch, 1);
227 				read(read_fd, &ch, 1);
228 			} else
229 				sched_yield();
230 			if (frob_fpu)
231 				dcount += 1.0;
232 		}
233 		gettimeofday(&after, NULL);
234 		diff = 1000000 * (after.tv_sec - before.tv_sec);
235 		diff += after.tv_usec - before.tv_usec;
236 		diffs[count] = diff;
237 		total_diffs += diff;
238 		if (diff < min_diff)
239 			min_diff = diff;
240 		if (diff > max_diff)
241 			max_diff = diff;
242 	}
243 	num_yields = sched_count;
244 	run_queue_size2 = get_run_queue_size();
245 	num_switches = get_num_switches() - switches1;
246 	if (!use_threads)
247 		kill(child, SIGTERM);
248 	fprintf(stderr, "Started %u hog processes\n", num_hogs);
249 	fprintf(stderr, "Syscall%s overhead: %.1f us\n", frob_fpu ? "/FPU" : "",
250 		(double)overhead / 20.0);
251 	if (switches1 > 0)
252 		fprintf(stderr, "Num switches during overhead check: %lu\n",
253 			num_overhead_switches);
254 	fprintf(stderr, "Minimum scheduling latency: %.1f (%.1f) us\n",
255 		(double)min_diff / 20.0, (double)(min_diff - overhead) / 20.0);
256 	median = compute_median(diffs, max_diff);
257 	fprintf(stderr, "Median scheduling latency:  %.1f (%.1f) us\n",
258 		(double)median / 20.0, (double)(median - overhead) / 20.0);
259 	fprintf(stderr, "Average scheduling latency: %.1f (%.1f) us\n",
260 		(double)total_diffs / (double)MAX_ITERATIONS / 20.0,
261 		(double)(total_diffs - overhead * MAX_ITERATIONS) /
262 		(double)MAX_ITERATIONS / 20.0);
263 	fprintf(stderr, "Maximum scheduling latency: %.1f (%.1f) us\n",
264 		(double)max_diff / 20.0, (double)(max_diff - overhead) / 20.0);
265 	fprintf(stderr, "Run queue size: %u, %u\n",
266 		run_queue_size1, run_queue_size2);
267 	use_fpu_value(dcount);
268 	if (use_threads)
269 		fprintf(stderr, "Number of yields: %u\n", num_yields);
270 	if (num_switches > 0)
271 		fprintf(stderr, "Num switches: %lu\n", num_switches);
272 
273 	/*  Terminate all child processes  */
274 	sigemptyset(&set);
275 	sigaddset(&set, SIGTERM);
276 	sigprocmask(SIG_BLOCK, &set, NULL);
277 
278 	kill(0, SIGTERM);
279 	return (0);
280 }				/*  End Function main  */
281 
hog_other_cpus()282 static unsigned int hog_other_cpus()
283 /*  [SUMMARY] Hog other CPUs with a high-priority job.
284     [RETURNS] The number of hogged CPUs.
285 */
286 {
287 	unsigned int count;
288 
289 	for (count = mt_num_processors(); count > 1; --count) {
290 		switch (fork()) {
291 		case 0:
292 			/*  Child  */
293 			while (TRUE) ;
294 			break;
295 		case -1:
296 			/*  Error  */
297 			fprintf(stderr, "Error forking\t%s\n", ERRSTRING);
298 			kill(0, SIGTERM);
299 			break;
300 		default:
301 			/*  Parent  */
302 			break;
303 		}
304 	}
305 	return mt_num_processors() - 1;
306 }				/*  End Function hog_other_cpus  */
307 
run_yielder(int use_threads,int read_fd)308 static void run_yielder(int use_threads, int read_fd)
309 /*  [SUMMARY] Run other process which will continuously yield.
310     <use_threads> If TRUE, the yielding process is just a thread.
311     <read_fd> The pipe to read the synchronisation byte from.
312     [RETURNS] Nothing.
313 */
314 {
315 	char ch;
316 	struct sigaction new_action;
317 #ifdef _POSIX_THREADS
318 	pthread_t thread;
319 
320 	if (use_threads) {
321 		if (pthread_create(&thread, NULL, yielder_main, NULL) != 0) {
322 			fprintf(stderr, "Error creating thread\t%s\n",
323 				ERRSTRING);
324 			kill(0, SIGTERM);
325 		}
326 		read(read_fd, &ch, 1);
327 		return;
328 	}
329 #endif
330 	switch (child = fork()) {
331 	case 0:
332 		/*  Child  */
333 		break;
334 	case -1:
335 		/*  Error  */
336 		fprintf(stderr, "Error forking\t%s\n", ERRSTRING);
337 		kill(0, SIGTERM);
338 		break;
339 	default:
340 		/*  Parent  */
341 		read(read_fd, &ch, 1);
342 		return;
343 		/*break; */
344 	}
345 	memset(&new_action, 0, sizeof new_action);
346 	sigemptyset(&new_action.sa_mask);
347 	new_action.sa_handler = s_term_handler;
348 	if (sigaction(SIGTERM, &new_action, NULL) != 0) {
349 		fprintf(stderr, "Error setting SIGTERM handler\t%s\n",
350 			ERRSTRING);
351 		exit(1);
352 	}
353 	yielder_main(NULL);
354 }				/*  End Function run_yielder  */
355 
yielder_main(void * arg)356 static void *yielder_main(void *arg)
357 /*  [SUMMARY] Yielder function.
358     <arg> An arbirtrary pointer. Ignored.
359     [RETURNS] NULL.
360 */
361 {
362 	char ch = 0;
363 
364 	sched_count = 0;
365 	write(pipe_write_fd, &ch, 1);
366 	while (TRUE) {
367 		if (pipe_read_fd >= 0) {
368 			read(pipe_read_fd, &ch, 1);
369 			write(pipe_write_fd, &ch, 1);
370 		} else
371 			sched_yield();
372 		++sched_count;
373 	}
374 	return (NULL);
375 }				/*  End Function yielder_main  */
376 
s_term_handler()377 static void s_term_handler()
378 {
379 	fprintf(stderr, "Number of yields: %u\n", sched_count);
380 	exit(0);
381 }				/*  End Function s_term_handler  */
382 
run_low_priority(unsigned int num,int read_fd)383 static void run_low_priority(unsigned int num, int read_fd)
384 /*  [SUMMARY] Run low priority processes.
385     <num> Number of processes.
386     <read_fd> The pipe to read the synchronisation byte from.
387     [RETURNS] Nothing.
388 */
389 {
390 	char ch = 0;
391 
392 	for (; num > 0; --num) {
393 		switch (fork()) {
394 		case 0:
395 			/*  Child  */
396 			if (geteuid() == 0) {
397 				struct sched_param sp;
398 
399 				memset(&sp, 0, sizeof sp);
400 				sp.sched_priority = 0;
401 				if (sched_setscheduler(0, SCHED_OTHER, &sp) !=
402 				    0) {
403 					fprintf(stderr,
404 						"Error changing to SCHED_OTHER class\t%s\n",
405 						ERRSTRING);
406 					exit(1);
407 				}
408 			}
409 			if (nice(20) != 0) {
410 				fprintf(stderr, "Error nicing\t%s\n",
411 					ERRSTRING);
412 				kill(0, SIGTERM);
413 			}
414 			write(pipe_write_fd, &ch, 1);
415 			while (TRUE)
416 				sched_yield();
417 			break;
418 		case -1:
419 			/*  Error  */
420 			fprintf(stderr, "Error forking\t%s\n", ERRSTRING);
421 			kill(0, SIGTERM);
422 			break;
423 		default:
424 			/*  Parent  */
425 			read(read_fd, &ch, 1);
426 			break;
427 		}
428 	}
429 }				/*  End Function run_low_priority  */
430 
compute_median(unsigned long values[MAX_ITERATIONS],unsigned long max_value)431 static unsigned long compute_median(unsigned long values[MAX_ITERATIONS],
432 				    unsigned long max_value)
433 /*  [SUMMARY] Compute the median from an array of values.
434     <values> The array of values.
435     <max_value> The maximum value in the array.
436     [RETURNS] The median value.
437 */
438 {
439 	unsigned long count;
440 	unsigned long median = 0;
441 	unsigned long peak = 0;
442 	unsigned long *table;
443 
444 	/*  Crude but effective  */
445 	if ((table = calloc(max_value + 1, sizeof *table)) == NULL) {
446 		fprintf(stderr, "Error allocating median table\n");
447 		exit(1);
448 	}
449 	for (count = 0; count < MAX_ITERATIONS; ++count) {
450 		++table[values[count]];
451 	}
452 	/*  Now search for peak. Position of peak is median  */
453 	for (count = 0; count < max_value + 1; ++count) {
454 		if (table[count] < peak)
455 			continue;
456 		peak = table[count];
457 		median = count;
458 	}
459 	free(table);
460 	return (median);
461 }				/*  End Function compute_median  */
462 
get_run_queue_size()463 static unsigned int get_run_queue_size()
464 /*  [SUMMARY] Compute the current size of the run queue.
465     [RETURNS] The length of the run queue.
466 */
467 {
468 	int dummy_i;
469 	unsigned int length = 0;
470 	FILE *fp;
471 	DIR *dp;
472 	struct dirent *de;
473 	char txt[64], dummy_str[64];
474 
475 	if ((dp = opendir("/proc")) == NULL)
476 		return (0);
477 	while ((de = readdir(dp)) != NULL) {
478 		if (!isdigit(de->d_name[0]))
479 			continue;
480 		sprintf(txt, "/proc/%s/stat", de->d_name);
481 		if ((fp = fopen(txt, "r")) == NULL)
482 			return (length);
483 		fscanf(fp, "%d %s %s", &dummy_i, dummy_str, txt);
484 		if (txt[0] == 'R')
485 			++length;
486 		fclose(fp);
487 	}
488 	closedir(dp);
489 	return (length);
490 }				/*  End Function get_run_queue_size  */
491 
get_num_switches()492 static unsigned long get_num_switches()
493 /*  [SUMMARY] Get the number of context switches.
494     [RETURNS] The number of context switches on success, else 0.
495 */
496 {
497 	unsigned long val;
498 	FILE *fp;
499 	char line[256], name[64];
500 
501 	if ((fp = fopen("/proc/stat", "r")) == NULL)
502 		return (0);
503 	while (fgets(line, sizeof line, fp) != NULL) {
504 		sscanf(line, "%s %lu", name, &val);
505 		if (strcasecmp(name, "ctxt") != 0)
506 			continue;
507 		fclose(fp);
508 		return (val);
509 	}
510 	fclose(fp);
511 	return (0);
512 }				/*  End Function get_num_switches  */
513 
use_fpu_value(double val)514 static void use_fpu_value(double val)
515 /*  [SUMMARY] Dummy function to consume FPU value. Fools compiler.
516     <val> The value.
517     [RETURNS] Nothing.
518 */
519 {
520 }				/*  End Function use_fpu_value  */
521