1 /*
2  * The main pounder process controller and scheduler program.
3  * Author: Darrick Wong <djwong@us.ibm.com>
4  */
5 
6 /*
7  * Copyright (C) 2003-2006 IBM
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22  * 02111-1307, USA.
23  */
24 
25 #include <errno.h>
26 #include <signal.h>
27 #include <sys/wait.h>
28 #include <unistd.h>
29 #include <fcntl.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <ctype.h>
33 #include <stdarg.h>
34 #include <sys/time.h>
35 #include <time.h>
36 #include <stdio.h>
37 #include <dirent.h>
38 #include <sys/stat.h>
39 
40 #include "proclist.h"
41 #include "debug.h"
42 
43 // List of subprocesses to wait upon
44 struct proclist_t wait_ons = { NULL };
45 struct proclist_t daemons = { NULL };
46 
47 static int is_leader = 0;
48 static char *pidfile = "";
49 
50 static inline int is_executable(const char *fname);
51 static inline int is_directory(const char *fname);
52 static inline int test_filter(const struct dirent *p);
53 static inline int test_sort(const struct dirent **a, const struct dirent **b);
54 static int wait_for_pids(void);
55 static void wait_for_daemons(void);
56 static void note_process(pid_t pid, char *name);
57 static void note_daemon(pid_t pid, char *name);
58 static void kill_tests(void);
59 static void kill_daemons(void);
60 static int process_dir(const char *fname);
61 static pid_t spawn_test(char *fname);
62 static void note_child(pid_t pid, char *fname, char type);
63 static int child_finished(const char *name, int stat);
64 static char *progname;
65 
66 #define TEST_PATH_LEN 512
67 #define TEST_FORK_WAIT 100
68 
69 /**
70  * Kill everything upon ^C.
71  */
jump_out(int signum)72 static void jump_out(int signum)
73 {
74 	pounder_fprintf(stdout, "Control-C received; aborting!\n");
75 	//unlink("pounder_pgrp");
76 	kill_tests();
77 	kill_daemons();
78 	if (is_leader) {
79 		unlink(pidfile);
80 	}
81 	exit(0);
82 }
83 
84 /**
85  * Kills tests launched from within.
86  */
kill_tests(void)87 static void kill_tests(void)
88 {
89 	struct proclist_item_t *curr;
90 
91 	curr = wait_ons.head;
92 	while (curr != NULL) {
93 		kill(-curr->pid, SIGTERM);
94 		curr = curr->next;
95 	}
96 }
97 
98 /**
99  * Kills daemons launched from within.
100  */
kill_daemons(void)101 static void kill_daemons(void)
102 {
103 	struct proclist_item_t *curr;
104 
105 	curr = daemons.head;
106 	while (curr != NULL) {
107 		kill(-curr->pid, SIGTERM);
108 		curr = curr->next;
109 	}
110 }
111 
112 /**
113  * Record the pounder leader's PID in a file.
114  */
record_pid(void)115 static void record_pid(void)
116 {
117 	FILE *fp;
118 
119 	pidfile = getenv("POUNDER_PIDFILE");
120 	if (pidfile == NULL) {
121 		pidfile = "pounder.pid";
122 	}
123 
124 	fp = fopen(pidfile, "w");
125 	if (fp == NULL) {
126 		perror(pidfile);
127 	}
128 	fprintf(fp, "%d", getpid());
129 	fclose(fp);
130 }
131 
132 /**
133  * Main program.  Returns 1 if all programs run successfully, 0 if
134  * something failed and -1 if there was an error running programs.
135  */
main(int argc,char * argv[])136 int main(int argc, char *argv[])
137 {
138 	int retcode;
139 	struct sigaction zig;
140 	pid_t pid;
141 	char *c;
142 
143 	/* Check parameters */
144 	if (argc < 2) {
145 		fprintf(stderr, "Usage: %s test_prog\n", argv[0]);
146 		return 1;
147 	}
148 
149 	if (argc > 2 && strcmp(argv[2], "--leader") == 0) {
150 		pounder_fprintf(stdout,
151 				"Logging this test output to %s/POUNDERLOG.\n",
152 				getenv("POUNDER_LOGDIR"));
153 		is_leader = 1;
154 		record_pid();
155 	}
156 
157 	progname = argv[0];
158 
159 	/* Set up signals */
160 	memset(&zig, 0x00, sizeof(zig));
161 	zig.sa_handler = jump_out;
162 	sigaction(SIGHUP, &zig, NULL);
163 	sigaction(SIGINT, &zig, NULL);
164 	sigaction(SIGTERM, &zig, NULL);
165 
166 	if (is_directory(argv[1])) {
167 		retcode = process_dir(argv[1]);
168 	} else {
169 		if (is_executable(argv[1])) {
170 			c = rindex(argv[1], '/');
171 			c++;
172 
173 			// Start the test
174 			pid = spawn_test(argv[1]);
175 			if (pid < 0) {
176 				perror("fork");
177 				retcode = -1;
178 				goto out;
179 			}
180 			// Track the test
181 			note_process(pid, argv[1]);
182 			if (wait_for_pids() == 0) {
183 				retcode = 1;
184 			} else {
185 				retcode = 0;
186 			}
187 		} else {
188 			pounder_fprintf(stderr,
189 					"%s: Not a directory or a test.\n",
190 					argv[1]);
191 			retcode = -1;
192 		}
193 	}
194 
195 out:
196 	kill_daemons();
197 	wait_for_daemons();
198 	if (is_leader) {
199 		if (retcode == 0) {
200 			pounder_fprintf(stdout, "%s: %s.\n", argv[1], pass_msg);
201 		} else if (retcode < 0 || retcode == 255) {
202 			pounder_fprintf(stdout, "%s: %s with code %d.\n",
203 					argv[1], abort_msg, retcode);
204 		} else {
205 			pounder_fprintf(stdout, "%s: %s with code %d.\n",
206 					argv[1], fail_msg, retcode);
207 		}
208 		unlink(pidfile);
209 	}
210 	exit(retcode);
211 }
212 
/**
 * Helper function to determine if a file is executable.
 * Returns 1 if yes, 0 if no and -1 if the file cannot be stat'ed.
 *
 * The decision is made from the file's mode bits and the effective
 * user and group IDs: the owner is checked against the user execute bit,
 * the owning group against the group bit, everyone else against the
 * "other" bit.  Effective UID 0 may execute the file as long as at least
 * one execute bit is set.  Supplementary groups are not consulted.
 */
static inline int is_executable(const char *fname)
{
	struct stat tmp;
	mode_t wanted;
	uid_t euid;

	if (stat(fname, &tmp) < 0) {
		return -1;
	}

	euid = geteuid();
	if (euid == 0) {
		wanted = S_IXUSR | S_IXGRP | S_IXOTH;
	} else if (euid == tmp.st_uid) {
		wanted = S_IXUSR;
	} else if (getegid() == tmp.st_gid) {
		wanted = S_IXGRP;
	} else {
		wanted = S_IXOTH;
	}

	/* normalise the masked bit to exactly 0 or 1 */
	return (tmp.st_mode & wanted) != 0;
}
235 
/**
 * Helper function to determine if a file is a directory.
 * Returns the result of S_ISDIR() on the file's mode, or 0 when the
 * file cannot be stat'ed (a stat failure is reported as "not a
 * directory", not as an error).
 */
static inline int is_directory(const char *fname)
{
	struct stat info;

	if (stat(fname, &info) == 0) {
		return S_ISDIR(info.st_mode);
	}

	return 0;
}
250 
/**
 * Returns 1 if the directory entry's filename fits the test name pattern
 * (a 'T' or 'D' followed by two decimal digits), 0 otherwise.
 */
static inline int test_filter(const struct dirent *p)
{
	/*
	 * The <ctype.h> functions require a value representable as
	 * unsigned char (or EOF); plain char may be negative.
	 */
	const unsigned char *name = (const unsigned char *)p->d_name;

	if (name[0] != 'T' && name[0] != 'D') {
		return 0;
	}

	/* && stops at the terminating NUL, so name[2] is never overread */
	return isdigit(name[1]) && isdigit(name[2]);
}
259 
/**
 * scandir() comparison routine for test names.  The leading type letter
 * is skipped, and the remaining number/name part is compared in REVERSE
 * order (higher names sort first).  process_dir() walks the resulting
 * array from the end, so tests are still visited lowest-first.
 */
static inline int test_sort(const struct dirent **a, const struct dirent **b)
{
	const char *key_a = (*a)->d_name + 1;
	const char *key_b = (*b)->d_name + 1;

	return strcmp(key_b, key_a);
}
269 
270 /**
271  * Takes the wait() status integer and prints a log message.
272  * Returns 1 if there was a failure.
273  */
child_finished(const char * name,int stat)274 static int child_finished(const char *name, int stat)
275 {
276 	int x;
277 	// did we sig-exit?
278 	if (WIFSIGNALED(stat)) {
279 		pounder_fprintf(stdout, "%s: %s on signal %d.\n",
280 				name, fail_msg, WTERMSIG(stat));
281 		return 1;
282 	} else {
283 		x = WEXITSTATUS(stat);
284 		if (x == 0) {
285 			pounder_fprintf(stdout, "%s: %s.\n", name, pass_msg);
286 			return 0;
287 		} else if (x < 0 || x == 255) {
288 			pounder_fprintf(stdout, "%s: %s with code %d.\n",
289 					name, abort_msg, x);
290 			return 1;
291 			// FIXME: add test to blacklist
292 		} else {
293 			pounder_fprintf(stdout, "%s: %s with code %d.\n",
294 					name, fail_msg, x);
295 			return 1;
296 		}
297 	}
298 }
299 
300 /**
301  * Wait for some number of PIDs.  If any of them return nonzero, we
302  * assume that there was some kind of failure and return 0.  Otherwise,
303  * we return 1 to indicate success.
304  */
wait_for_pids(void)305 static int wait_for_pids(void)
306 {
307 	struct proclist_item_t *curr;
308 	int i, stat, res, nprocs;
309 	pid_t pid;
310 
311 	res = 1;
312 
313 	// figure out how many times we have to wait...
314 	curr = wait_ons.head;
315 	nprocs = 0;
316 	while (curr != NULL) {
317 		nprocs++;
318 		curr = curr->next;
319 	}
320 
321 	// now wait for children.
322 	for (i = 0; i < nprocs;) {
323 		pid = wait(&stat);
324 
325 		if (pid < 0) {
326 			perror("wait");
327 			return 0;
328 		}
329 		// go find the child
330 		curr = wait_ons.head;
331 		while (curr != NULL) {
332 			if (curr->pid == pid) {
333 				res =
334 				    (child_finished(curr->name, stat) ? 0 :
335 				     res);
336 
337 				// one less pid to wait for
338 				i++;
339 
340 				// stop observing
341 				remove_from_proclist(&wait_ons, curr);
342 				free(curr->name);
343 				free(curr);
344 				break;
345 			}
346 			curr = curr->next;
347 		}
348 
349 		curr = daemons.head;
350 		while (curr != NULL) {
351 			if (curr->pid == pid) {
352 				child_finished(curr->name, stat);
353 				remove_from_proclist(&daemons, curr);
354 				free(curr->name);
355 				free(curr);
356 				break;
357 			}
358 			curr = curr->next;
359 		}
360 	}
361 
362 	return res;
363 }
364 
365 /**
366  * Wait for daemons to finish.  This function does NOT wait for wait_ons.
367  */
wait_for_daemons(void)368 static void wait_for_daemons(void)
369 {
370 	struct proclist_item_t *curr;
371 	int i, stat, res, nprocs;
372 	pid_t pid;
373 
374 	res = 1;
375 
376 	// figure out how many times we have to wait...
377 	curr = daemons.head;
378 	nprocs = 0;
379 	while (curr != NULL) {
380 		nprocs++;
381 		curr = curr->next;
382 	}
383 
384 	// now wait for daemons.
385 	for (i = 0; i < nprocs;) {
386 		pid = wait(&stat);
387 
388 		if (pid < 0) {
389 			perror("wait");
390 			if (errno == ECHILD) {
391 				return;
392 			}
393 		}
394 
395 		curr = daemons.head;
396 		while (curr != NULL) {
397 			if (curr->pid == pid) {
398 				child_finished(curr->name, stat);
399 				i++;
400 				remove_from_proclist(&daemons, curr);
401 				free(curr->name);
402 				free(curr);
403 				break;
404 			}
405 			curr = curr->next;
406 		}
407 	}
408 }
409 
410 /**
411  * Creates a record of processes that we want to watch for.
412  */
note_process(pid_t pid,char * name)413 static void note_process(pid_t pid, char *name)
414 {
415 	struct proclist_item_t *it;
416 
417 	it = calloc(1, sizeof(struct proclist_item_t));
418 	if (it == NULL) {
419 		perror("malloc proclist_item_t");
420 		// XXX: Maybe we should just waitpid?
421 		return;
422 	}
423 	it->pid = pid;
424 	it->name = calloc(strlen(name) + 1, sizeof(char));
425 	if (it->name == NULL) {
426 		perror("malloc procitem name");
427 		// XXX: Maybe we should just waitpid?
428 		return;
429 	}
430 	strcpy(it->name, name);
431 
432 	add_to_proclist(&wait_ons, it);
433 }
434 
435 /**
436  * Creates a record of daemons that should be killed on exit.
437  */
note_daemon(pid_t pid,char * name)438 static void note_daemon(pid_t pid, char *name)
439 {
440 	struct proclist_item_t *it;
441 
442 	it = calloc(1, sizeof(struct proclist_item_t));
443 	if (it == NULL) {
444 		perror("malloc proclist_item_t");
445 		// XXX: what do we do here?
446 		return;
447 	}
448 	it->pid = pid;
449 	it->name = calloc(strlen(name) + 1, sizeof(char));
450 	if (it->name == NULL) {
451 		perror("malloc procitem name");
452 		// XXX: what do we do here?
453 		return;
454 	}
455 	strcpy(it->name, name);
456 
457 	add_to_proclist(&daemons, it);
458 }
459 
460 /**
461  * Starts a test, with the stdin/out/err fd's redirected to logs.
462  * The 'fname' parameter should be a relative path from $POUNDER_HOME.
463  */
spawn_test(char * fname)464 static pid_t spawn_test(char *fname)
465 {
466 	pid_t pid;
467 	int fd, tmp;
468 	char buf[TEST_PATH_LEN], buf2[TEST_PATH_LEN];
469 	char *last_slash;
470 
471 	pid = fork();
472 	if (pid == 0) {
473 		if (setpgrp() < 0) {
474 			perror("setpgid");
475 		}
476 
477 		pounder_fprintf(stdout, "%s: %s test.\n", fname, start_msg);
478 
479 		// reroute stdin
480 		fd = open("/dev/null", O_RDWR);
481 		if (fd < 0) {
482 			perror("/dev/null");
483 			exit(-1);
484 		}
485 		close(0);
486 		tmp = dup2(fd, 0);
487 		if (tmp < 0) {
488 			perror("dup(/dev/null)");
489 			exit(-1);
490 		}
491 		close(fd);
492 
493 		// generate log name-- '/' -> '-'.
494 		snprintf(buf2, TEST_PATH_LEN, "%s|%s",
495 			 getenv("POUNDER_LOGDIR"), fname);
496 
497 		fd = strlen(buf2);
498 		for (tmp = (index(buf2, '|') - buf2); tmp < fd; tmp++) {
499 			if (buf2[tmp] == '/') {
500 				buf2[tmp] = '-';
501 			} else if (buf2[tmp] == '|') {
502 				buf2[tmp] = '/';
503 			}
504 		}
505 
506 		// make it so that we have a way to get back to the
507 		// original console.
508 		tmp = dup2(1, 3);
509 		if (tmp < 0) {
510 			perror("dup(stdout, 3)");
511 			exit(-1);
512 		}
513 		// reroute stdout/stderr
514 		fd = open(buf2, O_RDWR | O_CREAT | O_TRUNC | O_SYNC,
515 			  S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
516 		if (fd < 0) {
517 			perror(buf2);
518 			exit(-1);
519 		}
520 		close(1);
521 		tmp = dup2(fd, 1);
522 		if (tmp < 0) {
523 			perror("dup(log, 1)");
524 			exit(-1);
525 		}
526 		close(2);
527 		tmp = dup2(fd, 2);
528 		if (tmp < 0) {
529 			perror("dup(log, 2)");
530 			exit(-1);
531 		}
532 		close(fd);
533 
534 		// let us construct the absolute pathname of the test.
535 		// first find the current directory
536 		if (getcwd(buf, TEST_PATH_LEN) == NULL) {
537 			perror("getcwd");
538 			exit(-1);
539 		}
540 		// then splice cwd + fname
541 		snprintf(buf2, TEST_PATH_LEN, "%s/%s", buf, fname);
542 
543 		// find the location of the last slash
544 		last_slash = rindex(buf2, '/');
545 
546 		if (last_slash != NULL) {
547 			// copy the filename part into a new buffer
548 			snprintf(buf, TEST_PATH_LEN, "./%s", last_slash + 1);
549 
550 			// truncate at the last slash
551 			*last_slash = 0;
552 
553 			// and chdir
554 			if (chdir(buf2) != 0) {
555 				perror(buf2);
556 				exit(-1);
557 			}
558 			// reassign variables
559 			fname = buf;
560 		}
561 		// spawn the process
562 		execlp(fname, fname, NULL);
563 
564 		// If we get here, we can't run the test.
565 		perror(fname);
566 		exit(-1);
567 	}
568 
569 	tmp = errno;
570 	/* yield for a short while, so that the test has
571 	 * a little bit of time to run.
572 	 */
573 	usleep(TEST_FORK_WAIT);
574 	errno = tmp;
575 
576 	return pid;
577 }
578 
/**
 * Adds a child process to either the running-test list (type 'T') or
 * the running-daemon list (type 'D').  Any other type is only logged.
 */
static void note_child(pid_t pid, char *fname, char type)
{
	switch (type) {
	case 'T':
		note_process(pid, fname);
		break;
	case 'D':
		note_daemon(pid, fname);
		break;
	default:
		pounder_fprintf(stdout,
				"Don't know what to do with child `%s' of type %c.\n",
				fname, type);
		break;
	}
}
595 
596 /**
597  * Process a directory--for each entry in a directory, execute files or spawn
598  * a new copy of ourself on the new directory.  Process execution is subject to
599  * these rules:
600  *
601  * - Test files that start with the same number '00foo' and '00bar' are allowed
602  *   to run simultaneously.
603  * - Test files are run in order of number and then name.
604  *
605  * If a the fork fails, bit 1 of the return code is set.  If a
606  * program runs but fails, bit 2 is set.
607  */
process_dir(const char * fname)608 static int process_dir(const char *fname)
609 {
610 	struct dirent **namelist;
611 	int i, result = 0;
612 	char buf[TEST_PATH_LEN];
613 	int curr_level_num = -1;
614 	int test_level_num;
615 	pid_t pid;
616 	int children_ok = 1;
617 
618 	pounder_fprintf(stdout, "%s: Entering directory.\n", fname);
619 
620 	i = scandir(fname, &namelist, test_filter,
621 		    (int (*)(const void *, const void *))test_sort);
622 	if (i < 0) {
623 		perror(fname);
624 		return -1;
625 	}
626 
627 	while (i--) {
628 		/* determine level number */
629 		test_level_num = ((namelist[i]->d_name[1] - '0') * 10)
630 		    + (namelist[i]->d_name[2] - '0');
631 
632 		if (curr_level_num == -1) {
633 			curr_level_num = test_level_num;
634 		}
635 
636 		if (curr_level_num != test_level_num) {
637 			children_ok &= wait_for_pids();
638 			curr_level_num = test_level_num;
639 		}
640 
641 		snprintf(buf, TEST_PATH_LEN, "%s/%s", fname,
642 			 namelist[i]->d_name);
643 		if (is_directory(buf)) {
644 			pid = fork();
645 			if (pid == 0) {
646 				if (setpgrp() < 0) {
647 					perror("setpgid");
648 				}
649 				// spawn a new copy of ourself.
650 				execl(progname, progname, buf, NULL);
651 
652 				perror(progname);
653 				exit(-1);
654 			}
655 		} else {
656 			pid = spawn_test(buf);
657 		}
658 
659 		if (pid < 0) {
660 			perror("fork");
661 			result |= 1;
662 			free(namelist[i]);
663 			continue;
664 		}
665 
666 		note_child(pid, buf, namelist[i]->d_name[0]);
667 
668 		free(namelist[i]);
669 	}
670 	free(namelist);
671 
672 	/* wait for remaining runners */
673 	children_ok &= wait_for_pids();
674 	if (children_ok == 0) {
675 		result |= 2;
676 	}
677 
678 	pounder_fprintf(stdout, "%s: Leaving directory.\n", fname);
679 
680 	return result;
681 }
682