1 /*
2  * Blktrace replay utility - Play traces back
3  *
4  * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20 
/* Build timestamp string assembled from the compiler's __DATE__ and __TIME__ */
static char build_date[] = __DATE__ " at "__TIME__;
22 
23 #include <assert.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <libaio.h>
27 #include <pthread.h>
28 #include <sched.h>
29 #include <signal.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <sys/time.h>
38 #include <sys/types.h>
39 #include <dirent.h>
40 #include <stdarg.h>
41 
42 #if !defined(_GNU_SOURCE)
43 #	define _GNU_SOURCE
44 #endif
45 #include <getopt.h>
46 
47 #include "list.h"
48 #include "btrecord.h"
49 
50 /*
51  * ========================================================================
52  * ==== STRUCTURE DEFINITIONS =============================================
53  * ========================================================================
54  */
55 
56 /**
57  * Each device map has one of these:
58  *
59  * @head:	Linked on to map_devs
60  * @from_dev:	Device name as seen on recorded system
61  * @to_dev:	Device name to be used on replay system
62  */
63 struct map_dev {
64 	struct list_head head;
65 	char *from_dev, *to_dev;
66 };
67 
68 /**
69  * Each device name specified has one of these (until threads are created)
70  *
71  * @head: 	Linked onto input_devs
72  * @devnm: 	Device name -- 'sd*'
73  */
74 struct dev_info {
75 	struct list_head head;
76 	char *devnm;
77 };
78 
79 /*
80  * Per input file information
81  *
82  * @head: 	Used to link up on input_files
83  * @free_iocbs: List of free iocb's available for use
84  * @used_iocbs: List of iocb's currently outstanding
85  * @mutex: 	Mutex used with condition variable to protect volatile values
86  * @cond: 	Condition variable used when waiting on a volatile value change
87  * @naios_out: 	Current number of AIOs outstanding on this context
88  * @naios_free: Number of AIOs on the free list (short cut for list_len)
89  * @send_wait: 	Boolean: When true, the sub thread is waiting on free IOCBs
90  * @reap_wait: 	Boolean: When true, the rec thread is waiting on used IOCBs
91  * @send_done: 	Boolean: When true, the sub thread has completed work
92  * @reap_done: 	Boolean: When true, the rec thread has completed work
93  * @sub_thread: Thread used to submit IOs.
94  * @rec_thread: Thread used to reclaim IOs.
95  * @ctx: 	IO context
96  * @devnm: 	Copy of the device name being managed by this thread
97  * @file_name: 	Full name of the input file
98  * @cpu: 	CPU this thread is pinned to
99  * @ifd: 	Input file descriptor
100  * @ofd: 	Output file descriptor
101  * @iterations: Remaining iterations to process
102  * @vfp:	For verbose dumping of actions performed
103  */
104 struct thr_info {
105 	struct list_head head, free_iocbs, used_iocbs;
106 	pthread_mutex_t mutex;
107 	pthread_cond_t cond;
108 	volatile long naios_out, naios_free;
109 	volatile int send_wait, reap_wait, send_done, reap_done;
110 	pthread_t sub_thread, rec_thread;
111 	io_context_t ctx;
112 	char *devnm, *file_name;
113 	int cpu, ifd, ofd, iterations;
114 	FILE *vfp;
115 };
116 
117 /*
118  * Every Asynchronous IO used has one of these (naios per file/device).
119  *
120  * @iocb:	IOCB sent down via io_submit
121  * @head:	Linked onto file_list.free_iocbs or file_list.used_iocbs
122  * @tip:	Pointer to per-thread information this IO is associated with
123  * @nbytes:	Number of bytes in buffer associated with iocb
124  */
125 struct iocb_pkt {
126 	struct iocb iocb;
127 	struct list_head head;
128 	struct thr_info *tip;
129 	int nbytes;
130 };
131 
132 /*
133  * ========================================================================
134  * ==== GLOBAL VARIABLES ==================================================
135  * ========================================================================
136  */
137 
static volatile int signal_done = 0;	// Boolean: Signal'ed, need to quit

static char *ibase = "replay";		// Input base name (part of "<dev>.<ibase>.<cpu>")
static char *idir = ".";		// Input directory base
static int cpus_to_use = -1;		// Number of CPUs to use (modulus for per-file CPU)
static int def_iterations = 1;		// Default number of iterations
static int naios = 512;			// Number of AIOs per thread (io_setup depth, iocb pool size)
static int ncpus = 0;			// Number of CPUs in the system (set by get_ncpus)
static int verbose = 0;			// Verbosity level: >0 extra info, >1 per-thread report files
static int write_enabled = 0;		// Boolean: Enable writing
static __u64 genesis = ~0;		// Earliest time seen (starts at max so any header is lower)
static __u64 rgenesis;			// Our start time
static size_t pgsize;			// System Page size (buffer alignment and touch stride)
static int nb_sec = 512;		// Number of bytes per sector
static LIST_HEAD(input_devs);		// List of devices to handle
static LIST_HEAD(input_files);		// List of input files to handle
static LIST_HEAD(map_devs);		// List of device maps
static int nfiles = 0;			// Number of files to handle
static int no_stalls = 0;		// Boolean: Disable pre-stalls
static unsigned acc_factor = 1;		// Int: Acceleration factor
static int find_records = 0;		// Boolean: Find record files auto
159 
/*
 * Variables managed under control of condition variables.
 *
 * n_reclaims_done: 	Counts number of reclaim threads that have completed.
 * n_replays_done:	Counts number of replay threads that have completed.
 * n_replays_ready:	Counts number of replay threads ready to start.
 * n_iters_done:	Counts number of replay threads done one iteration.
 * iter_start:		Starts an iteration for the replay threads.
 *
 * The n_* counters are incremented through __set_cv() and are reset to
 * zero by __wait_cv() once they reach 'nfiles'. iter_start is handled
 * separately by start_iter() / wait_iter_start().
 */
static volatile int n_reclaims_done = 0;
static pthread_mutex_t reclaim_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t reclaim_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_replays_done = 0;
static pthread_mutex_t replay_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_replays_ready = 0;
static pthread_mutex_t replay_ready_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_ready_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_iters_done = 0;
static pthread_mutex_t iter_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int iter_start = 0;
static pthread_mutex_t iter_start_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_start_cond = PTHREAD_COND_INITIALIZER;
188 
189 /*
190  * ========================================================================
 * ==== FORWARD REFERENCES ================================================
192  * ========================================================================
193  */
194 
/* Thread entry points handed to pthread_create() in tip_init() */
static void *replay_sub(void *arg);
static void *replay_rec(void *arg);
/* Usage text printed by usage(); defined later in the file */
static char usage_str[];
198 
199 /*
200  * ========================================================================
201  * ==== INLINE ROUTINES ===================================================
202  * ========================================================================
203  */
204 
/*
 * fatal - Report an unrecoverable error and terminate the process.
 * @errstring:	If non-NULL, passed to perror() first (prints errno text)
 * @exitval:	Process exit status (ERR_ARGS or ERR_SYSCALL)
 * @fmt:	printf-style format for the message written to stderr
 *
 * This function never returns.
 */
#define ERR_ARGS			1
#define ERR_SYSCALL			2
static inline void fatal(const char *errstring, const int exitval,
			 const char *fmt, ...)
{
	va_list args;

	if (errstring != NULL)
		perror(errstring);

	va_start(args, fmt);
	(void)vfprintf(stderr, fmt, args);
	va_end(args);

	exit(exitval);
	/*NOTREACHED*/
}
227 
/**
 * du64_to_sec - Whole-seconds part of a nanosecond count
 * @du64: Time value in nanoseconds
 */
static inline long long unsigned du64_to_sec(__u64 du64)
{
	const long long unsigned ns_per_sec = 1000 * 1000 * 1000;
	long long unsigned ns = (long long unsigned)du64;

	return ns / ns_per_sec;
}
232 
/**
 * du64_to_nsec - Sub-second (nanosecond) part of a nanosecond count
 * @du64: Time value in nanoseconds
 *
 * The value is reinterpreted as signed and its magnitude is used, so a
 * "negative" delta yields the remainder of its absolute value.
 */
static inline long long unsigned du64_to_nsec(__u64 du64)
{
	long long magnitude = llabs((long long)du64);

	return (long long unsigned)(magnitude % (1000 * 1000 * 1000));
}
237 
/**
 * min - Return the smaller of two ints
 * @a: First value
 * @b: Second value
 */
static inline int min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
245 
/**
 * minl - Return the smaller of two longs
 * @a: First value
 * @b: Second value
 */
static inline long minl(long a, long b)
{
	if (a < b)
		return a;
	return b;
}
253 
254 /**
255  * usage - Display usage string and version
256  */
usage(void)257 static inline void usage(void)
258 {
259 	fprintf(stderr, "Usage: btreplay -- version %s\n%s",
260 		my_btversion, usage_str);
261 }
262 
263 /**
264  * is_send_done - Returns true if sender should quit early
265  * @tip: Per-thread information
266  */
is_send_done(struct thr_info * tip)267 static inline int is_send_done(struct thr_info *tip)
268 {
269 	return signal_done || tip->send_done;
270 }
271 
272 /**
273  * is_reap_done - Returns true if reaper should quit early
274  * @tip: Per-thread information
275  */
is_reap_done(struct thr_info * tip)276 static inline int is_reap_done(struct thr_info *tip)
277 {
278 	return tip->send_done && tip->naios_out == 0;
279 }
280 
/**
 * ts2ns - Convert a timespec to a single nanosecond count
 * @ts: Time value (seconds + nanoseconds)
 */
#define NS_TICKS		((__u64)1000 * (__u64)1000 * (__u64)1000)
static inline __u64 ts2ns(struct timespec *ts)
{
	__u64 secs = (__u64)(ts->tv_sec);
	__u64 nsecs = (__u64)(ts->tv_nsec);

	return secs * NS_TICKS + nsecs;
}
289 
/**
 * tv2ns - Convert a timeval to a single nanosecond count
 * @tp: Time value (seconds + microseconds)
 *
 * Seconds are scaled by 10^9 and microseconds by 10^3 so the result is
 * in the same unit as ts2ns().
 */
static inline __u64 tv2ns(struct timeval *tp)
{
	const __u64 ns_per_sec = (__u64)1000 * (__u64)1000 * (__u64)1000;
	const __u64 ns_per_usec = (__u64)1000;

	return ((__u64)(tp->tv_sec) * ns_per_sec) +
	       ((__u64)(tp->tv_usec) * ns_per_usec);
}
297 
298 /**
299  * touch_memory - Force physical memory to be allocating it
300  *
301  * For malloc()ed memory we need to /touch/ it to make it really
302  * exist. Otherwise, for write's (to storage) things may not work
303  * as planned - we see Linux just use a single area to /read/ from
304  * (as there isn't any memory that has been associated with the
305  * allocated virtual addresses yet).
306  */
touch_memory(char * buf,size_t bsize)307 static inline void touch_memory(char *buf, size_t bsize)
308 {
309 #if defined(PREP_BUFS)
310 	memset(buf, 0, bsize);
311 #else
312 	size_t i;
313 
314 	for (i = 0; i < bsize; i += pgsize)
315 		buf[i] = 0;
316 #endif
317 }
318 
319 /**
320  * buf_alloc - Returns a page-aligned buffer of the specified size
321  * @nbytes: Number of bytes to allocate
322  */
buf_alloc(size_t nbytes)323 static inline void *buf_alloc(size_t nbytes)
324 {
325 	void *buf;
326 
327 	if (posix_memalign(&buf, pgsize, nbytes)) {
328 		fatal("posix_memalign", ERR_SYSCALL, "Allocation failed\n");
329 		/*NOTREACHED*/
330 	}
331 
332 	return buf;
333 }
334 
335 /**
336  * gettime - Returns current time
337  */
gettime(void)338 static inline __u64 gettime(void)
339 {
340 	static int use_clock_gettime = -1;		// Which clock to use
341 
342 	if (use_clock_gettime < 0) {
343 		use_clock_gettime = clock_getres(CLOCK_MONOTONIC, NULL) == 0;
344 		if (use_clock_gettime) {
345 			struct timespec ts = {
346 				.tv_sec = 0,
347 				.tv_nsec = 0
348 			};
349 			clock_settime(CLOCK_MONOTONIC, &ts);
350 		}
351 	}
352 
353 	if (use_clock_gettime) {
354 		struct timespec ts;
355 		clock_gettime(CLOCK_MONOTONIC, &ts);
356 		return ts2ns(&ts);
357 	}
358 	else {
359 		struct timeval tp;
360 		gettimeofday(&tp, NULL);
361 		return tv2ns(&tp);
362 	}
363 }
364 
365 /**
366  * setup_signal - Set up a signal handler for the specified signum
367  */
setup_signal(int signum,sighandler_t handler)368 static inline void setup_signal(int signum, sighandler_t handler)
369 {
370 	if (signal(signum, handler) == SIG_ERR) {
371 		fatal("signal", ERR_SYSCALL, "Failed to set signal %d\n",
372 			signum);
373 		/*NOTREACHED*/
374 	}
375 }
376 
377 /*
378  * ========================================================================
379  * ==== CONDITION VARIABLE ROUTINES =======================================
380  * ========================================================================
381  */
382 
/**
 * __set_cv - Increment a counter under mutex and signal its condition
 * @pmp: 	Pointer to the associated mutex
 * @pcp: 	Pointer to the associated condition variable
 * @vp: 	Pointer to the variable being incremented
 * @mxv: 	Max value for variable (Used only when ASSERTS are on)
 */
static inline void __set_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			    volatile int *vp,
			    __attribute__((__unused__))int mxv)
{
	pthread_mutex_lock(pmp);

	assert(*vp < mxv);
	*vp = *vp + 1;
	pthread_cond_signal(pcp);

	pthread_mutex_unlock(pmp);
}
400 
/**
 * __wait_cv - Block until a counter reaches a value, then reset it
 * @pmp: 	Pointer to the associated mutex
 * @pcp: 	Pointer to the associated condition variable
 * @vp: 	Pointer to the variable being watched
 * @mxv: 	Value to wait for
 *
 * On return the counter has been set back to zero.
 */
static inline void __wait_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			     volatile int *vp, int mxv)
{
	pthread_mutex_lock(pmp);

	for (;;) {
		if (*vp >= mxv)
			break;
		pthread_cond_wait(pcp, pmp);
	}
	*vp = 0;

	pthread_mutex_unlock(pmp);
}
417 
set_reclaim_done(void)418 static inline void set_reclaim_done(void)
419 {
420 	__set_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
421 		 nfiles);
422 }
423 
wait_reclaims_done(void)424 static inline void wait_reclaims_done(void)
425 {
426 	__wait_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
427 		  nfiles);
428 }
429 
set_replay_ready(void)430 static inline void set_replay_ready(void)
431 {
432 	__set_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
433 		 nfiles);
434 }
435 
wait_replays_ready(void)436 static inline void wait_replays_ready(void)
437 {
438 	__wait_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
439 		  nfiles);
440 }
441 
set_replay_done(void)442 static inline void set_replay_done(void)
443 {
444 	__set_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
445 		nfiles);
446 }
447 
wait_replays_done(void)448 static inline void wait_replays_done(void)
449 {
450 	__wait_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
451 		  nfiles);
452 }
453 
set_iter_done(void)454 static inline void set_iter_done(void)
455 {
456 	__set_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
457 		nfiles);
458 }
459 
wait_iters_done(void)460 static inline void wait_iters_done(void)
461 {
462 	__wait_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
463 		  nfiles);
464 }
465 
466 /**
467  * wait_iter_start - Wait for an iteration to start
468  *
469  * This is /slightly/ different: we are waiting for a value to become
470  * non-zero, and then we decrement it and go on.
471  */
wait_iter_start(void)472 static inline void wait_iter_start(void)
473 {
474 	pthread_mutex_lock(&iter_start_mutex);
475 	while (iter_start == 0)
476 		pthread_cond_wait(&iter_start_cond, &iter_start_mutex);
477 	assert(1 <= iter_start && iter_start <= nfiles);
478 	iter_start--;
479 	pthread_mutex_unlock(&iter_start_mutex);
480 }
481 
482 /**
483  * start_iter - Start an iteration at the replay thread level
484  */
start_iter(void)485 static inline void start_iter(void)
486 {
487 	pthread_mutex_lock(&iter_start_mutex);
488 	assert(iter_start == 0);
489 	iter_start = nfiles;
490 	pthread_cond_broadcast(&iter_start_cond);
491 	pthread_mutex_unlock(&iter_start_mutex);
492 }
493 
494 /*
495  * ========================================================================
496  * ==== CPU RELATED ROUTINES ==============================================
497  * ========================================================================
498  */
499 
500 /**
501  * get_ncpus - Sets up the global 'ncpus' value
502  */
get_ncpus(void)503 static void get_ncpus(void)
504 {
505 	cpu_set_t cpus;
506 
507 	if (sched_getaffinity(getpid(), sizeof(cpus), &cpus)) {
508 		fatal("sched_getaffinity", ERR_SYSCALL, "Can't get CPU info\n");
509 		/*NOTREACHED*/
510 	}
511 
512 	/*
513 	 * XXX This assumes (perhaps wrongly) that there are no /holes/
514 	 * XXX in the mask.
515 	 */
516 	for (ncpus = 0; ncpus < CPU_SETSIZE && CPU_ISSET(ncpus, &cpus); ncpus++)
517 		;
518 	if (ncpus == 0) {
519 		fatal(NULL, ERR_SYSCALL, "Insufficient number of CPUs\n");
520 		/*NOTREACHED*/
521 	}
522 }
523 
524 /**
525  * pin_to_cpu - Pin this thread to a specific CPU
526  * @tip: Thread information
527  */
pin_to_cpu(struct thr_info * tip)528 static void pin_to_cpu(struct thr_info *tip)
529 {
530 	cpu_set_t cpus;
531 
532 	assert(0 <= tip->cpu && tip->cpu < ncpus);
533 
534 	CPU_ZERO(&cpus);
535 	CPU_SET(tip->cpu, &cpus);
536 	if (sched_setaffinity(getpid(), sizeof(cpus), &cpus)) {
537 		fatal("sched_setaffinity", ERR_SYSCALL, "Failed to pin CPU\n");
538 		/*NOTREACHED*/
539 	}
540 
541 	if (verbose > 1) {
542 		int i;
543 		cpu_set_t now;
544 
545 		(void)sched_getaffinity(getpid(), sizeof(now), &now);
546 		fprintf(tip->vfp, "Pinned to CPU %02d ", tip->cpu);
547 		for (i = 0; i < ncpus; i++)
548 			fprintf(tip->vfp, "%1d", CPU_ISSET(i, &now));
549 		fprintf(tip->vfp, "\n");
550 	}
551 }
552 
553 /*
554  * ========================================================================
555  * ==== INPUT DEVICE HANDLERS =============================================
556  * ========================================================================
557  */
558 
559 /**
560  * add_input_dev - Add a device ('sd*') to the list of devices to handle
561  */
add_input_dev(char * devnm)562 static void add_input_dev(char *devnm)
563 {
564 	struct list_head *p;
565 	struct dev_info *dip;
566 
567 	__list_for_each(p, &input_devs) {
568 		dip = list_entry(p, struct dev_info, head);
569 		if (strcmp(dip->devnm, devnm) == 0)
570 			return;
571 	}
572 
573 	dip = malloc(sizeof(*dip));
574 	dip->devnm = strdup(devnm);
575 	list_add_tail(&dip->head, &input_devs);
576 }
577 
578 /**
579  * rem_input_dev - Remove resources associated with this device
580  */
rem_input_dev(struct dev_info * dip)581 static void rem_input_dev(struct dev_info *dip)
582 {
583 	list_del(&dip->head);
584 	free(dip->devnm);
585 	free(dip);
586 }
587 
find_input_devs(char * idir)588 static void find_input_devs(char *idir)
589 {
590 	struct dirent *ent;
591 	DIR *dir = opendir(idir);
592 
593 	if (dir == NULL) {
594 		fatal(idir, ERR_ARGS, "Unable to open %s\n", idir);
595 		/*NOTREACHED*/
596 	}
597 
598 	while ((ent = readdir(dir)) != NULL) {
599 		char *p, *dsf = malloc(256);
600 
601 		if (strstr(ent->d_name, ".replay.") == NULL)
602 			continue;
603 
604 		dsf = strdup(ent->d_name);
605 		p = index(dsf, '.');
606 		assert(p != NULL);
607 		*p = '\0';
608 		add_input_dev(dsf);
609 		free(dsf);
610 	}
611 
612 	closedir(dir);
613 }
614 
615 /*
616  * ========================================================================
617  * ==== MAP DEVICE INTERFACES =============================================
618  * ========================================================================
619  */
620 
621 /**
622  * read_map_devs - Read in a set of device mapping from the provided file.
623  * @file_name:	File containing device maps
624  *
625  * We support the notion of multiple such files being specifed on the cmd line
626  */
read_map_devs(char * file_name)627 static void read_map_devs(char *file_name)
628 {
629 	FILE *fp;
630 	char *from_dev, *to_dev;
631 
632 	fp = fopen(file_name, "r");
633 	if (!fp) {
634 		fatal(file_name, ERR_SYSCALL, "Could not open map devs file\n");
635 		/*NOTREACHED*/
636 	}
637 
638 	while (fscanf(fp, "%as %as", &from_dev, &to_dev) == 2) {
639 		struct map_dev *mdp = malloc(sizeof(*mdp));
640 
641 		mdp->from_dev = from_dev;
642 		mdp->to_dev = to_dev;
643 		list_add_tail(&mdp->head, &map_devs);
644 	}
645 
646 	fclose(fp);
647 }
648 
649 /**
650  * release_map_devs - Release resources associated with device mappings.
651  */
release_map_devs(void)652 static void release_map_devs(void)
653 {
654 	struct list_head *p, *q;
655 
656 	list_for_each_safe(p, q, &map_devs) {
657 		struct map_dev *mdp = list_entry(p, struct map_dev, head);
658 
659 		list_del(&mdp->head);
660 
661 		free(mdp->from_dev);
662 		free(mdp->to_dev);
663 		free(mdp);
664 	}
665 }
666 
667 /**
668  * map_dev - Return the mapped device for that specified
669  * @from_dev:	Device name as seen on recorded system
670  *
671  * Note: If there is no such mapping, we return the same name.
672  */
map_dev(char * from_dev)673 static char *map_dev(char *from_dev)
674 {
675 	struct list_head *p;
676 
677 	__list_for_each(p, &map_devs) {
678 		struct map_dev *mdp = list_entry(p, struct map_dev, head);
679 
680 		if (strcmp(from_dev, mdp->from_dev) == 0)
681 			return mdp->to_dev;
682 	}
683 
684 	return from_dev;
685 }
686 
687 /*
688  * ========================================================================
689  * ==== IOCB MANAGEMENT ROUTINES ==========================================
690  * ========================================================================
691  */
692 
693 /**
694  * iocb_init - Initialize the fields of an IOCB
695  * @tip: Per-thread information
696  * iocbp: IOCB pointer to update
697  */
iocb_init(struct thr_info * tip,struct iocb_pkt * iocbp)698 static void iocb_init(struct thr_info *tip, struct iocb_pkt *iocbp)
699 {
700 	iocbp->tip = tip;
701 	iocbp->nbytes = 0;
702 	iocbp->iocb.u.c.buf = NULL;
703 }
704 
705 /**
706  * iocb_setup - Set up an iocb with this AIOs information
707  * @iocbp: IOCB pointer to update
708  * @rw: Direction (0 == write, 1 == read)
709  * @n: Number of bytes to transfer
710  * @off: Offset (in bytes)
711  */
iocb_setup(struct iocb_pkt * iocbp,int rw,int n,long long off)712 static void iocb_setup(struct iocb_pkt *iocbp, int rw, int n, long long off)
713 {
714 	char *buf;
715 	struct iocb *iop = &iocbp->iocb;
716 
717 	assert(rw == 0 || rw == 1);
718 	assert(0 < n && (n % nb_sec) == 0);
719 	assert(0 <= off);
720 
721 	if (iocbp->nbytes) {
722 		if (iocbp->nbytes >= n) {
723 			buf = iop->u.c.buf;
724 			goto prep;
725 		}
726 
727 		assert(iop->u.c.buf);
728 		free(iop->u.c.buf);
729 	}
730 
731 	buf = buf_alloc(n);
732 	iocbp->nbytes = n;
733 
734 prep:
735 	if (rw)
736 		io_prep_pread(iop, iocbp->tip->ofd, buf, n, off);
737 	else {
738 		assert(write_enabled);
739 		io_prep_pwrite(iop, iocbp->tip->ofd, buf, n, off);
740 		touch_memory(buf, n);
741 	}
742 
743 	iop->data = iocbp;
744 }
745 
746 /*
747  * ========================================================================
748  * ==== PER-THREAD SET UP & TEAR DOWN =====================================
749  * ========================================================================
750  */
751 
752 /**
753  * tip_init - Per thread initialization function
754  */
tip_init(struct thr_info * tip)755 static void tip_init(struct thr_info *tip)
756 {
757 	int i;
758 
759 	INIT_LIST_HEAD(&tip->free_iocbs);
760 	INIT_LIST_HEAD(&tip->used_iocbs);
761 
762 	pthread_mutex_init(&tip->mutex, NULL);
763 	pthread_cond_init(&tip->cond, NULL);
764 
765 	if (io_setup(naios, &tip->ctx)) {
766 		fatal("io_setup", ERR_SYSCALL, "io_setup failed\n");
767 		/*NOTREACHED*/
768 	}
769 
770 	tip->ofd = -1;
771 	tip->naios_out = 0;
772 	tip->send_done = tip->reap_done = 0;
773 	tip->send_wait = tip->reap_wait = 0;
774 
775 	memset(&tip->sub_thread, 0, sizeof(tip->sub_thread));
776 	memset(&tip->rec_thread, 0, sizeof(tip->rec_thread));
777 
778 	for (i = 0; i < naios; i++) {
779 		struct iocb_pkt *iocbp = buf_alloc(sizeof(*iocbp));
780 
781 		iocb_init(tip, iocbp);
782 		list_add_tail(&iocbp->head, &tip->free_iocbs);
783 	}
784 	tip->naios_free = naios;
785 
786 	if (verbose > 1) {
787 		char fn[MAXPATHLEN];
788 
789 		sprintf(fn, "%s/%s.%s.%d.rep", idir, tip->devnm, ibase,
790 			tip->cpu);
791 		tip->vfp = fopen(fn, "w");
792 		if (!tip->vfp) {
793 			fatal(fn, ERR_SYSCALL, "Failed to open report\n");
794 			/*NOTREACHED*/
795 		}
796 
797 		setlinebuf(tip->vfp);
798 	}
799 
800 	if (pthread_create(&tip->sub_thread, NULL, replay_sub, tip)) {
801 		fatal("pthread_create", ERR_SYSCALL,
802 			"thread create failed\n");
803 		/*NOTREACHED*/
804 	}
805 
806 	if (pthread_create(&tip->rec_thread, NULL, replay_rec, tip)) {
807 		fatal("pthread_create", ERR_SYSCALL,
808 			"thread create failed\n");
809 		/*NOTREACHED*/
810 	}
811 }
812 
813 /**
814  * tip_release - Release resources associated with this thread
815  */
tip_release(struct thr_info * tip)816 static void tip_release(struct thr_info *tip)
817 {
818 	struct list_head *p, *q;
819 
820 	assert(tip->send_done);
821 	assert(tip->reap_done);
822 	assert(list_len(&tip->used_iocbs) == 0);
823 	assert(tip->naios_free == naios);
824 
825 	if (pthread_join(tip->sub_thread, NULL)) {
826 		fatal("pthread_join", ERR_SYSCALL, "pthread sub join failed\n");
827 		/*NOTREACHED*/
828 	}
829 	if (pthread_join(tip->rec_thread, NULL)) {
830 		fatal("pthread_join", ERR_SYSCALL, "pthread rec join failed\n");
831 		/*NOTREACHED*/
832 	}
833 
834 	io_destroy(tip->ctx);
835 
836 	list_splice(&tip->used_iocbs, &tip->free_iocbs);
837 	list_for_each_safe(p, q, &tip->free_iocbs) {
838 		struct iocb_pkt *iocbp = list_entry(p, struct iocb_pkt, head);
839 
840 		list_del(&iocbp->head);
841 		if (iocbp->nbytes)
842 			free(iocbp->iocb.u.c.buf);
843 		free(iocbp);
844 	}
845 
846 	pthread_cond_destroy(&tip->cond);
847 	pthread_mutex_destroy(&tip->mutex);
848 }
849 
850 /**
851  * add_input_file - Allocate and initialize per-input file structure
852  * @cpu: CPU for this file
853  * @devnm: Device name for this file
854  * @file_name: Fully qualifed input file name
855  */
add_input_file(int cpu,char * devnm,char * file_name)856 static void add_input_file(int cpu, char *devnm, char *file_name)
857 {
858 	struct stat buf;
859 	struct io_file_hdr hdr;
860 	struct thr_info *tip = buf_alloc(sizeof(*tip));
861 	__u64 my_version = mk_btversion(btver_mjr, btver_mnr, btver_sub);
862 
863 	assert(0 <= cpu && cpu < ncpus);
864 
865 	memset(&hdr, 0, sizeof(hdr));
866 	memset(tip, 0, sizeof(*tip));
867 	tip->cpu = cpu % cpus_to_use;
868 	tip->iterations = def_iterations;
869 
870 	tip->ifd = open(file_name, O_RDONLY);
871 	if (tip->ifd < 0) {
872 		fatal(file_name, ERR_ARGS, "Unable to open\n");
873 		/*NOTREACHED*/
874 	}
875 	if (fstat(tip->ifd, &buf) < 0) {
876 		fatal(file_name, ERR_SYSCALL, "fstat failed\n");
877 		/*NOTREACHED*/
878 	}
879 	if (buf.st_size < (off_t)sizeof(hdr)) {
880 		if (verbose)
881 			fprintf(stderr, "\t%s empty\n", file_name);
882 		goto empty_file;
883 	}
884 
885 	if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
886 		fatal(file_name, ERR_ARGS, "Header read failed\n");
887 		/*NOTREACHED*/
888 	}
889 
890 	if (hdr.version != my_version) {
891 		fprintf(stderr, "%llx %llx %llx %llx\n",
892 			(long long unsigned)hdr.version,
893 			(long long unsigned)hdr.genesis,
894 			(long long unsigned)hdr.nbunches,
895 			(long long unsigned)hdr.total_pkts);
896 		fatal(NULL, ERR_ARGS,
897 			"BT version mismatch: %lx versus my %lx\n",
898 			(long)hdr.version, (long)my_version);
899 
900 	}
901 
902 	if (hdr.nbunches == 0) {
903 empty_file:
904 		close(tip->ifd);
905 		free(tip);
906 		return;
907 	}
908 
909 	if (hdr.genesis < genesis) {
910 		if (verbose > 1)
911 			fprintf(stderr, "Setting genesis to %llu.%llu\n",
912 				du64_to_sec(hdr.genesis),
913 				du64_to_nsec(hdr.genesis));
914 		genesis = hdr.genesis;
915 	}
916 
917 	tip->devnm = strdup(devnm);
918 	tip->file_name = strdup(file_name);
919 
920 	list_add_tail(&tip->head, &input_files);
921 
922 	if (verbose)
923 		fprintf(stderr, "Added %s %llu\n", file_name,
924 			(long long)hdr.genesis);
925 }
926 
927 /**
928  * rem_input_file - Release resources associated with an input file
929  * @tip: Per-input file information
930  */
rem_input_file(struct thr_info * tip)931 static void rem_input_file(struct thr_info *tip)
932 {
933 	list_del(&tip->head);
934 
935 	tip_release(tip);
936 
937 	close(tip->ofd);
938 	close(tip->ifd);
939 	free(tip->file_name);
940 	free(tip->devnm);
941 	free(tip);
942 }
943 
944 /**
945  * rem_input_files - Remove all input files
946  */
rem_input_files(void)947 static void rem_input_files(void)
948 {
949 	struct list_head *p, *q;
950 
951 	list_for_each_safe(p, q, &input_files) {
952 		rem_input_file(list_entry(p, struct thr_info, head));
953 	}
954 }
955 
956 /**
957  * __find_input_files - Find input files associated with this device (per cpu)
958  */
__find_input_files(struct dev_info * dip)959 static void __find_input_files(struct dev_info *dip)
960 {
961 	int cpu = 0;
962 
963 	for (;;) {
964 		char full_name[MAXPATHLEN];
965 
966 		sprintf(full_name, "%s/%s.%s.%d", idir, dip->devnm, ibase, cpu);
967 		if (access(full_name, R_OK) != 0)
968 			break;
969 
970 		add_input_file(cpu, dip->devnm, full_name);
971 		cpu++;
972 	}
973 
974 	if (!cpu) {
975 		fatal(NULL, ERR_ARGS, "No traces found for %s\n", dip->devnm);
976 		/*NOTREACHED*/
977 	}
978 
979 	rem_input_dev(dip);
980 }
981 
982 
983 /**
984  * find_input_files - Find input files for all devices
985  */
find_input_files(void)986 static void find_input_files(void)
987 {
988 	struct list_head *p, *q;
989 
990 	list_for_each_safe(p, q, &input_devs) {
991 		__find_input_files(list_entry(p, struct dev_info, head));
992 	}
993 }
994 
995 /*
996  * ========================================================================
997  * ==== RECLAIM ROUTINES ==================================================
998  * ========================================================================
999  */
1000 
1001 /**
1002  * reap_wait_aios - Wait for and return number of outstanding AIOs
1003  *
1004  * Will return 0 if we are done
1005  */
reap_wait_aios(struct thr_info * tip)1006 static int reap_wait_aios(struct thr_info *tip)
1007 {
1008 	int naios = 0;
1009 
1010 	if (!is_reap_done(tip)) {
1011 		pthread_mutex_lock(&tip->mutex);
1012 		while (tip->naios_out == 0) {
1013 			tip->reap_wait = 1;
1014 			if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1015 				fatal("pthread_cond_wait", ERR_SYSCALL,
1016 					"nfree_current cond wait failed\n");
1017 				/*NOTREACHED*/
1018 			}
1019 		}
1020 		naios = tip->naios_out;
1021 		pthread_mutex_unlock(&tip->mutex);
1022 	}
1023 	assert(is_reap_done(tip) || naios > 0);
1024 
1025 	return is_reap_done(tip) ? 0 : naios;
1026 }
1027 
1028 /**
1029  * reclaim_ios - Reclaim AIOs completed, recycle IOCBs
1030  * @tip: Per-thread information
1031  * @naios_out: Number of AIOs we have outstanding (min)
1032  */
reclaim_ios(struct thr_info * tip,long naios_out)1033 static void reclaim_ios(struct thr_info *tip, long naios_out)
1034 {
1035 	long i, ndone;
1036 	struct io_event *evp, events[naios_out];
1037 
1038 again:
1039 	assert(naios > 0);
1040 	for (;;) {
1041 		ndone = io_getevents(tip->ctx, 1, naios_out, events, NULL);
1042 		if (ndone > 0)
1043 			break;
1044 
1045 		if (errno && errno != EINTR) {
1046 			fatal("io_getevents", ERR_SYSCALL,
1047 				"io_getevents failed\n");
1048 			/*NOTREACHED*/
1049 		}
1050 	}
1051 	assert(0 < ndone && ndone <= naios_out);
1052 
1053 	pthread_mutex_lock(&tip->mutex);
1054 	for (i = 0, evp = events; i < ndone; i++, evp++) {
1055 		struct iocb_pkt *iocbp = evp->data;
1056 
1057                 if (evp->res != iocbp->iocb.u.c.nbytes) {
1058                         fatal(NULL, ERR_SYSCALL,
1059                               "Event failure %ld/%ld\t(%ld + %ld)\n",
1060                               (long)evp->res, (long)evp->res2,
1061                               (long)iocbp->iocb.u.c.offset / nb_sec,
1062 			      (long)iocbp->iocb.u.c.nbytes / nb_sec);
1063                         /*NOTREACHED*/
1064                 }
1065 
1066 		list_move_tail(&iocbp->head, &tip->free_iocbs);
1067 	}
1068 
1069 	tip->naios_free += ndone;
1070 	tip->naios_out -= ndone;
1071 	naios_out = minl(naios_out, tip->naios_out);
1072 
1073 	if (tip->send_wait) {
1074 		tip->send_wait = 0;
1075 		pthread_cond_signal(&tip->cond);
1076 	}
1077 	pthread_mutex_unlock(&tip->mutex);
1078 
1079 	/*
1080 	 * Short cut: If we /know/ there are some more AIOs, go handle them
1081 	 */
1082 	if (naios_out)
1083 		goto again;
1084 }
1085 
1086 /**
1087  * replay_rec - Worker thread to reclaim AIOs
1088  * @arg: Pointer to thread information
1089  */
replay_rec(void * arg)1090 static void *replay_rec(void *arg)
1091 {
1092 	long naios_out;
1093 	struct thr_info *tip = arg;
1094 
1095 	while ((naios_out = reap_wait_aios(tip)) > 0)
1096 		reclaim_ios(tip, naios_out);
1097 
1098 	assert(tip->send_done);
1099 	tip->reap_done = 1;
1100 	set_reclaim_done();
1101 
1102 	return NULL;
1103 }
1104 
1105 /*
1106  * ========================================================================
1107  * ==== REPLAY ROUTINES ===================================================
1108  * ========================================================================
1109  */
1110 
1111 /**
1112  * next_bunch - Retrieve next bunch of AIOs to process
1113  * @tip: Per-thread information
1114  * @bunch: Bunch information
1115  *
1116  * Returns TRUE if we recovered a bunch of IOs, else hit EOF
1117  */
next_bunch(struct thr_info * tip,struct io_bunch * bunch)1118 static int next_bunch(struct thr_info *tip, struct io_bunch *bunch)
1119 {
1120 	size_t count, result;
1121 
1122 	result = read(tip->ifd, &bunch->hdr, sizeof(bunch->hdr));
1123 	if (result != sizeof(bunch->hdr)) {
1124 		if (result == 0)
1125 			return 0;
1126 
1127 		fatal(tip->file_name, ERR_SYSCALL, "Short hdr(%ld)\n",
1128 			(long)result);
1129 		/*NOTREACHED*/
1130 	}
1131 	assert(bunch->hdr.npkts <= BT_MAX_PKTS);
1132 
1133 	count = bunch->hdr.npkts * sizeof(struct io_pkt);
1134 	result = read(tip->ifd, &bunch->pkts, count);
1135 	if (result != count) {
1136 		fatal(tip->file_name, ERR_SYSCALL, "Short pkts(%ld/%ld)\n",
1137 			(long)result, (long)count);
1138 		/*NOTREACHED*/
1139 	}
1140 
1141 	return 1;
1142 }
1143 
1144 /**
1145  * nfree_current - Returns current number of AIOs that are free
1146  *
1147  * Will wait for available ones...
1148  *
1149  * Returns 0 if we have some condition that causes us to exit
1150  */
nfree_current(struct thr_info * tip)1151 static int nfree_current(struct thr_info *tip)
1152 {
1153 	int nfree = 0;
1154 
1155 	pthread_mutex_lock(&tip->mutex);
1156 	while (!is_send_done(tip) && ((nfree = tip->naios_free) == 0)) {
1157 		tip->send_wait = 1;
1158 		if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1159 			fatal("pthread_cond_wait", ERR_SYSCALL,
1160 				"nfree_current cond wait failed\n");
1161 			/*NOTREACHED*/
1162 		}
1163 	}
1164 	pthread_mutex_unlock(&tip->mutex);
1165 
1166 	return nfree;
1167 }
1168 
1169 /**
1170  * stall - Stall for the number of nanoseconds requested
1171  *
1172  * We may be late, in which case we just return.
1173  */
stall(struct thr_info * tip,long long oclock)1174 static void stall(struct thr_info *tip, long long oclock)
1175 {
1176 	struct timespec req;
1177 	long long dreal, tclock = gettime() - rgenesis;
1178 
1179 	oclock /= acc_factor;
1180 
1181 	if (verbose > 1)
1182 		fprintf(tip->vfp, "   stall(%lld.%09lld, %lld.%09lld)\n",
1183 			du64_to_sec(oclock), du64_to_nsec(oclock),
1184 			du64_to_sec(tclock), du64_to_nsec(tclock));
1185 
1186 	while (!is_send_done(tip) && tclock < oclock) {
1187 		dreal = oclock - tclock;
1188 		req.tv_sec = dreal / (1000 * 1000 * 1000);
1189 		req.tv_nsec = dreal % (1000 * 1000 * 1000);
1190 
1191 		if (verbose > 1) {
1192 			fprintf(tip->vfp, "++ stall(%lld.%09lld) ++\n",
1193 				(long long)req.tv_sec,
1194 				(long long)req.tv_nsec);
1195 		}
1196 
1197 		if (nanosleep(&req, NULL) < 0 && signal_done)
1198 			break;
1199 
1200 		tclock = gettime() - rgenesis;
1201 	}
1202 }
1203 
1204 /**
1205  * iocbs_map - Map a set of AIOs onto a set of IOCBs
1206  * @tip: Per-thread information
1207  * @list: List of AIOs created
1208  * @pkts: AIOs to map
1209  * @ntodo: Number of AIOs to map
1210  */
iocbs_map(struct thr_info * tip,struct iocb ** list,struct io_pkt * pkts,int ntodo)1211 static void iocbs_map(struct thr_info *tip, struct iocb **list,
1212 					     struct io_pkt *pkts, int ntodo)
1213 {
1214 	int i;
1215 	struct io_pkt *pkt;
1216 
1217 	assert(0 < ntodo && ntodo <= naios);
1218 
1219 	pthread_mutex_lock(&tip->mutex);
1220 	assert(ntodo <= list_len(&tip->free_iocbs));
1221 	for (i = 0, pkt = pkts; i < ntodo; i++, pkt++) {
1222 		__u32 rw = pkt->rw;
1223 		struct iocb_pkt *iocbp;
1224 
1225 		if (!pkt->rw && !write_enabled)
1226 			rw = 1;
1227 
1228 		if (verbose > 1)
1229 			fprintf(tip->vfp, "\t%10llu + %10llu %c%c\n",
1230 				(unsigned long long)pkt->sector,
1231 				(unsigned long long)pkt->nbytes / nb_sec,
1232 				rw ? 'R' : 'W',
1233 				(rw == 1 && pkt->rw == 0) ? '!' : ' ');
1234 
1235 		iocbp = list_entry(tip->free_iocbs.next, struct iocb_pkt, head);
1236 		iocb_setup(iocbp, rw, pkt->nbytes, pkt->sector * nb_sec);
1237 
1238 		list_move_tail(&iocbp->head, &tip->used_iocbs);
1239 		list[i] = &iocbp->iocb;
1240 	}
1241 
1242 	tip->naios_free -= ntodo;
1243 	assert(tip->naios_free >= 0);
1244 	pthread_mutex_unlock(&tip->mutex);
1245 }
1246 
1247 /**
1248  * process_bunch - Process a bunch of requests
1249  * @tip: Per-thread information
1250  * @bunch: Bunch to process
1251  */
process_bunch(struct thr_info * tip,struct io_bunch * bunch)1252 static void process_bunch(struct thr_info *tip, struct io_bunch *bunch)
1253 {
1254 	__u64 i = 0;
1255 	struct iocb *list[bunch->hdr.npkts];
1256 
1257 	assert(0 < bunch->hdr.npkts && bunch->hdr.npkts <= BT_MAX_PKTS);
1258 	while (!is_send_done(tip) && (i < bunch->hdr.npkts)) {
1259 		long ndone;
1260 		int ntodo = min(nfree_current(tip), bunch->hdr.npkts - i);
1261 
1262 		assert(0 < ntodo && ntodo <= naios);
1263 		iocbs_map(tip, list, &bunch->pkts[i], ntodo);
1264 		if (!no_stalls)
1265 			stall(tip, bunch->hdr.time_stamp - genesis);
1266 
1267 		if (ntodo) {
1268 			if (verbose > 1)
1269 				fprintf(tip->vfp, "submit(%d)\n", ntodo);
1270 			ndone = io_submit(tip->ctx, ntodo, list);
1271 			if (ndone != (long)ntodo) {
1272 				fatal("io_submit", ERR_SYSCALL,
1273 					"%d: io_submit(%d:%ld) failed (%s)\n",
1274 					tip->cpu, ntodo, ndone,
1275 					strerror(labs(ndone)));
1276 				/*NOTREACHED*/
1277 			}
1278 
1279 			pthread_mutex_lock(&tip->mutex);
1280 			tip->naios_out += ndone;
1281 			assert(tip->naios_out <= naios);
1282 			if (tip->reap_wait) {
1283 				tip->reap_wait = 0;
1284 				pthread_cond_signal(&tip->cond);
1285 			}
1286 			pthread_mutex_unlock(&tip->mutex);
1287 
1288 			i += ndone;
1289 			assert(i <= bunch->hdr.npkts);
1290 		}
1291 	}
1292 }
1293 
1294 /**
1295  * reset_input_file - Reset the input file for the next iteration
1296  * @tip: Thread information
1297  *
1298  * We also do a dummy read of the file header to get us to the first bunch.
1299  */
reset_input_file(struct thr_info * tip)1300 static void reset_input_file(struct thr_info *tip)
1301 {
1302 	struct io_file_hdr hdr;
1303 
1304 	lseek(tip->ifd, 0, 0);
1305 
1306 	if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
1307 		fatal(tip->file_name, ERR_ARGS, "Header reread failed\n");
1308 		/*NOTREACHED*/
1309 	}
1310 }
1311 
1312 /**
1313  * replay_sub - Worker thread to submit AIOs that are being replayed
1314  */
replay_sub(void * arg)1315 static void *replay_sub(void *arg)
1316 {
1317 	char path[MAXPATHLEN];
1318 	struct io_bunch bunch;
1319 	struct thr_info *tip = arg;
1320 	int oflags;
1321 
1322 	pin_to_cpu(tip);
1323 
1324 	sprintf(path, "/dev/%s", map_dev(tip->devnm));
1325 
1326 #ifdef O_NOATIME
1327 	oflags = O_NOATIME;
1328 #else
1329 	oflags = 0;
1330 #endif
1331 	tip->ofd = open(path, O_RDWR | O_DIRECT | oflags);
1332 	if (tip->ofd < 0) {
1333 		fatal(path, ERR_SYSCALL, "Failed device open\n");
1334 		/*NOTREACHED*/
1335 	}
1336 
1337 	set_replay_ready();
1338 	while (!is_send_done(tip) && tip->iterations--) {
1339 		wait_iter_start();
1340 		if (verbose > 1)
1341 			fprintf(tip->vfp, "\n=== %d ===\n", tip->iterations);
1342 		while (!is_send_done(tip) && next_bunch(tip, &bunch))
1343 			process_bunch(tip, &bunch);
1344 		set_iter_done();
1345 		reset_input_file(tip);
1346 	}
1347 	tip->send_done = 1;
1348 	set_replay_done();
1349 
1350 	return NULL;
1351 }
1352 
1353 /*
1354  * ========================================================================
1355  * ==== COMMAND LINE ARGUMENT HANDLING ====================================
1356  * ========================================================================
1357  */
1358 
/*
 * Option summary for the help output, one line per option.  The -x entry
 * shows its argument because the option tables declare it as taking one.
 */
static char usage_str[] =
	"\n"
	"\t[ -c <cpus> : --cpus=<cpus>           ] Default: 1\n"
	"\t[ -d <dir>  : --input-directory=<dir> ] Default: .\n"
	"\t[ -F        : --find-records          ] Default: Off\n"
	"\t[ -h        : --help                  ] Default: Off\n"
	"\t[ -i <base> : --input-base=<base>     ] Default: replay\n"
	"\t[ -I <iters>: --iterations=<iters>    ] Default: 1\n"
	"\t[ -M <file> : --map-devs=<file>       ] Default: None\n"
	"\t[ -N        : --no-stalls             ] Default: Off\n"
	"\t[ -x <accel>: --acc-factor=<accel>    ] Default: 1\n"
	"\t[ -v        : --verbose               ] Default: Off\n"
	"\t[ -V        : --version               ] Default: Off\n"
	"\t[ -W        : --write-enable          ] Default: Off\n"
	"\t<dev...>                                Default: None\n"
	"\n";
1375 
/*
 * Short option string and matching long option table for getopt_long().
 *
 * NOTE(review): "t:" is accepted by the short option string, but there is
 * no long option for it and handle_args() has no case for 't', so it ends
 * up in the invalid-argument branch -- confirm whether it is a leftover.
 */
#define S_OPTS	"c:d:Fhi:I:M:Nx:t:vVW"
static struct option l_opts[] = {
	/* name,		has_arg,		flag,	val */
	{ "cpus",		required_argument,	NULL,	'c' },
	{ "input-directory",	required_argument,	NULL,	'd' },
	{ "find-records",	no_argument,		NULL,	'F' },
	{ "help",		no_argument,		NULL,	'h' },
	{ "input-base",		required_argument,	NULL,	'i' },
	{ "iterations",		required_argument,	NULL,	'I' },
	{ "map-devs",		required_argument,	NULL,	'M' },
	{ "no-stalls",		no_argument,		NULL,	'N' },
	{ "acc-factor",		required_argument,	NULL,	'x' },
	{ "verbose",		no_argument,		NULL,	'v' },
	{ "version",		no_argument,		NULL,	'V' },
	{ "write-enable",	no_argument,		NULL,	'W' },
	{ NULL,			0,			NULL,	0 }	/* terminator */
};
1454 
1455 /**
1456  * handle_args: Parse passed in argument list
1457  * @argc: Number of arguments in argv
1458  * @argv: Arguments passed in
1459  *
1460  * Does rudimentary parameter verification as well.
1461  */
handle_args(int argc,char * argv[])1462 static void handle_args(int argc, char *argv[])
1463 {
1464 	int c;
1465 	int r;
1466 
1467 	while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1468 		switch (c) {
1469 		case 'c':
1470 			cpus_to_use = atoi(optarg);
1471 			if (cpus_to_use <= 0 || cpus_to_use > ncpus) {
1472 				fatal(NULL, ERR_ARGS,
1473 				      "Invalid number of cpus %d (0<x<%d)\n",
1474 				      cpus_to_use, ncpus);
1475 				/*NOTREACHED*/
1476 			}
1477 			break;
1478 
1479 		case 'd':
1480 			idir = optarg;
1481 			if (access(idir, R_OK | X_OK) != 0) {
1482 				fatal(idir, ERR_ARGS,
1483 				      "Invalid input directory specified\n");
1484 				/*NOTREACHED*/
1485 			}
1486 			break;
1487 
1488 		case 'F':
1489 			find_records = 1;
1490 			break;
1491 
1492 		case 'h':
1493 			usage();
1494 			exit(0);
1495 			/*NOTREACHED*/
1496 
1497 		case 'i':
1498 			ibase = optarg;
1499 			break;
1500 
1501 		case 'I':
1502 			def_iterations = atoi(optarg);
1503 			if (def_iterations <= 0) {
1504 				fprintf(stderr,
1505 					"Invalid number of iterations %d\n",
1506 					def_iterations);
1507 				exit(ERR_ARGS);
1508 				/*NOTREACHED*/
1509 			}
1510 			break;
1511 
1512 		case 'M':
1513 			read_map_devs(optarg);
1514 			break;
1515 
1516 		case 'N':
1517 			no_stalls = 1;
1518 			break;
1519 
1520 		case 'x':
1521 			r = sscanf(optarg,"%u",&acc_factor);
1522 			if (r!=1) {
1523 				fprintf(stderr,
1524 					"Invalid acceleration factor\n");
1525 				exit(ERR_ARGS);
1526 				/*NOTREACHED*/
1527 			}
1528 			break;
1529 
1530 		case 'V':
1531 			fprintf(stderr, "btreplay -- version %s\n",
1532 				my_btversion);
1533 			fprintf(stderr, "            Built on %s\n",
1534 				build_date);
1535 			exit(0);
1536 			/*NOTREACHED*/
1537 
1538 		case 'v':
1539 			verbose++;
1540 			break;
1541 
1542 		case 'W':
1543 			write_enabled = 1;
1544 			break;
1545 
1546 		default:
1547 			usage();
1548 			fatal(NULL, ERR_ARGS,
1549 			      "Invalid command line argument %c\n", c);
1550 			/*NOTREACHED*/
1551 		}
1552 	}
1553 
1554 	while (optind < argc)
1555 		add_input_dev(argv[optind++]);
1556 
1557 	if (find_records)
1558 		find_input_devs(idir);
1559 
1560 	if (list_len(&input_devs) == 0) {
1561 		fatal(NULL, ERR_ARGS, "Missing required input dev name(s)\n");
1562 		/*NOTREACHED*/
1563 	}
1564 
1565 	if (cpus_to_use < 0)
1566 		cpus_to_use = ncpus;
1567 }
1568 
1569 /*
1570  * ========================================================================
1571  * ==== MAIN ROUTINE ======================================================
1572  * ========================================================================
1573  */
1574 
1575 /**
1576  * set_signal_done - Signal handler, catches signals & sets signal_done
1577  */
set_signal_done(int signum)1578 static void set_signal_done(__attribute__((__unused__))int signum)
1579 {
1580 	signal_done = 1;
1581 }
1582 
1583 /**
1584  * main -
1585  * @argc: Number of arguments
1586  * @argv: Array of arguments
1587  */
main(int argc,char * argv[])1588 int main(int argc, char *argv[])
1589 {
1590 	int i;
1591 	struct list_head *p;
1592 
1593 	pgsize = getpagesize();
1594 	assert(pgsize > 0);
1595 
1596 	setup_signal(SIGINT, set_signal_done);
1597 	setup_signal(SIGTERM, set_signal_done);
1598 
1599 	get_ncpus();
1600 	handle_args(argc, argv);
1601 	find_input_files();
1602 
1603 	nfiles = list_len(&input_files);
1604 	__list_for_each(p, &input_files) {
1605 		tip_init(list_entry(p, struct thr_info, head));
1606 	}
1607 
1608 	wait_replays_ready();
1609 	for (i = 0; i < def_iterations; i++) {
1610 		rgenesis = gettime();
1611 		start_iter();
1612 		if (verbose)
1613 			fprintf(stderr, "I");
1614 		wait_iters_done();
1615 	}
1616 
1617 	wait_replays_done();
1618 	wait_reclaims_done();
1619 
1620 	if (verbose)
1621 		fprintf(stderr, "\n");
1622 
1623 	rem_input_files();
1624 	release_map_devs();
1625 
1626 	return 0;
1627 }
1628