1 /*
2  * Blktrace replay utility - Play traces back
3  *
4  * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20 
21 #include <assert.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <libaio.h>
25 #include <pthread.h>
26 #include <sched.h>
27 #include <signal.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33 #include <sys/param.h>
34 #include <sys/stat.h>
35 #include <sys/time.h>
36 #include <sys/types.h>
37 #include <dirent.h>
38 #include <stdarg.h>
39 
40 #if !defined(_GNU_SOURCE)
41 #	define _GNU_SOURCE
42 #endif
43 #include <getopt.h>
44 
45 #include "list.h"
46 #include "btrecord.h"
47 
48 /*
49  * ========================================================================
50  * ==== STRUCTURE DEFINITIONS =============================================
51  * ========================================================================
52  */
53 
54 /**
55  * Each device map has one of these:
56  *
57  * @head:	Linked on to map_devs
58  * @from_dev:	Device name as seen on recorded system
59  * @to_dev:	Device name to be used on replay system
60  */
61 struct map_dev {
62 	struct list_head head;
63 	char *from_dev, *to_dev;
64 };
65 
66 /**
67  * Each device name specified has one of these (until threads are created)
68  *
69  * @head: 	Linked onto input_devs
70  * @devnm: 	Device name -- 'sd*'
71  */
72 struct dev_info {
73 	struct list_head head;
74 	char *devnm;
75 };
76 
77 /*
78  * Per input file information
79  *
80  * @head: 	Used to link up on input_files
81  * @free_iocbs: List of free iocb's available for use
82  * @used_iocbs: List of iocb's currently outstanding
83  * @mutex: 	Mutex used with condition variable to protect volatile values
84  * @cond: 	Condition variable used when waiting on a volatile value change
85  * @naios_out: 	Current number of AIOs outstanding on this context
86  * @naios_free: Number of AIOs on the free list (short cut for list_len)
87  * @send_wait: 	Boolean: When true, the sub thread is waiting on free IOCBs
88  * @reap_wait: 	Boolean: When true, the rec thread is waiting on used IOCBs
89  * @send_done: 	Boolean: When true, the sub thread has completed work
90  * @reap_done: 	Boolean: When true, the rec thread has completed work
91  * @sub_thread: Thread used to submit IOs.
92  * @rec_thread: Thread used to reclaim IOs.
93  * @ctx: 	IO context
94  * @devnm: 	Copy of the device name being managed by this thread
95  * @file_name: 	Full name of the input file
96  * @cpu: 	CPU this thread is pinned to
97  * @ifd: 	Input file descriptor
98  * @ofd: 	Output file descriptor
99  * @iterations: Remaining iterations to process
100  * @vfp:	For verbose dumping of actions performed
101  */
102 struct thr_info {
103 	struct list_head head, free_iocbs, used_iocbs;
104 	pthread_mutex_t mutex;
105 	pthread_cond_t cond;
106 	volatile long naios_out, naios_free;
107 	volatile int send_wait, reap_wait, send_done, reap_done;
108 	pthread_t sub_thread, rec_thread;
109 	io_context_t ctx;
110 	char *devnm, *file_name;
111 	int cpu, ifd, ofd, iterations;
112 	FILE *vfp;
113 };
114 
115 /*
116  * Every Asynchronous IO used has one of these (naios per file/device).
117  *
118  * @iocb:	IOCB sent down via io_submit
119  * @head:	Linked onto file_list.free_iocbs or file_list.used_iocbs
120  * @tip:	Pointer to per-thread information this IO is associated with
121  * @nbytes:	Number of bytes in buffer associated with iocb
122  */
123 struct iocb_pkt {
124 	struct iocb iocb;
125 	struct list_head head;
126 	struct thr_info *tip;
127 	int nbytes;
128 };
129 
130 /*
131  * ========================================================================
132  * ==== GLOBAL VARIABLES ==================================================
133  * ========================================================================
134  */
135 
136 static volatile int signal_done = 0;	// Boolean: Signal'ed, need to quit
137 
138 static char *ibase = "replay";		// Input base name
139 static char *idir = ".";		// Input directory base
140 static int cpus_to_use = -1;		// Number of CPUs to use
141 static int def_iterations = 1;		// Default number of iterations
142 static int naios = 512;			// Number of AIOs per thread
143 static int ncpus = 0;			// Number of CPUs in the system
144 static int verbose = 0;			// Boolean: Output some extra info
145 static int write_enabled = 0;		// Boolean: Enable writing
146 static __u64 genesis = ~0;		// Earliest time seen
147 static __u64 rgenesis;			// Our start time
148 static size_t pgsize;			// System Page size
149 static int nb_sec = 512;		// Number of bytes per sector
150 static LIST_HEAD(input_devs);		// List of devices to handle
151 static LIST_HEAD(input_files);		// List of input files to handle
152 static LIST_HEAD(map_devs);		// List of device maps
153 static int nfiles = 0;			// Number of files to handle
154 static int no_stalls = 0;		// Boolean: Disable pre-stalls
155 static unsigned acc_factor = 1;		// Int: Acceleration factor
156 static int find_records = 0;		// Boolean: Find record files auto
157 
/*
 * Variables managed under control of condition variables. Each counter is
 * paired with the mutex and condition variable declared right after it,
 * and starts out at zero.
 *
 * n_reclaims_done:	Counts number of reclaim threads that have completed.
 * n_replays_done:	Counts number of replay threads that have completed.
 * n_replays_ready:	Counts number of replay threads ready to start.
 * n_iters_done:	Counts number of replay threads done one iteration.
 * iter_start:		Starts an iteration for the replay threads.
 */
static volatile int n_reclaims_done;
static pthread_mutex_t reclaim_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t reclaim_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_replays_done;
static pthread_mutex_t replay_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_replays_ready;
static pthread_mutex_t replay_ready_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_ready_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_iters_done;
static pthread_mutex_t iter_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int iter_start;
static pthread_mutex_t iter_start_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_start_cond = PTHREAD_COND_INITIALIZER;
186 
187 /*
188  * ========================================================================
 * ==== FORWARD REFERENCES ================================================
190  * ========================================================================
191  */
192 
193 static void *replay_sub(void *arg);
194 static void *replay_rec(void *arg);
195 static char usage_str[];
196 
197 /*
198  * ========================================================================
199  * ==== INLINE ROUTINES ===================================================
200  * ========================================================================
201  */
202 
/**
 * fatal - Report an error and terminate the program
 * @errstring:	If non-NULL, passed to perror() before the message is printed
 * @exitval:	Process exit status (ERR_ARGS or ERR_SYSCALL)
 * @fmt:	printf-style format for the message written to stderr
 *
 * This routine never returns: it always ends in exit(@exitval).
 */
#define ERR_ARGS			1
#define ERR_SYSCALL			2
static inline void fatal(const char *errstring, const int exitval,
			 const char *fmt, ...)
{
	va_list args;

	if (errstring != NULL)
		perror(errstring);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(exitval);
	/*NOTREACHED*/
}
225 
/**
 * du64_to_sec - Whole seconds contained in a nanosecond count
 * @du64: Value in nanoseconds
 */
static inline long long unsigned du64_to_sec(__u64 du64)
{
	const long long unsigned ns_per_sec = 1000000000ULL;
	long long unsigned ns = (long long unsigned)du64;

	return ns / ns_per_sec;
}
230 
/**
 * du64_to_nsec - Sub-second remainder (in nanoseconds) of a nanosecond count
 * @du64: Value in nanoseconds; it is reinterpreted as signed and its
 *        absolute value is used
 */
static inline long long unsigned du64_to_nsec(__u64 du64)
{
	long long magnitude = llabs((long long)du64);

	return magnitude % 1000000000;
}
235 
/**
 * min - Return the smaller of two integers
 * @a: First value
 * @b: Second value
 */
static inline int min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
243 
/**
 * minl - Return the smaller of two longs
 * @a: First value
 * @b: Second value
 */
static inline long minl(long a, long b)
{
	if (a < b)
		return a;
	return b;
}
251 
252 /**
253  * usage - Display usage string and version
254  */
usage(void)255 static inline void usage(void)
256 {
257 	fprintf(stderr, "Usage: btreplay -- version %s\n%s",
258 		my_btversion, usage_str);
259 }
260 
261 /**
262  * is_send_done - Returns true if sender should quit early
263  * @tip: Per-thread information
264  */
is_send_done(struct thr_info * tip)265 static inline int is_send_done(struct thr_info *tip)
266 {
267 	return signal_done || tip->send_done;
268 }
269 
270 /**
271  * is_reap_done - Returns true if reaper should quit early
272  * @tip: Per-thread information
273  */
is_reap_done(struct thr_info * tip)274 static inline int is_reap_done(struct thr_info *tip)
275 {
276 	return signal_done || (tip->send_done && tip->naios_out == 0);
277 }
278 
/**
 * ts2ns - Convert timespec values to a nanosecond value
 * @ts: Time to convert
 *
 * NS_TICKS is the number of nanoseconds in one second.
 */
#define NS_TICKS		((__u64)1000 * (__u64)1000 * (__u64)1000)
static inline __u64 ts2ns(struct timespec *ts)
{
	__u64 secs = (__u64)(ts->tv_sec);
	__u64 nsecs = (__u64)(ts->tv_nsec);

	return (secs * NS_TICKS) + nsecs;
}
287 
/**
 * tv2ns - Convert timeval values to a nanosecond value
 * @tp: Time to convert (seconds + microseconds)
 *
 * Both fields are scaled to nanoseconds before being summed: seconds by
 * 10^9 and microseconds by 10^3.
 */
static inline __u64 tv2ns(struct timeval *tp)
{
	const __u64 ns_per_sec = (__u64)1000 * (__u64)1000 * (__u64)1000;
	const __u64 ns_per_usec = (__u64)1000;

	return ((__u64)(tp->tv_sec) * ns_per_sec) +
	       ((__u64)(tp->tv_usec) * ns_per_usec);
}
295 
296 /**
297  * touch_memory - Force physical memory to be allocating it
298  *
299  * For malloc()ed memory we need to /touch/ it to make it really
300  * exist. Otherwise, for write's (to storage) things may not work
301  * as planned - we see Linux just use a single area to /read/ from
302  * (as there isn't any memory that has been associated with the
303  * allocated virtual addresses yet).
304  */
touch_memory(char * buf,size_t bsize)305 static inline void touch_memory(char *buf, size_t bsize)
306 {
307 #if defined(PREP_BUFS)
308 	memset(buf, 0, bsize);
309 #else
310 	size_t i;
311 
312 	for (i = 0; i < bsize; i += pgsize)
313 		buf[i] = 0;
314 #endif
315 }
316 
317 /**
318  * buf_alloc - Returns a page-aligned buffer of the specified size
319  * @nbytes: Number of bytes to allocate
320  */
buf_alloc(size_t nbytes)321 static inline void *buf_alloc(size_t nbytes)
322 {
323 	void *buf;
324 
325 	if (posix_memalign(&buf, pgsize, nbytes)) {
326 		fatal("posix_memalign", ERR_SYSCALL, "Allocation failed\n");
327 		/*NOTREACHED*/
328 	}
329 
330 	return buf;
331 }
332 
333 /**
334  * gettime - Returns current time
335  */
gettime(void)336 static inline __u64 gettime(void)
337 {
338 	static int use_clock_gettime = -1;		// Which clock to use
339 
340 	if (use_clock_gettime < 0) {
341 		use_clock_gettime = clock_getres(CLOCK_MONOTONIC, NULL) == 0;
342 		if (use_clock_gettime) {
343 			struct timespec ts = {
344 				.tv_sec = 0,
345 				.tv_nsec = 0
346 			};
347 			clock_settime(CLOCK_MONOTONIC, &ts);
348 		}
349 	}
350 
351 	if (use_clock_gettime) {
352 		struct timespec ts;
353 		clock_gettime(CLOCK_MONOTONIC, &ts);
354 		return ts2ns(&ts);
355 	}
356 	else {
357 		struct timeval tp;
358 		gettimeofday(&tp, NULL);
359 		return tv2ns(&tp);
360 	}
361 }
362 
363 /**
364  * setup_signal - Set up a signal handler for the specified signum
365  */
setup_signal(int signum,sighandler_t handler)366 static inline void setup_signal(int signum, sighandler_t handler)
367 {
368 	if (signal(signum, handler) == SIG_ERR) {
369 		fatal("signal", ERR_SYSCALL, "Failed to set signal %d\n",
370 			signum);
371 		/*NOTREACHED*/
372 	}
373 }
374 
375 /*
376  * ========================================================================
377  * ==== CONDITION VARIABLE ROUTINES =======================================
378  * ========================================================================
379  */
380 
/**
 * __set_cv - Increments a variable under condition variable control.
 * @pmp:	Pointer to the associated mutex
 * @pcp:	Pointer to the associated condition variable
 * @vp:		Pointer to the variable being incremented
 * @mxv:	Max value for variable (Used only when ASSERTS are on)
 *
 * The increment and the signal both happen with @pmp held.
 */
static inline void __set_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			    volatile int *vp,
			    __attribute__((__unused__))int mxv)
{
	pthread_mutex_lock(pmp);

	assert(*vp < mxv);
	(*vp)++;
	pthread_cond_signal(pcp);

	pthread_mutex_unlock(pmp);
}
398 
/**
 * __wait_cv - Waits for a variable under cond var control to hit a value
 * @pmp:	Pointer to the associated mutex
 * @pcp:	Pointer to the associated condition variable
 * @vp:		Pointer to the variable being watched
 * @mxv:	Value to wait for
 *
 * Blocks until *@vp is at least @mxv, then resets *@vp to zero before
 * releasing the mutex.
 */
static inline void __wait_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			     volatile int *vp, int mxv)
{
	pthread_mutex_lock(pmp);

	for (;;) {
		if (*vp >= mxv)
			break;
		pthread_cond_wait(pcp, pmp);
	}
	*vp = 0;

	pthread_mutex_unlock(pmp);
}
415 
set_reclaim_done(void)416 static inline void set_reclaim_done(void)
417 {
418 	__set_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
419 		 nfiles);
420 }
421 
wait_reclaims_done(void)422 static inline void wait_reclaims_done(void)
423 {
424 	__wait_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
425 		  nfiles);
426 }
427 
set_replay_ready(void)428 static inline void set_replay_ready(void)
429 {
430 	__set_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
431 		 nfiles);
432 }
433 
wait_replays_ready(void)434 static inline void wait_replays_ready(void)
435 {
436 	__wait_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
437 		  nfiles);
438 }
439 
set_replay_done(void)440 static inline void set_replay_done(void)
441 {
442 	__set_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
443 		nfiles);
444 }
445 
wait_replays_done(void)446 static inline void wait_replays_done(void)
447 {
448 	__wait_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
449 		  nfiles);
450 }
451 
set_iter_done(void)452 static inline void set_iter_done(void)
453 {
454 	__set_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
455 		nfiles);
456 }
457 
wait_iters_done(void)458 static inline void wait_iters_done(void)
459 {
460 	__wait_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
461 		  nfiles);
462 }
463 
464 /**
465  * wait_iter_start - Wait for an iteration to start
466  *
467  * This is /slightly/ different: we are waiting for a value to become
468  * non-zero, and then we decrement it and go on.
469  */
wait_iter_start(void)470 static inline void wait_iter_start(void)
471 {
472 	pthread_mutex_lock(&iter_start_mutex);
473 	while (iter_start == 0)
474 		pthread_cond_wait(&iter_start_cond, &iter_start_mutex);
475 	assert(1 <= iter_start && iter_start <= nfiles);
476 	iter_start--;
477 	pthread_mutex_unlock(&iter_start_mutex);
478 }
479 
480 /**
481  * start_iter - Start an iteration at the replay thread level
482  */
start_iter(void)483 static inline void start_iter(void)
484 {
485 	pthread_mutex_lock(&iter_start_mutex);
486 	assert(iter_start == 0);
487 	iter_start = nfiles;
488 	pthread_cond_broadcast(&iter_start_cond);
489 	pthread_mutex_unlock(&iter_start_mutex);
490 }
491 
492 /*
493  * ========================================================================
494  * ==== CPU RELATED ROUTINES ==============================================
495  * ========================================================================
496  */
497 
498 /**
499  * get_ncpus - Sets up the global 'ncpus' value
500  */
get_ncpus(void)501 static void get_ncpus(void)
502 {
503 #ifdef _SC_NPROCESSORS_ONLN
504 	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
505 #else
506 	int nrcpus = 4096;
507 	cpu_set_t * cpus;
508 
509 realloc:
510 	cpus = CPU_ALLOC(nrcpus);
511 	size = CPU_ALLOC_SIZE(nrcpus);
512 	CPU_ZERO_S(size, cpus);
513 
514 	if (sched_getaffinity(0, size, cpus)) {
515 		if( errno == EINVAL && nrcpus < (4096<<4) ) {
516 			CPU_FREE(cpus);
517 			nrcpus <<= 1;
518 			goto realloc;
519 		}
520 		fatal("sched_getaffinity", ERR_SYSCALL, "Can't get CPU info\n");
521 		/*NOTREACHED*/
522 	}
523 
524 	ncpus = -1;
525 	for (last_cpu = 0; last_cpu < CPU_SETSIZE && CPU_ISSET(last_cpu, &cpus); last_cpu++)
526 		if (CPU_ISSET( last_cpu, &cpus) )
527 			ncpus = last_cpu;
528 	ncpus++;
529 	CPU_FREE(cpus);
530 #endif
531 	if (ncpus == 0) {
532 		fatal(NULL, ERR_SYSCALL, "Insufficient number of CPUs\n");
533 		/*NOTREACHED*/
534 	}
535 }
536 
537 /**
538  * pin_to_cpu - Pin this thread to a specific CPU
539  * @tip: Thread information
540  */
pin_to_cpu(struct thr_info * tip)541 static void pin_to_cpu(struct thr_info *tip)
542 {
543 	cpu_set_t *cpus;
544 	size_t size;
545 
546 	cpus = CPU_ALLOC(ncpus);
547 	size = CPU_ALLOC_SIZE(ncpus);
548 
549 	assert(0 <= tip->cpu && tip->cpu < ncpus);
550 
551 	CPU_ZERO_S(size, cpus);
552 	CPU_SET_S(tip->cpu, size, cpus);
553 	if (sched_setaffinity(0, size, cpus)) {
554 		fatal("sched_setaffinity", ERR_SYSCALL, "Failed to pin CPU\n");
555 		/*NOTREACHED*/
556 	}
557 	assert(tip->cpu == sched_getcpu());
558 
559 	if (verbose > 1) {
560 		int i;
561 		cpu_set_t *now = CPU_ALLOC(ncpus);
562 
563 		(void)sched_getaffinity(0, size, now);
564 		fprintf(tip->vfp, "Pinned to CPU %02d ", tip->cpu);
565 		for (i = 0; i < ncpus; i++)
566 			fprintf(tip->vfp, "%1d", CPU_ISSET_S(i, size, now));
567 		fprintf(tip->vfp, "\n");
568 	}
569 }
570 
571 /*
572  * ========================================================================
573  * ==== INPUT DEVICE HANDLERS =============================================
574  * ========================================================================
575  */
576 
577 /**
578  * add_input_dev - Add a device ('sd*') to the list of devices to handle
579  */
add_input_dev(char * devnm)580 static void add_input_dev(char *devnm)
581 {
582 	struct list_head *p;
583 	struct dev_info *dip;
584 
585 	__list_for_each(p, &input_devs) {
586 		dip = list_entry(p, struct dev_info, head);
587 		if (strcmp(dip->devnm, devnm) == 0)
588 			return;
589 	}
590 
591 	dip = malloc(sizeof(*dip));
592 	dip->devnm = strdup(devnm);
593 	list_add_tail(&dip->head, &input_devs);
594 }
595 
596 /**
597  * rem_input_dev - Remove resources associated with this device
598  */
rem_input_dev(struct dev_info * dip)599 static void rem_input_dev(struct dev_info *dip)
600 {
601 	list_del(&dip->head);
602 	free(dip->devnm);
603 	free(dip);
604 }
605 
find_input_devs(char * idir)606 static void find_input_devs(char *idir)
607 {
608 	struct dirent *ent;
609 	DIR *dir = opendir(idir);
610 
611 	if (dir == NULL) {
612 		fatal(idir, ERR_ARGS, "Unable to open %s\n", idir);
613 		/*NOTREACHED*/
614 	}
615 
616 	while ((ent = readdir(dir)) != NULL) {
617 		char *p, *dsf;
618 
619 		if (strstr(ent->d_name, ".replay.") == NULL)
620 			continue;
621 
622 		dsf = strdup(ent->d_name);
623 		p = index(dsf, '.');
624 		assert(p != NULL);
625 		*p = '\0';
626 		add_input_dev(dsf);
627 		free(dsf);
628 	}
629 
630 	closedir(dir);
631 }
632 
633 /*
634  * ========================================================================
635  * ==== MAP DEVICE INTERFACES =============================================
636  * ========================================================================
637  */
638 
639 /**
640  * read_map_devs - Read in a set of device mapping from the provided file.
641  * @file_name:	File containing device maps
642  *
643  * We support the notion of multiple such files being specifed on the cmd line
644  */
read_map_devs(char * file_name)645 static void read_map_devs(char *file_name)
646 {
647 	FILE *fp;
648 	char from_dev[256], to_dev[256];
649 
650 	fp = fopen(file_name, "r");
651 	if (!fp) {
652 		fatal(file_name, ERR_SYSCALL, "Could not open map devs file\n");
653 		/*NOTREACHED*/
654 	}
655 
656 	while (fscanf(fp, "%s %s", from_dev, to_dev) == 2) {
657 		struct map_dev *mdp = malloc(sizeof(*mdp));
658 
659 		mdp->from_dev = from_dev;
660 		mdp->to_dev = to_dev;
661 		list_add_tail(&mdp->head, &map_devs);
662 	}
663 
664 	fclose(fp);
665 }
666 
667 /**
668  * release_map_devs - Release resources associated with device mappings.
669  */
release_map_devs(void)670 static void release_map_devs(void)
671 {
672 	struct list_head *p, *q;
673 
674 	list_for_each_safe(p, q, &map_devs) {
675 		struct map_dev *mdp = list_entry(p, struct map_dev, head);
676 
677 		list_del(&mdp->head);
678 
679 		free(mdp->from_dev);
680 		free(mdp->to_dev);
681 		free(mdp);
682 	}
683 }
684 
685 /**
686  * map_dev - Return the mapped device for that specified
687  * @from_dev:	Device name as seen on recorded system
688  *
689  * Note: If there is no such mapping, we return the same name.
690  */
map_dev(char * from_dev)691 static char *map_dev(char *from_dev)
692 {
693 	struct list_head *p;
694 
695 	__list_for_each(p, &map_devs) {
696 		struct map_dev *mdp = list_entry(p, struct map_dev, head);
697 
698 		if (strcmp(from_dev, mdp->from_dev) == 0)
699 			return mdp->to_dev;
700 	}
701 
702 	return from_dev;
703 }
704 
705 /*
706  * ========================================================================
707  * ==== IOCB MANAGEMENT ROUTINES ==========================================
708  * ========================================================================
709  */
710 
711 /**
712  * iocb_init - Initialize the fields of an IOCB
713  * @tip: Per-thread information
714  * iocbp: IOCB pointer to update
715  */
iocb_init(struct thr_info * tip,struct iocb_pkt * iocbp)716 static void iocb_init(struct thr_info *tip, struct iocb_pkt *iocbp)
717 {
718 	iocbp->tip = tip;
719 	iocbp->nbytes = 0;
720 	iocbp->iocb.u.c.buf = NULL;
721 }
722 
723 /**
724  * iocb_setup - Set up an iocb with this AIOs information
725  * @iocbp: IOCB pointer to update
726  * @rw: Direction (0 == write, 1 == read)
727  * @n: Number of bytes to transfer
728  * @off: Offset (in bytes)
729  */
iocb_setup(struct iocb_pkt * iocbp,int rw,int n,long long off)730 static void iocb_setup(struct iocb_pkt *iocbp, int rw, int n, long long off)
731 {
732 	char *buf;
733 	struct iocb *iop = &iocbp->iocb;
734 
735 	assert(rw == 0 || rw == 1);
736 	assert(0 < n && (n % nb_sec) == 0);
737 	assert(0 <= off);
738 
739 	if (iocbp->nbytes) {
740 		if (iocbp->nbytes >= n) {
741 			buf = iop->u.c.buf;
742 			goto prep;
743 		}
744 
745 		assert(iop->u.c.buf);
746 		free(iop->u.c.buf);
747 	}
748 
749 	buf = buf_alloc(n);
750 	iocbp->nbytes = n;
751 
752 prep:
753 	if (rw)
754 		io_prep_pread(iop, iocbp->tip->ofd, buf, n, off);
755 	else {
756 		assert(write_enabled);
757 		io_prep_pwrite(iop, iocbp->tip->ofd, buf, n, off);
758 		touch_memory(buf, n);
759 	}
760 
761 	iop->data = iocbp;
762 }
763 
764 /*
765  * ========================================================================
766  * ==== PER-THREAD SET UP & TEAR DOWN =====================================
767  * ========================================================================
768  */
769 
770 /**
771  * tip_init - Per thread initialization function
772  */
tip_init(struct thr_info * tip)773 static void tip_init(struct thr_info *tip)
774 {
775 	int i;
776 
777 	INIT_LIST_HEAD(&tip->free_iocbs);
778 	INIT_LIST_HEAD(&tip->used_iocbs);
779 
780 	pthread_mutex_init(&tip->mutex, NULL);
781 	pthread_cond_init(&tip->cond, NULL);
782 
783 	if (io_setup(naios, &tip->ctx)) {
784 		fatal("io_setup", ERR_SYSCALL, "io_setup failed\n");
785 		/*NOTREACHED*/
786 	}
787 
788 	tip->ofd = -1;
789 	tip->naios_out = 0;
790 	tip->send_done = tip->reap_done = 0;
791 	tip->send_wait = tip->reap_wait = 0;
792 
793 	memset(&tip->sub_thread, 0, sizeof(tip->sub_thread));
794 	memset(&tip->rec_thread, 0, sizeof(tip->rec_thread));
795 
796 	for (i = 0; i < naios; i++) {
797 		struct iocb_pkt *iocbp = buf_alloc(sizeof(*iocbp));
798 
799 		iocb_init(tip, iocbp);
800 		list_add_tail(&iocbp->head, &tip->free_iocbs);
801 	}
802 	tip->naios_free = naios;
803 
804 	if (verbose > 1) {
805 		char fn[MAXPATHLEN];
806 
807 		sprintf(fn, "%s/%s.%s.%d.rep", idir, tip->devnm, ibase,
808 			tip->cpu);
809 		tip->vfp = fopen(fn, "w");
810 		if (!tip->vfp) {
811 			fatal(fn, ERR_SYSCALL, "Failed to open report\n");
812 			/*NOTREACHED*/
813 		}
814 
815 		setlinebuf(tip->vfp);
816 	}
817 
818 	if (pthread_create(&tip->sub_thread, NULL, replay_sub, tip)) {
819 		fatal("pthread_create", ERR_SYSCALL,
820 			"thread create failed\n");
821 		/*NOTREACHED*/
822 	}
823 
824 	if (pthread_create(&tip->rec_thread, NULL, replay_rec, tip)) {
825 		fatal("pthread_create", ERR_SYSCALL,
826 			"thread create failed\n");
827 		/*NOTREACHED*/
828 	}
829 }
830 
831 /**
832  * tip_release - Release resources associated with this thread
833  */
tip_release(struct thr_info * tip)834 static void tip_release(struct thr_info *tip)
835 {
836 	struct list_head *p, *q;
837 
838 	assert(tip->send_done);
839 	assert(tip->reap_done);
840 	assert(list_len(&tip->used_iocbs) == 0);
841 	assert(tip->naios_free == naios);
842 
843 	if (pthread_join(tip->sub_thread, NULL)) {
844 		fatal("pthread_join", ERR_SYSCALL, "pthread sub join failed\n");
845 		/*NOTREACHED*/
846 	}
847 	if (pthread_join(tip->rec_thread, NULL)) {
848 		fatal("pthread_join", ERR_SYSCALL, "pthread rec join failed\n");
849 		/*NOTREACHED*/
850 	}
851 
852 	io_destroy(tip->ctx);
853 
854 	list_splice(&tip->used_iocbs, &tip->free_iocbs);
855 	list_for_each_safe(p, q, &tip->free_iocbs) {
856 		struct iocb_pkt *iocbp = list_entry(p, struct iocb_pkt, head);
857 
858 		list_del(&iocbp->head);
859 		if (iocbp->nbytes)
860 			free(iocbp->iocb.u.c.buf);
861 		free(iocbp);
862 	}
863 
864 	pthread_cond_destroy(&tip->cond);
865 	pthread_mutex_destroy(&tip->mutex);
866 }
867 
868 /**
869  * add_input_file - Allocate and initialize per-input file structure
870  * @cpu: CPU for this file
871  * @devnm: Device name for this file
872  * @file_name: Fully qualifed input file name
873  */
add_input_file(int cpu,char * devnm,char * file_name)874 static void add_input_file(int cpu, char *devnm, char *file_name)
875 {
876 	struct stat buf;
877 	struct io_file_hdr hdr;
878 	struct thr_info *tip = buf_alloc(sizeof(*tip));
879 	__u64 my_version = mk_btversion(btver_mjr, btver_mnr, btver_sub);
880 
881 	assert(0 <= cpu && cpu < ncpus);
882 
883 	memset(&hdr, 0, sizeof(hdr));
884 	memset(tip, 0, sizeof(*tip));
885 	tip->cpu = cpu % cpus_to_use;
886 	tip->iterations = def_iterations;
887 
888 	tip->ifd = open(file_name, O_RDONLY);
889 	if (tip->ifd < 0) {
890 		fatal(file_name, ERR_ARGS, "Unable to open\n");
891 		/*NOTREACHED*/
892 	}
893 	if (fstat(tip->ifd, &buf) < 0) {
894 		fatal(file_name, ERR_SYSCALL, "fstat failed\n");
895 		/*NOTREACHED*/
896 	}
897 	if (buf.st_size < (off_t)sizeof(hdr)) {
898 		if (verbose)
899 			fprintf(stderr, "\t%s empty\n", file_name);
900 		goto empty_file;
901 	}
902 
903 	if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
904 		fatal(file_name, ERR_ARGS, "Header read failed\n");
905 		/*NOTREACHED*/
906 	}
907 
908 	if (hdr.version != my_version) {
909 		fprintf(stderr, "%llx %llx %llx %llx\n",
910 			(long long unsigned)hdr.version,
911 			(long long unsigned)hdr.genesis,
912 			(long long unsigned)hdr.nbunches,
913 			(long long unsigned)hdr.total_pkts);
914 		fatal(NULL, ERR_ARGS,
915 			"BT version mismatch: %lx versus my %lx\n",
916 			(long)hdr.version, (long)my_version);
917 
918 	}
919 
920 	if (hdr.nbunches == 0) {
921 empty_file:
922 		close(tip->ifd);
923 		free(tip);
924 		return;
925 	}
926 
927 	if (hdr.genesis < genesis) {
928 		if (verbose > 1)
929 			fprintf(stderr, "Setting genesis to %llu.%llu\n",
930 				du64_to_sec(hdr.genesis),
931 				du64_to_nsec(hdr.genesis));
932 		genesis = hdr.genesis;
933 	}
934 
935 	tip->devnm = strdup(devnm);
936 	tip->file_name = strdup(file_name);
937 
938 	list_add_tail(&tip->head, &input_files);
939 
940 	if (verbose)
941 		fprintf(stderr, "Added %s %llu\n", file_name,
942 			(long long)hdr.genesis);
943 }
944 
945 /**
946  * rem_input_file - Release resources associated with an input file
947  * @tip: Per-input file information
948  */
rem_input_file(struct thr_info * tip)949 static void rem_input_file(struct thr_info *tip)
950 {
951 	list_del(&tip->head);
952 
953 	tip_release(tip);
954 
955 	close(tip->ofd);
956 	close(tip->ifd);
957 	free(tip->file_name);
958 	free(tip->devnm);
959 	free(tip);
960 }
961 
962 /**
963  * rem_input_files - Remove all input files
964  */
rem_input_files(void)965 static void rem_input_files(void)
966 {
967 	struct list_head *p, *q;
968 
969 	list_for_each_safe(p, q, &input_files) {
970 		rem_input_file(list_entry(p, struct thr_info, head));
971 	}
972 }
973 
974 /**
975  * __find_input_files - Find input files associated with this device (per cpu)
976  */
__find_input_files(struct dev_info * dip)977 static void __find_input_files(struct dev_info *dip)
978 {
979 	int cpu = 0;
980 
981 	for (;;) {
982 		char full_name[MAXPATHLEN];
983 
984 		sprintf(full_name, "%s/%s.%s.%d", idir, dip->devnm, ibase, cpu);
985 		if (access(full_name, R_OK) != 0)
986 			break;
987 
988 		add_input_file(cpu, dip->devnm, full_name);
989 		cpu++;
990 	}
991 
992 	if (!cpu) {
993 		fatal(NULL, ERR_ARGS, "No traces found for %s\n", dip->devnm);
994 		/*NOTREACHED*/
995 	}
996 
997 	rem_input_dev(dip);
998 }
999 
1000 
1001 /**
1002  * find_input_files - Find input files for all devices
1003  */
find_input_files(void)1004 static void find_input_files(void)
1005 {
1006 	struct list_head *p, *q;
1007 
1008 	list_for_each_safe(p, q, &input_devs) {
1009 		__find_input_files(list_entry(p, struct dev_info, head));
1010 	}
1011 }
1012 
1013 /*
1014  * ========================================================================
1015  * ==== RECLAIM ROUTINES ==================================================
1016  * ========================================================================
1017  */
1018 
1019 /**
1020  * reap_wait_aios - Wait for and return number of outstanding AIOs
1021  *
1022  * Will return 0 if we are done
1023  */
reap_wait_aios(struct thr_info * tip)1024 static int reap_wait_aios(struct thr_info *tip)
1025 {
1026 	int naios = 0;
1027 
1028 	if (!is_reap_done(tip)) {
1029 		pthread_mutex_lock(&tip->mutex);
1030 		while (tip->naios_out == 0) {
1031 			tip->reap_wait = 1;
1032 			if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1033 				fatal("pthread_cond_wait", ERR_SYSCALL,
1034 					"nfree_current cond wait failed\n");
1035 				/*NOTREACHED*/
1036 			}
1037 		}
1038 		naios = tip->naios_out;
1039 		pthread_mutex_unlock(&tip->mutex);
1040 	}
1041 	assert(is_reap_done(tip) || naios > 0);
1042 
1043 	return is_reap_done(tip) ? 0 : naios;
1044 }
1045 
1046 /**
1047  * reclaim_ios - Reclaim AIOs completed, recycle IOCBs
1048  * @tip: Per-thread information
1049  * @naios_out: Number of AIOs we have outstanding (min)
1050  */
reclaim_ios(struct thr_info * tip,long naios_out)1051 static void reclaim_ios(struct thr_info *tip, long naios_out)
1052 {
1053 	long i, ndone;
1054 	struct io_event *evp, events[naios_out];
1055 
1056 again:
1057 	assert(naios > 0);
1058 	for (;;) {
1059 		ndone = io_getevents(tip->ctx, 1, naios_out, events, NULL);
1060 		if (ndone > 0)
1061 			break;
1062 
1063 		if (errno && errno != EINTR) {
1064 			fatal("io_getevents", ERR_SYSCALL,
1065 				"io_getevents failed\n");
1066 			/*NOTREACHED*/
1067 		}
1068 	}
1069 	assert(0 < ndone && ndone <= naios_out);
1070 
1071 	pthread_mutex_lock(&tip->mutex);
1072 	for (i = 0, evp = events; i < ndone; i++, evp++) {
1073 		struct iocb_pkt *iocbp = evp->data;
1074 
1075                 if (evp->res != iocbp->iocb.u.c.nbytes) {
1076                         fatal(NULL, ERR_SYSCALL,
1077                               "Event failure %ld/%ld\t(%ld + %ld)\n",
1078                               (long)evp->res, (long)evp->res2,
1079                               (long)iocbp->iocb.u.c.offset / nb_sec,
1080 			      (long)iocbp->iocb.u.c.nbytes / nb_sec);
1081                         /*NOTREACHED*/
1082                 }
1083 
1084 		list_move_tail(&iocbp->head, &tip->free_iocbs);
1085 	}
1086 
1087 	tip->naios_free += ndone;
1088 	tip->naios_out -= ndone;
1089 	naios_out = minl(naios_out, tip->naios_out);
1090 
1091 	if (tip->send_wait) {
1092 		tip->send_wait = 0;
1093 		pthread_cond_signal(&tip->cond);
1094 	}
1095 	pthread_mutex_unlock(&tip->mutex);
1096 
1097 	/*
1098 	 * Short cut: If we /know/ there are some more AIOs, go handle them
1099 	 */
1100 	if (naios_out)
1101 		goto again;
1102 }
1103 
1104 /**
1105  * replay_rec - Worker thread to reclaim AIOs
1106  * @arg: Pointer to thread information
1107  */
replay_rec(void * arg)1108 static void *replay_rec(void *arg)
1109 {
1110 	long naios_out;
1111 	struct thr_info *tip = arg;
1112 
1113 	while ((naios_out = reap_wait_aios(tip)) > 0)
1114 		reclaim_ios(tip, naios_out);
1115 
1116 	assert(tip->send_done);
1117 	tip->reap_done = 1;
1118 	set_reclaim_done();
1119 
1120 	return NULL;
1121 }
1122 
1123 /*
1124  * ========================================================================
1125  * ==== REPLAY ROUTINES ===================================================
1126  * ========================================================================
1127  */
1128 
1129 /**
1130  * next_bunch - Retrieve next bunch of AIOs to process
1131  * @tip: Per-thread information
1132  * @bunch: Bunch information
1133  *
1134  * Returns TRUE if we recovered a bunch of IOs, else hit EOF
1135  */
next_bunch(struct thr_info * tip,struct io_bunch * bunch)1136 static int next_bunch(struct thr_info *tip, struct io_bunch *bunch)
1137 {
1138 	size_t count, result;
1139 
1140 	result = read(tip->ifd, &bunch->hdr, sizeof(bunch->hdr));
1141 	if (result != sizeof(bunch->hdr)) {
1142 		if (result == 0)
1143 			return 0;
1144 
1145 		fatal(tip->file_name, ERR_SYSCALL, "Short hdr(%ld)\n",
1146 			(long)result);
1147 		/*NOTREACHED*/
1148 	}
1149 	assert(bunch->hdr.npkts <= BT_MAX_PKTS);
1150 
1151 	count = bunch->hdr.npkts * sizeof(struct io_pkt);
1152 	result = read(tip->ifd, &bunch->pkts, count);
1153 	if (result != count) {
1154 		fatal(tip->file_name, ERR_SYSCALL, "Short pkts(%ld/%ld)\n",
1155 			(long)result, (long)count);
1156 		/*NOTREACHED*/
1157 	}
1158 
1159 	return 1;
1160 }
1161 
1162 /**
1163  * nfree_current - Returns current number of AIOs that are free
1164  *
1165  * Will wait for available ones...
1166  *
1167  * Returns 0 if we have some condition that causes us to exit
1168  */
nfree_current(struct thr_info * tip)1169 static int nfree_current(struct thr_info *tip)
1170 {
1171 	int nfree = 0;
1172 
1173 	pthread_mutex_lock(&tip->mutex);
1174 	while (!is_send_done(tip) && ((nfree = tip->naios_free) == 0)) {
1175 		tip->send_wait = 1;
1176 		if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1177 			fatal("pthread_cond_wait", ERR_SYSCALL,
1178 				"nfree_current cond wait failed\n");
1179 			/*NOTREACHED*/
1180 		}
1181 	}
1182 	pthread_mutex_unlock(&tip->mutex);
1183 
1184 	return nfree;
1185 }
1186 
1187 /**
1188  * stall - Stall for the number of nanoseconds requested
1189  *
1190  * We may be late, in which case we just return.
1191  */
stall(struct thr_info * tip,long long oclock)1192 static void stall(struct thr_info *tip, long long oclock)
1193 {
1194 	struct timespec req;
1195 	long long dreal, tclock = gettime() - rgenesis;
1196 
1197 	oclock /= acc_factor;
1198 
1199 	if (verbose > 1)
1200 		fprintf(tip->vfp, "   stall(%lld.%09lld, %lld.%09lld)\n",
1201 			du64_to_sec(oclock), du64_to_nsec(oclock),
1202 			du64_to_sec(tclock), du64_to_nsec(tclock));
1203 
1204 	while (!is_send_done(tip) && tclock < oclock) {
1205 		dreal = oclock - tclock;
1206 		req.tv_sec = dreal / (1000 * 1000 * 1000);
1207 		req.tv_nsec = dreal % (1000 * 1000 * 1000);
1208 
1209 		if (verbose > 1) {
1210 			fprintf(tip->vfp, "++ stall(%lld.%09lld) ++\n",
1211 				(long long)req.tv_sec,
1212 				(long long)req.tv_nsec);
1213 		}
1214 
1215 		if (nanosleep(&req, NULL) < 0 && signal_done)
1216 			break;
1217 
1218 		tclock = gettime() - rgenesis;
1219 	}
1220 }
1221 
1222 /**
1223  * iocbs_map - Map a set of AIOs onto a set of IOCBs
1224  * @tip: Per-thread information
1225  * @list: List of AIOs created
1226  * @pkts: AIOs to map
1227  * @ntodo: Number of AIOs to map
1228  */
iocbs_map(struct thr_info * tip,struct iocb ** list,struct io_pkt * pkts,int ntodo)1229 static void iocbs_map(struct thr_info *tip, struct iocb **list,
1230 					     struct io_pkt *pkts, int ntodo)
1231 {
1232 	int i;
1233 	struct io_pkt *pkt;
1234 
1235 	assert(0 < ntodo && ntodo <= naios);
1236 
1237 	pthread_mutex_lock(&tip->mutex);
1238 	assert(ntodo <= list_len(&tip->free_iocbs));
1239 	for (i = 0, pkt = pkts; i < ntodo; i++, pkt++) {
1240 		__u32 rw = pkt->rw;
1241 		struct iocb_pkt *iocbp;
1242 
1243 		if (!pkt->rw && !write_enabled)
1244 			rw = 1;
1245 
1246 		if (verbose > 1)
1247 			fprintf(tip->vfp, "\t%10llu + %10llu %c%c\n",
1248 				(unsigned long long)pkt->sector,
1249 				(unsigned long long)pkt->nbytes / nb_sec,
1250 				rw ? 'R' : 'W',
1251 				(rw == 1 && pkt->rw == 0) ? '!' : ' ');
1252 
1253 		iocbp = list_entry(tip->free_iocbs.next, struct iocb_pkt, head);
1254 		iocb_setup(iocbp, rw, pkt->nbytes, pkt->sector * nb_sec);
1255 
1256 		list_move_tail(&iocbp->head, &tip->used_iocbs);
1257 		list[i] = &iocbp->iocb;
1258 	}
1259 
1260 	tip->naios_free -= ntodo;
1261 	assert(tip->naios_free >= 0);
1262 	pthread_mutex_unlock(&tip->mutex);
1263 }
1264 
1265 /**
1266  * process_bunch - Process a bunch of requests
1267  * @tip: Per-thread information
1268  * @bunch: Bunch to process
1269  */
process_bunch(struct thr_info * tip,struct io_bunch * bunch)1270 static void process_bunch(struct thr_info *tip, struct io_bunch *bunch)
1271 {
1272 	__u64 i = 0;
1273 	struct iocb *list[bunch->hdr.npkts];
1274 
1275 	assert(0 < bunch->hdr.npkts && bunch->hdr.npkts <= BT_MAX_PKTS);
1276 	while (!is_send_done(tip) && (i < bunch->hdr.npkts)) {
1277 		long ndone;
1278 		int ntodo = min(nfree_current(tip), bunch->hdr.npkts - i);
1279 
1280 		assert(0 < ntodo && ntodo <= naios);
1281 		iocbs_map(tip, list, &bunch->pkts[i], ntodo);
1282 		if (!no_stalls)
1283 			stall(tip, bunch->hdr.time_stamp - genesis);
1284 
1285 		if (ntodo) {
1286 			if (verbose > 1)
1287 				fprintf(tip->vfp, "submit(%d)\n", ntodo);
1288 			ndone = io_submit(tip->ctx, ntodo, list);
1289 			if (ndone != (long)ntodo) {
1290 				fatal("io_submit", ERR_SYSCALL,
1291 					"%d: io_submit(%d:%ld) failed (%s)\n",
1292 					tip->cpu, ntodo, ndone,
1293 					strerror(labs(ndone)));
1294 				/*NOTREACHED*/
1295 			}
1296 
1297 			pthread_mutex_lock(&tip->mutex);
1298 			tip->naios_out += ndone;
1299 			assert(tip->naios_out <= naios);
1300 			if (tip->reap_wait) {
1301 				tip->reap_wait = 0;
1302 				pthread_cond_signal(&tip->cond);
1303 			}
1304 			pthread_mutex_unlock(&tip->mutex);
1305 
1306 			i += ndone;
1307 			assert(i <= bunch->hdr.npkts);
1308 		}
1309 	}
1310 }
1311 
1312 /**
1313  * reset_input_file - Reset the input file for the next iteration
1314  * @tip: Thread information
1315  *
1316  * We also do a dummy read of the file header to get us to the first bunch.
1317  */
reset_input_file(struct thr_info * tip)1318 static void reset_input_file(struct thr_info *tip)
1319 {
1320 	struct io_file_hdr hdr;
1321 
1322 	lseek(tip->ifd, 0, 0);
1323 
1324 	if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
1325 		fatal(tip->file_name, ERR_ARGS, "Header reread failed\n");
1326 		/*NOTREACHED*/
1327 	}
1328 }
1329 
1330 /**
1331  * replay_sub - Worker thread to submit AIOs that are being replayed
1332  */
replay_sub(void * arg)1333 static void *replay_sub(void *arg)
1334 {
1335         unsigned int i;
1336 	char *mdev;
1337 	char path[MAXPATHLEN];
1338 	struct io_bunch bunch;
1339 	struct thr_info *tip = arg;
1340 	int oflags;
1341 
1342 	pin_to_cpu(tip);
1343 
1344 	mdev = map_dev(tip->devnm);
1345 	sprintf(path, "/dev/%s", mdev);
1346 	/*
1347 	 * convert underscores to slashes to
1348 	 * restore device names that have larger paths
1349 	 */
1350 	for (i = 0; i < strlen(mdev); i++)
1351 	        if (path[strlen("/dev/") + i] == '_')
1352 		        path[strlen("/dev/") + i] = '/';
1353 #ifdef O_NOATIME
1354 	oflags = O_NOATIME;
1355 #else
1356 	oflags = 0;
1357 #endif
1358 	tip->ofd = open(path, O_RDWR | O_DIRECT | oflags);
1359 	if (tip->ofd < 0) {
1360 		fatal(path, ERR_SYSCALL, "Failed device open\n");
1361 		/*NOTREACHED*/
1362 	}
1363 
1364 	set_replay_ready();
1365 	while (!is_send_done(tip) && tip->iterations--) {
1366 		wait_iter_start();
1367 		if (verbose > 1)
1368 			fprintf(tip->vfp, "\n=== %d ===\n", tip->iterations);
1369 		while (!is_send_done(tip) && next_bunch(tip, &bunch))
1370 			process_bunch(tip, &bunch);
1371 		set_iter_done();
1372 		reset_input_file(tip);
1373 	}
1374 	tip->send_done = 1;
1375 	set_replay_done();
1376 
1377 	return NULL;
1378 }
1379 
1380 /*
1381  * ========================================================================
1382  * ==== COMMAND LINE ARGUMENT HANDLING ====================================
1383  * ========================================================================
1384  */
1385 
/*
 * Option summary. Options that take a value show it as <...>, matching
 * the option string and the long option table.
 */
static char usage_str[] =
	"\n"
	"\t[ -c <cpus> : --cpus=<cpus>           ] Default: 1\n"
	"\t[ -d <dir>  : --input-directory=<dir> ] Default: .\n"
	"\t[ -F        : --find-records          ] Default: Off\n"
	"\t[ -h        : --help                  ] Default: Off\n"
	"\t[ -i <base> : --input-base=<base>     ] Default: replay\n"
	"\t[ -I <iters>: --iterations=<iters>    ] Default: 1\n"
	"\t[ -M <file> : --map-devs=<file>       ] Default: None\n"
	"\t[ -N        : --no-stalls             ] Default: Off\n"
	"\t[ -x <ratio>: --acc-factor=<ratio>    ] Default: 1\n"
	"\t[ -v        : --verbose               ] Default: Off\n"
	"\t[ -V        : --version               ] Default: Off\n"
	"\t[ -W        : --write-enable          ] Default: Off\n"
	"\t<dev...>                                Default: None\n"
	"\n";
1402 
/*
 * Short option string and the matching long option table.
 * NOTE(review): "t:" appears in S_OPTS but has no entry in l_opts.
 */
#define S_OPTS	"c:d:Fhi:I:M:Nx:t:vVW"
static struct option l_opts[] = {
	/* .flag is left out everywhere: it is zero-initialized to NULL */
	{ .name = "cpus", .has_arg = required_argument, .val = 'c' },
	{ .name = "input-directory", .has_arg = required_argument, .val = 'd' },
	{ .name = "find-records", .has_arg = no_argument, .val = 'F' },
	{ .name = "help", .has_arg = no_argument, .val = 'h' },
	{ .name = "input-base", .has_arg = required_argument, .val = 'i' },
	{ .name = "iterations", .has_arg = required_argument, .val = 'I' },
	{ .name = "map-devs", .has_arg = required_argument, .val = 'M' },
	{ .name = "no-stalls", .has_arg = no_argument, .val = 'N' },
	{ .name = "acc-factor", .has_arg = required_argument, .val = 'x' },
	{ .name = "verbose", .has_arg = no_argument, .val = 'v' },
	{ .name = "version", .has_arg = no_argument, .val = 'V' },
	{ .name = "write-enable", .has_arg = no_argument, .val = 'W' },
	{ .name = NULL }	/* terminator */
};
1481 
1482 /**
1483  * handle_args: Parse passed in argument list
1484  * @argc: Number of arguments in argv
1485  * @argv: Arguments passed in
1486  *
1487  * Does rudimentary parameter verification as well.
1488  */
handle_args(int argc,char * argv[])1489 static void handle_args(int argc, char *argv[])
1490 {
1491 	int c;
1492 	int r;
1493 
1494 	while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1495 		switch (c) {
1496 		case 'c':
1497 			cpus_to_use = atoi(optarg);
1498 			if (cpus_to_use <= 0 || cpus_to_use > ncpus) {
1499 				fatal(NULL, ERR_ARGS,
1500 				      "Invalid number of cpus %d (0<x<%d)\n",
1501 				      cpus_to_use, ncpus);
1502 				/*NOTREACHED*/
1503 			}
1504 			break;
1505 
1506 		case 'd':
1507 			idir = optarg;
1508 			if (access(idir, R_OK | X_OK) != 0) {
1509 				fatal(idir, ERR_ARGS,
1510 				      "Invalid input directory specified\n");
1511 				/*NOTREACHED*/
1512 			}
1513 			break;
1514 
1515 		case 'F':
1516 			find_records = 1;
1517 			break;
1518 
1519 		case 'h':
1520 			usage();
1521 			exit(0);
1522 			/*NOTREACHED*/
1523 
1524 		case 'i':
1525 			ibase = optarg;
1526 			break;
1527 
1528 		case 'I':
1529 			def_iterations = atoi(optarg);
1530 			if (def_iterations <= 0) {
1531 				fprintf(stderr,
1532 					"Invalid number of iterations %d\n",
1533 					def_iterations);
1534 				exit(ERR_ARGS);
1535 				/*NOTREACHED*/
1536 			}
1537 			break;
1538 
1539 		case 'M':
1540 			read_map_devs(optarg);
1541 			break;
1542 
1543 		case 'N':
1544 			no_stalls = 1;
1545 			break;
1546 
1547 		case 'x':
1548 			r = sscanf(optarg,"%u",&acc_factor);
1549 			if (r!=1) {
1550 				fprintf(stderr,
1551 					"Invalid acceleration factor\n");
1552 				exit(ERR_ARGS);
1553 				/*NOTREACHED*/
1554 			}
1555 			break;
1556 
1557 		case 'V':
1558 			fprintf(stderr, "btreplay -- version %s\n",
1559 				my_btversion);
1560 			exit(0);
1561 			/*NOTREACHED*/
1562 
1563 		case 'v':
1564 			verbose++;
1565 			break;
1566 
1567 		case 'W':
1568 			write_enabled = 1;
1569 			break;
1570 
1571 		default:
1572 			usage();
1573 			fatal(NULL, ERR_ARGS,
1574 			      "Invalid command line argument %c\n", c);
1575 			/*NOTREACHED*/
1576 		}
1577 	}
1578 
1579 	while (optind < argc)
1580 		add_input_dev(argv[optind++]);
1581 
1582 	if (find_records)
1583 		find_input_devs(idir);
1584 
1585 	if (list_len(&input_devs) == 0) {
1586 		fatal(NULL, ERR_ARGS, "Missing required input dev name(s)\n");
1587 		/*NOTREACHED*/
1588 	}
1589 
1590 	if (cpus_to_use < 0)
1591 		cpus_to_use = ncpus;
1592 }
1593 
1594 /*
1595  * ========================================================================
1596  * ==== MAIN ROUTINE ======================================================
1597  * ========================================================================
1598  */
1599 
1600 /**
1601  * set_signal_done - Signal handler, catches signals & sets signal_done
1602  */
set_signal_done(int signum)1603 static void set_signal_done(__attribute__((__unused__))int signum)
1604 {
1605 	signal_done = 1;
1606 }
1607 
1608 /**
1609  * main -
1610  * @argc: Number of arguments
1611  * @argv: Array of arguments
1612  */
main(int argc,char * argv[])1613 int main(int argc, char *argv[])
1614 {
1615 	int i;
1616 	struct list_head *p;
1617 
1618 	pgsize = getpagesize();
1619 	assert(pgsize > 0);
1620 
1621 	setup_signal(SIGINT, set_signal_done);
1622 	setup_signal(SIGTERM, set_signal_done);
1623 
1624 	get_ncpus();
1625 	handle_args(argc, argv);
1626 	find_input_files();
1627 
1628 	nfiles = list_len(&input_files);
1629 	__list_for_each(p, &input_files) {
1630 		tip_init(list_entry(p, struct thr_info, head));
1631 	}
1632 
1633 	wait_replays_ready();
1634 	for (i = 0; i < def_iterations; i++) {
1635 		rgenesis = gettime();
1636 		start_iter();
1637 		if (verbose)
1638 			fprintf(stderr, "I");
1639 		wait_iters_done();
1640 	}
1641 
1642 	wait_replays_done();
1643 	wait_reclaims_done();
1644 
1645 	if (verbose)
1646 		fprintf(stderr, "\n");
1647 
1648 	rem_input_files();
1649 	release_map_devs();
1650 
1651 	return 0;
1652 }
1653