1 /*
2 * Blktrace replay utility - Play traces back
3 *
4 * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21 #include <assert.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <libaio.h>
25 #include <pthread.h>
26 #include <sched.h>
27 #include <signal.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33 #include <sys/param.h>
34 #include <sys/stat.h>
35 #include <sys/time.h>
36 #include <sys/types.h>
37 #include <dirent.h>
38 #include <stdarg.h>
39
40 #if !defined(_GNU_SOURCE)
41 # define _GNU_SOURCE
42 #endif
43 #include <getopt.h>
44
45 #include "list.h"
46 #include "btrecord.h"
47
48 /*
49 * ========================================================================
50 * ==== STRUCTURE DEFINITIONS =============================================
51 * ========================================================================
52 */
53
54 /**
55 * Each device map has one of these:
56 *
57 * @head: Linked on to map_devs
58 * @from_dev: Device name as seen on recorded system
59 * @to_dev: Device name to be used on replay system
60 */
61 struct map_dev {
62 struct list_head head;
63 char *from_dev, *to_dev;
64 };
65
/**
 * struct dev_info - One device name requested for replay
 *
 * Each device name specified has one of these (until threads are created).
 *
 * @head:	Linked onto input_devs
 * @devnm:	Device name -- 'sd*'
 */
struct dev_info {
	struct list_head head;
	char *devnm;
};
76
77 /*
78 * Per input file information
79 *
80 * @head: Used to link up on input_files
81 * @free_iocbs: List of free iocb's available for use
82 * @used_iocbs: List of iocb's currently outstanding
83 * @mutex: Mutex used with condition variable to protect volatile values
84 * @cond: Condition variable used when waiting on a volatile value change
85 * @naios_out: Current number of AIOs outstanding on this context
86 * @naios_free: Number of AIOs on the free list (short cut for list_len)
87 * @send_wait: Boolean: When true, the sub thread is waiting on free IOCBs
88 * @reap_wait: Boolean: When true, the rec thread is waiting on used IOCBs
89 * @send_done: Boolean: When true, the sub thread has completed work
90 * @reap_done: Boolean: When true, the rec thread has completed work
91 * @sub_thread: Thread used to submit IOs.
92 * @rec_thread: Thread used to reclaim IOs.
93 * @ctx: IO context
94 * @devnm: Copy of the device name being managed by this thread
95 * @file_name: Full name of the input file
96 * @cpu: CPU this thread is pinned to
97 * @ifd: Input file descriptor
98 * @ofd: Output file descriptor
99 * @iterations: Remaining iterations to process
100 * @vfp: For verbose dumping of actions performed
101 */
102 struct thr_info {
103 struct list_head head, free_iocbs, used_iocbs;
104 pthread_mutex_t mutex;
105 pthread_cond_t cond;
106 volatile long naios_out, naios_free;
107 volatile int send_wait, reap_wait, send_done, reap_done;
108 pthread_t sub_thread, rec_thread;
109 io_context_t ctx;
110 char *devnm, *file_name;
111 int cpu, ifd, ofd, iterations;
112 FILE *vfp;
113 };
114
/*
 * struct iocb_pkt - Book-keeping wrapped around one asynchronous IO
 *
 * Every Asynchronous IO used has one of these (naios per file/device).
 *
 * @iocb:	IOCB sent down via io_submit
 * @head:	Linked onto the owning thr_info's free_iocbs or used_iocbs list
 * @tip:	Pointer to per-thread information this IO is associated with
 * @nbytes:	Number of bytes in buffer associated with iocb (0: no buffer)
 */
struct iocb_pkt {
	struct iocb iocb;
	struct list_head head;
	struct thr_info *tip;
	int nbytes;
};
129
130 /*
131 * ========================================================================
132 * ==== GLOBAL VARIABLES ==================================================
133 * ========================================================================
134 */
135
static volatile int signal_done = 0;	// Boolean: Signal'ed, need to quit

static char *ibase = "replay";		// Input base name (part of file names)
static char *idir = ".";		// Input directory base
static int cpus_to_use = -1;		// Number of CPUs to use
static int def_iterations = 1;		// Default number of iterations
static int naios = 512;			// Number of AIOs per thread
static int ncpus = 0;			// Number of CPUs in the system
static int verbose = 0;			// Verbosity level (> 1: per-thread reports)
static int write_enabled = 0;		// Boolean: Enable writing
static __u64 genesis = ~0;		// Earliest time seen (starts at all ones)
static __u64 rgenesis;			// Our start time
static size_t pgsize;			// System Page size
static int nb_sec = 512;		// Number of bytes per sector
static LIST_HEAD(input_devs);		// List of devices to handle
static LIST_HEAD(input_files);		// List of input files to handle
static LIST_HEAD(map_devs);		// List of device maps
static int nfiles = 0;			// Number of files to handle
static int no_stalls = 0;		// Boolean: Disable pre-stalls
static unsigned acc_factor = 1;		// Int: Acceleration factor
static int find_records = 0;		// Boolean: Find record files auto
157
/*
 * Variables managed under control of condition variables.
 *
 * Each counter below is paired with its own mutex and condition variable;
 * the counter must only be read or written with that mutex held.
 *
 * n_reclaims_done:	Counts number of reclaim threads that have completed.
 * n_replays_done:	Counts number of replay threads that have completed.
 * n_replays_ready:	Counts number of replay threads ready to start.
 * n_iters_done:	Counts number of replay threads done one iteration.
 * iter_start:		Starts an iteration for the replay threads.
 */
static volatile int n_reclaims_done = 0;
static pthread_mutex_t reclaim_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t reclaim_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_replays_done = 0;
static pthread_mutex_t replay_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_replays_ready = 0;
static pthread_mutex_t replay_ready_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_ready_cond = PTHREAD_COND_INITIALIZER;

static volatile int n_iters_done = 0;
static pthread_mutex_t iter_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_done_cond = PTHREAD_COND_INITIALIZER;

static volatile int iter_start = 0;
static pthread_mutex_t iter_start_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_start_cond = PTHREAD_COND_INITIALIZER;
186
187 /*
188 * ========================================================================
 * ==== FORWARD REFERENCES ================================================
190 * ========================================================================
191 */
192
/* Thread entry points handed to pthread_create() by tip_init(). */
static void *replay_sub(void *arg);
static void *replay_rec(void *arg);
/* Usage text printed by usage(); the definition is not in this part of the file. */
static char usage_str[];
196
197 /*
198 * ========================================================================
199 * ==== INLINE ROUTINES ===================================================
200 * ========================================================================
201 */
202
/*
 * fatal - Report an unrecoverable error and terminate the program
 * @errstring:	If non-NULL, passed to perror() before the message is printed
 * @exitval:	Process exit status (ERR_ARGS or ERR_SYSCALL)
 * @fmt:	printf-style format for the message written to stderr
 *
 * This function never returns to its caller.
 */
#define ERR_ARGS			1
#define ERR_SYSCALL			2
static inline void fatal(const char *errstring, const int exitval,
			 const char *fmt, ...)
{
	va_list args;

	if (errstring != NULL)
		perror(errstring);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(exitval);
	/*NOTREACHED*/
}
225
/**
 * du64_to_sec - Whole-seconds part of a nanosecond count
 * @du64: Value to split; it is divided by 10^9
 */
static inline long long unsigned du64_to_sec(__u64 du64)
{
	const int ns_per_sec = 1000 * 1000 * 1000;
	long long unsigned total = (long long unsigned)du64;

	return total / ns_per_sec;
}
230
/**
 * du64_to_nsec - Sub-second (nanosecond) part of a nanosecond count
 * @du64: Value to split; it is reinterpreted as signed and its magnitude
 *	  is reduced modulo 10^9
 */
static inline long long unsigned du64_to_nsec(__u64 du64)
{
	const long long ns_per_sec = 1000 * 1000 * 1000;
	long long magnitude = llabs((long long)du64);

	return magnitude % ns_per_sec;
}
235
/**
 * min - Smaller of two ints
 * @a: First value
 * @b: Second value
 */
static inline int min(int a, int b)
{
	if (b < a)
		return b;

	return a;
}
243
/**
 * minl - Smaller of two longs
 * @a: First value
 * @b: Second value
 */
static inline long minl(long a, long b)
{
	if (b < a)
		return b;

	return a;
}
251
/**
 * usage - Display usage string and version
 *
 * Writes a "Usage: btreplay -- version ..." line followed by usage_str
 * to stderr. It does not exit; that is left to the caller.
 */
static inline void usage(void)
{
	fprintf(stderr, "Usage: btreplay -- version %s\n%s",
		my_btversion, usage_str);
}
260
261 /**
262 * is_send_done - Returns true if sender should quit early
263 * @tip: Per-thread information
264 */
is_send_done(struct thr_info * tip)265 static inline int is_send_done(struct thr_info *tip)
266 {
267 return signal_done || tip->send_done;
268 }
269
270 /**
271 * is_reap_done - Returns true if reaper should quit early
272 * @tip: Per-thread information
273 */
is_reap_done(struct thr_info * tip)274 static inline int is_reap_done(struct thr_info *tip)
275 {
276 return signal_done || (tip->send_done && tip->naios_out == 0);
277 }
278
/**
 * ts2ns - Flatten a timespec into a single nanosecond count
 * @ts: Time value (seconds + nanoseconds) to convert
 */
#define NS_TICKS	((__u64)1000 * (__u64)1000 * (__u64)1000)
static inline __u64 ts2ns(struct timespec *ts)
{
	__u64 secs = (__u64)(ts->tv_sec);
	__u64 nsecs = (__u64)(ts->tv_nsec);

	return (secs * NS_TICKS) + nsecs;
}
287
/**
 * tv2ns - Convert timeval values to a nanosecond value
 * @tp: Time value (seconds + microseconds) to convert
 *
 * Seconds are scaled by 10^9 and microseconds by 10^3 so that the result
 * is in the same unit as ts2ns(); gettime() returns either one depending
 * on which clock is available.
 */
static inline __u64 tv2ns(struct timeval *tp)
{
	const __u64 ns_per_sec = (__u64)1000 * (__u64)1000 * (__u64)1000;
	const __u64 ns_per_usec = (__u64)1000;

	return ((__u64)(tp->tv_sec) * ns_per_sec) +
	       ((__u64)(tp->tv_usec) * ns_per_usec);
}
295
296 /**
297 * touch_memory - Force physical memory to be allocating it
298 *
299 * For malloc()ed memory we need to /touch/ it to make it really
300 * exist. Otherwise, for write's (to storage) things may not work
301 * as planned - we see Linux just use a single area to /read/ from
302 * (as there isn't any memory that has been associated with the
303 * allocated virtual addresses yet).
304 */
touch_memory(char * buf,size_t bsize)305 static inline void touch_memory(char *buf, size_t bsize)
306 {
307 #if defined(PREP_BUFS)
308 memset(buf, 0, bsize);
309 #else
310 size_t i;
311
312 for (i = 0; i < bsize; i += pgsize)
313 buf[i] = 0;
314 #endif
315 }
316
317 /**
318 * buf_alloc - Returns a page-aligned buffer of the specified size
319 * @nbytes: Number of bytes to allocate
320 */
buf_alloc(size_t nbytes)321 static inline void *buf_alloc(size_t nbytes)
322 {
323 void *buf;
324
325 if (posix_memalign(&buf, pgsize, nbytes)) {
326 fatal("posix_memalign", ERR_SYSCALL, "Allocation failed\n");
327 /*NOTREACHED*/
328 }
329
330 return buf;
331 }
332
333 /**
334 * gettime - Returns current time
335 */
gettime(void)336 static inline __u64 gettime(void)
337 {
338 static int use_clock_gettime = -1; // Which clock to use
339
340 if (use_clock_gettime < 0) {
341 use_clock_gettime = clock_getres(CLOCK_MONOTONIC, NULL) == 0;
342 if (use_clock_gettime) {
343 struct timespec ts = {
344 .tv_sec = 0,
345 .tv_nsec = 0
346 };
347 clock_settime(CLOCK_MONOTONIC, &ts);
348 }
349 }
350
351 if (use_clock_gettime) {
352 struct timespec ts;
353 clock_gettime(CLOCK_MONOTONIC, &ts);
354 return ts2ns(&ts);
355 }
356 else {
357 struct timeval tp;
358 gettimeofday(&tp, NULL);
359 return tv2ns(&tp);
360 }
361 }
362
363 /**
364 * setup_signal - Set up a signal handler for the specified signum
365 */
setup_signal(int signum,sighandler_t handler)366 static inline void setup_signal(int signum, sighandler_t handler)
367 {
368 if (signal(signum, handler) == SIG_ERR) {
369 fatal("signal", ERR_SYSCALL, "Failed to set signal %d\n",
370 signum);
371 /*NOTREACHED*/
372 }
373 }
374
375 /*
376 * ========================================================================
377 * ==== CONDITION VARIABLE ROUTINES =======================================
378 * ========================================================================
379 */
380
/**
 * __set_cv - Bump a counter guarded by a mutex/condition variable pair
 * @pmp:	Pointer to the associated mutex
 * @pcp:	Pointer to the associated condition variable
 * @vp:		Pointer to the counter being incremented
 * @mxv:	Upper bound for the counter (only checked when asserts are on)
 *
 * Takes the mutex, adds one to *@vp, signals the condition variable and
 * releases the mutex.
 */
static inline void __set_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			    volatile int *vp,
			    __attribute__((__unused__))int mxv)
{
	int current;

	pthread_mutex_lock(pmp);
	current = *vp;
	assert(current < mxv);
	*vp = current + 1;
	pthread_cond_signal(pcp);
	pthread_mutex_unlock(pmp);
}
398
/**
 * __wait_cv - Block until a guarded counter reaches a target, then reset it
 * @pmp:	Pointer to the associated mutex
 * @pcp:	Pointer to the associated condition variable
 * @vp:		Pointer to the counter being watched
 * @mxv:	Value to wait for
 *
 * Sleeps on the condition variable while *@vp is below @mxv. Once the
 * target is reached the counter is set back to zero before the mutex is
 * released.
 */
static inline void __wait_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			     volatile int *vp, int mxv)
{
	pthread_mutex_lock(pmp);
	for (;;) {
		if (*vp >= mxv)
			break;
		pthread_cond_wait(pcp, pmp);
	}
	*vp = 0;
	pthread_mutex_unlock(pmp);
}
415
set_reclaim_done(void)416 static inline void set_reclaim_done(void)
417 {
418 __set_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
419 nfiles);
420 }
421
wait_reclaims_done(void)422 static inline void wait_reclaims_done(void)
423 {
424 __wait_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
425 nfiles);
426 }
427
set_replay_ready(void)428 static inline void set_replay_ready(void)
429 {
430 __set_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
431 nfiles);
432 }
433
wait_replays_ready(void)434 static inline void wait_replays_ready(void)
435 {
436 __wait_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
437 nfiles);
438 }
439
set_replay_done(void)440 static inline void set_replay_done(void)
441 {
442 __set_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
443 nfiles);
444 }
445
wait_replays_done(void)446 static inline void wait_replays_done(void)
447 {
448 __wait_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
449 nfiles);
450 }
451
set_iter_done(void)452 static inline void set_iter_done(void)
453 {
454 __set_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
455 nfiles);
456 }
457
wait_iters_done(void)458 static inline void wait_iters_done(void)
459 {
460 __wait_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
461 nfiles);
462 }
463
/**
 * wait_iter_start - Wait for an iteration to start
 *
 * This is /slightly/ different: we are waiting for a value to become
 * non-zero, and then we decrement it and go on.
 *
 * start_iter() sets iter_start to nfiles; each caller of this routine
 * consumes one unit of that count while holding iter_start_mutex.
 */
static inline void wait_iter_start(void)
{
	pthread_mutex_lock(&iter_start_mutex);
	/* Loop to cope with wake-ups that arrive while the count is still 0 */
	while (iter_start == 0)
		pthread_cond_wait(&iter_start_cond, &iter_start_mutex);
	assert(1 <= iter_start && iter_start <= nfiles);
	iter_start--;
	pthread_mutex_unlock(&iter_start_mutex);
}
479
/**
 * start_iter - Start an iteration at the replay thread level
 *
 * Sets iter_start to nfiles and wakes every thread blocked in
 * wait_iter_start(). The assert requires the count from the previous
 * iteration to have been fully consumed.
 */
static inline void start_iter(void)
{
	pthread_mutex_lock(&iter_start_mutex);
	assert(iter_start == 0);
	iter_start = nfiles;
	/* Broadcast: several waiters may be blocked, one per replay thread */
	pthread_cond_broadcast(&iter_start_cond);
	pthread_mutex_unlock(&iter_start_mutex);
}
491
492 /*
493 * ========================================================================
494 * ==== CPU RELATED ROUTINES ==============================================
495 * ========================================================================
496 */
497
498 /**
499 * get_ncpus - Sets up the global 'ncpus' value
500 */
get_ncpus(void)501 static void get_ncpus(void)
502 {
503 #ifdef _SC_NPROCESSORS_ONLN
504 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
505 #else
506 int nrcpus = 4096;
507 cpu_set_t * cpus;
508
509 realloc:
510 cpus = CPU_ALLOC(nrcpus);
511 size = CPU_ALLOC_SIZE(nrcpus);
512 CPU_ZERO_S(size, cpus);
513
514 if (sched_getaffinity(0, size, cpus)) {
515 if( errno == EINVAL && nrcpus < (4096<<4) ) {
516 CPU_FREE(cpus);
517 nrcpus <<= 1;
518 goto realloc;
519 }
520 fatal("sched_getaffinity", ERR_SYSCALL, "Can't get CPU info\n");
521 /*NOTREACHED*/
522 }
523
524 ncpus = -1;
525 for (last_cpu = 0; last_cpu < CPU_SETSIZE && CPU_ISSET(last_cpu, &cpus); last_cpu++)
526 if (CPU_ISSET( last_cpu, &cpus) )
527 ncpus = last_cpu;
528 ncpus++;
529 CPU_FREE(cpus);
530 #endif
531 if (ncpus == 0) {
532 fatal(NULL, ERR_SYSCALL, "Insufficient number of CPUs\n");
533 /*NOTREACHED*/
534 }
535 }
536
537 /**
538 * pin_to_cpu - Pin this thread to a specific CPU
539 * @tip: Thread information
540 */
pin_to_cpu(struct thr_info * tip)541 static void pin_to_cpu(struct thr_info *tip)
542 {
543 cpu_set_t *cpus;
544 size_t size;
545
546 cpus = CPU_ALLOC(ncpus);
547 size = CPU_ALLOC_SIZE(ncpus);
548
549 assert(0 <= tip->cpu && tip->cpu < ncpus);
550
551 CPU_ZERO_S(size, cpus);
552 CPU_SET_S(tip->cpu, size, cpus);
553 if (sched_setaffinity(0, size, cpus)) {
554 fatal("sched_setaffinity", ERR_SYSCALL, "Failed to pin CPU\n");
555 /*NOTREACHED*/
556 }
557 assert(tip->cpu == sched_getcpu());
558
559 if (verbose > 1) {
560 int i;
561 cpu_set_t *now = CPU_ALLOC(ncpus);
562
563 (void)sched_getaffinity(0, size, now);
564 fprintf(tip->vfp, "Pinned to CPU %02d ", tip->cpu);
565 for (i = 0; i < ncpus; i++)
566 fprintf(tip->vfp, "%1d", CPU_ISSET_S(i, size, now));
567 fprintf(tip->vfp, "\n");
568 }
569 }
570
571 /*
572 * ========================================================================
573 * ==== INPUT DEVICE HANDLERS =============================================
574 * ========================================================================
575 */
576
577 /**
578 * add_input_dev - Add a device ('sd*') to the list of devices to handle
579 */
add_input_dev(char * devnm)580 static void add_input_dev(char *devnm)
581 {
582 struct list_head *p;
583 struct dev_info *dip;
584
585 __list_for_each(p, &input_devs) {
586 dip = list_entry(p, struct dev_info, head);
587 if (strcmp(dip->devnm, devnm) == 0)
588 return;
589 }
590
591 dip = malloc(sizeof(*dip));
592 dip->devnm = strdup(devnm);
593 list_add_tail(&dip->head, &input_devs);
594 }
595
596 /**
597 * rem_input_dev - Remove resources associated with this device
598 */
rem_input_dev(struct dev_info * dip)599 static void rem_input_dev(struct dev_info *dip)
600 {
601 list_del(&dip->head);
602 free(dip->devnm);
603 free(dip);
604 }
605
find_input_devs(char * idir)606 static void find_input_devs(char *idir)
607 {
608 struct dirent *ent;
609 DIR *dir = opendir(idir);
610
611 if (dir == NULL) {
612 fatal(idir, ERR_ARGS, "Unable to open %s\n", idir);
613 /*NOTREACHED*/
614 }
615
616 while ((ent = readdir(dir)) != NULL) {
617 char *p, *dsf;
618
619 if (strstr(ent->d_name, ".replay.") == NULL)
620 continue;
621
622 dsf = strdup(ent->d_name);
623 p = index(dsf, '.');
624 assert(p != NULL);
625 *p = '\0';
626 add_input_dev(dsf);
627 free(dsf);
628 }
629
630 closedir(dir);
631 }
632
633 /*
634 * ========================================================================
635 * ==== MAP DEVICE INTERFACES =============================================
636 * ========================================================================
637 */
638
639 /**
640 * read_map_devs - Read in a set of device mapping from the provided file.
641 * @file_name: File containing device maps
642 *
643 * We support the notion of multiple such files being specifed on the cmd line
644 */
read_map_devs(char * file_name)645 static void read_map_devs(char *file_name)
646 {
647 FILE *fp;
648 char from_dev[256], to_dev[256];
649
650 fp = fopen(file_name, "r");
651 if (!fp) {
652 fatal(file_name, ERR_SYSCALL, "Could not open map devs file\n");
653 /*NOTREACHED*/
654 }
655
656 while (fscanf(fp, "%s %s", from_dev, to_dev) == 2) {
657 struct map_dev *mdp = malloc(sizeof(*mdp));
658
659 mdp->from_dev = from_dev;
660 mdp->to_dev = to_dev;
661 list_add_tail(&mdp->head, &map_devs);
662 }
663
664 fclose(fp);
665 }
666
667 /**
668 * release_map_devs - Release resources associated with device mappings.
669 */
release_map_devs(void)670 static void release_map_devs(void)
671 {
672 struct list_head *p, *q;
673
674 list_for_each_safe(p, q, &map_devs) {
675 struct map_dev *mdp = list_entry(p, struct map_dev, head);
676
677 list_del(&mdp->head);
678
679 free(mdp->from_dev);
680 free(mdp->to_dev);
681 free(mdp);
682 }
683 }
684
685 /**
686 * map_dev - Return the mapped device for that specified
687 * @from_dev: Device name as seen on recorded system
688 *
689 * Note: If there is no such mapping, we return the same name.
690 */
map_dev(char * from_dev)691 static char *map_dev(char *from_dev)
692 {
693 struct list_head *p;
694
695 __list_for_each(p, &map_devs) {
696 struct map_dev *mdp = list_entry(p, struct map_dev, head);
697
698 if (strcmp(from_dev, mdp->from_dev) == 0)
699 return mdp->to_dev;
700 }
701
702 return from_dev;
703 }
704
705 /*
706 * ========================================================================
707 * ==== IOCB MANAGEMENT ROUTINES ==========================================
708 * ========================================================================
709 */
710
711 /**
712 * iocb_init - Initialize the fields of an IOCB
713 * @tip: Per-thread information
714 * iocbp: IOCB pointer to update
715 */
iocb_init(struct thr_info * tip,struct iocb_pkt * iocbp)716 static void iocb_init(struct thr_info *tip, struct iocb_pkt *iocbp)
717 {
718 iocbp->tip = tip;
719 iocbp->nbytes = 0;
720 iocbp->iocb.u.c.buf = NULL;
721 }
722
723 /**
724 * iocb_setup - Set up an iocb with this AIOs information
725 * @iocbp: IOCB pointer to update
726 * @rw: Direction (0 == write, 1 == read)
727 * @n: Number of bytes to transfer
728 * @off: Offset (in bytes)
729 */
iocb_setup(struct iocb_pkt * iocbp,int rw,int n,long long off)730 static void iocb_setup(struct iocb_pkt *iocbp, int rw, int n, long long off)
731 {
732 char *buf;
733 struct iocb *iop = &iocbp->iocb;
734
735 assert(rw == 0 || rw == 1);
736 assert(0 < n && (n % nb_sec) == 0);
737 assert(0 <= off);
738
739 if (iocbp->nbytes) {
740 if (iocbp->nbytes >= n) {
741 buf = iop->u.c.buf;
742 goto prep;
743 }
744
745 assert(iop->u.c.buf);
746 free(iop->u.c.buf);
747 }
748
749 buf = buf_alloc(n);
750 iocbp->nbytes = n;
751
752 prep:
753 if (rw)
754 io_prep_pread(iop, iocbp->tip->ofd, buf, n, off);
755 else {
756 assert(write_enabled);
757 io_prep_pwrite(iop, iocbp->tip->ofd, buf, n, off);
758 touch_memory(buf, n);
759 }
760
761 iop->data = iocbp;
762 }
763
764 /*
765 * ========================================================================
766 * ==== PER-THREAD SET UP & TEAR DOWN =====================================
767 * ========================================================================
768 */
769
770 /**
771 * tip_init - Per thread initialization function
772 */
tip_init(struct thr_info * tip)773 static void tip_init(struct thr_info *tip)
774 {
775 int i;
776
777 INIT_LIST_HEAD(&tip->free_iocbs);
778 INIT_LIST_HEAD(&tip->used_iocbs);
779
780 pthread_mutex_init(&tip->mutex, NULL);
781 pthread_cond_init(&tip->cond, NULL);
782
783 if (io_setup(naios, &tip->ctx)) {
784 fatal("io_setup", ERR_SYSCALL, "io_setup failed\n");
785 /*NOTREACHED*/
786 }
787
788 tip->ofd = -1;
789 tip->naios_out = 0;
790 tip->send_done = tip->reap_done = 0;
791 tip->send_wait = tip->reap_wait = 0;
792
793 memset(&tip->sub_thread, 0, sizeof(tip->sub_thread));
794 memset(&tip->rec_thread, 0, sizeof(tip->rec_thread));
795
796 for (i = 0; i < naios; i++) {
797 struct iocb_pkt *iocbp = buf_alloc(sizeof(*iocbp));
798
799 iocb_init(tip, iocbp);
800 list_add_tail(&iocbp->head, &tip->free_iocbs);
801 }
802 tip->naios_free = naios;
803
804 if (verbose > 1) {
805 char fn[MAXPATHLEN];
806
807 sprintf(fn, "%s/%s.%s.%d.rep", idir, tip->devnm, ibase,
808 tip->cpu);
809 tip->vfp = fopen(fn, "w");
810 if (!tip->vfp) {
811 fatal(fn, ERR_SYSCALL, "Failed to open report\n");
812 /*NOTREACHED*/
813 }
814
815 setlinebuf(tip->vfp);
816 }
817
818 if (pthread_create(&tip->sub_thread, NULL, replay_sub, tip)) {
819 fatal("pthread_create", ERR_SYSCALL,
820 "thread create failed\n");
821 /*NOTREACHED*/
822 }
823
824 if (pthread_create(&tip->rec_thread, NULL, replay_rec, tip)) {
825 fatal("pthread_create", ERR_SYSCALL,
826 "thread create failed\n");
827 /*NOTREACHED*/
828 }
829 }
830
/**
 * tip_release - Release resources associated with this thread
 * @tip: Per-thread information to tear down
 *
 * Joins the submit and reclaim threads, destroys the AIO context, frees
 * every IOCB packet (and its data buffer, if one was allocated) and
 * destroys the mutex and condition variable. The thr_info itself is not
 * freed here.
 */
static void tip_release(struct thr_info *tip)
{
	struct list_head *p, *q;

	/* Both workers must have finished and returned all IOCBs */
	assert(tip->send_done);
	assert(tip->reap_done);
	assert(list_len(&tip->used_iocbs) == 0);
	assert(tip->naios_free == naios);

	if (pthread_join(tip->sub_thread, NULL)) {
		fatal("pthread_join", ERR_SYSCALL, "pthread sub join failed\n");
		/*NOTREACHED*/
	}
	if (pthread_join(tip->rec_thread, NULL)) {
		fatal("pthread_join", ERR_SYSCALL, "pthread rec join failed\n");
		/*NOTREACHED*/
	}

	io_destroy(tip->ctx);

	/* Fold any used entries into the free list so one loop frees all */
	list_splice(&tip->used_iocbs, &tip->free_iocbs);
	list_for_each_safe(p, q, &tip->free_iocbs) {
		struct iocb_pkt *iocbp = list_entry(p, struct iocb_pkt, head);

		list_del(&iocbp->head);
		/* nbytes != 0 means iocb_setup() attached a data buffer */
		if (iocbp->nbytes)
			free(iocbp->iocb.u.c.buf);
		free(iocbp);
	}

	pthread_cond_destroy(&tip->cond);
	pthread_mutex_destroy(&tip->mutex);
}
867
868 /**
869 * add_input_file - Allocate and initialize per-input file structure
870 * @cpu: CPU for this file
871 * @devnm: Device name for this file
872 * @file_name: Fully qualifed input file name
873 */
add_input_file(int cpu,char * devnm,char * file_name)874 static void add_input_file(int cpu, char *devnm, char *file_name)
875 {
876 struct stat buf;
877 struct io_file_hdr hdr;
878 struct thr_info *tip = buf_alloc(sizeof(*tip));
879 __u64 my_version = mk_btversion(btver_mjr, btver_mnr, btver_sub);
880
881 assert(0 <= cpu && cpu < ncpus);
882
883 memset(&hdr, 0, sizeof(hdr));
884 memset(tip, 0, sizeof(*tip));
885 tip->cpu = cpu % cpus_to_use;
886 tip->iterations = def_iterations;
887
888 tip->ifd = open(file_name, O_RDONLY);
889 if (tip->ifd < 0) {
890 fatal(file_name, ERR_ARGS, "Unable to open\n");
891 /*NOTREACHED*/
892 }
893 if (fstat(tip->ifd, &buf) < 0) {
894 fatal(file_name, ERR_SYSCALL, "fstat failed\n");
895 /*NOTREACHED*/
896 }
897 if (buf.st_size < (off_t)sizeof(hdr)) {
898 if (verbose)
899 fprintf(stderr, "\t%s empty\n", file_name);
900 goto empty_file;
901 }
902
903 if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
904 fatal(file_name, ERR_ARGS, "Header read failed\n");
905 /*NOTREACHED*/
906 }
907
908 if (hdr.version != my_version) {
909 fprintf(stderr, "%llx %llx %llx %llx\n",
910 (long long unsigned)hdr.version,
911 (long long unsigned)hdr.genesis,
912 (long long unsigned)hdr.nbunches,
913 (long long unsigned)hdr.total_pkts);
914 fatal(NULL, ERR_ARGS,
915 "BT version mismatch: %lx versus my %lx\n",
916 (long)hdr.version, (long)my_version);
917
918 }
919
920 if (hdr.nbunches == 0) {
921 empty_file:
922 close(tip->ifd);
923 free(tip);
924 return;
925 }
926
927 if (hdr.genesis < genesis) {
928 if (verbose > 1)
929 fprintf(stderr, "Setting genesis to %llu.%llu\n",
930 du64_to_sec(hdr.genesis),
931 du64_to_nsec(hdr.genesis));
932 genesis = hdr.genesis;
933 }
934
935 tip->devnm = strdup(devnm);
936 tip->file_name = strdup(file_name);
937
938 list_add_tail(&tip->head, &input_files);
939
940 if (verbose)
941 fprintf(stderr, "Added %s %llu\n", file_name,
942 (long long)hdr.genesis);
943 }
944
945 /**
946 * rem_input_file - Release resources associated with an input file
947 * @tip: Per-input file information
948 */
rem_input_file(struct thr_info * tip)949 static void rem_input_file(struct thr_info *tip)
950 {
951 list_del(&tip->head);
952
953 tip_release(tip);
954
955 close(tip->ofd);
956 close(tip->ifd);
957 free(tip->file_name);
958 free(tip->devnm);
959 free(tip);
960 }
961
962 /**
963 * rem_input_files - Remove all input files
964 */
rem_input_files(void)965 static void rem_input_files(void)
966 {
967 struct list_head *p, *q;
968
969 list_for_each_safe(p, q, &input_files) {
970 rem_input_file(list_entry(p, struct thr_info, head));
971 }
972 }
973
974 /**
975 * __find_input_files - Find input files associated with this device (per cpu)
976 */
__find_input_files(struct dev_info * dip)977 static void __find_input_files(struct dev_info *dip)
978 {
979 int cpu = 0;
980
981 for (;;) {
982 char full_name[MAXPATHLEN];
983
984 sprintf(full_name, "%s/%s.%s.%d", idir, dip->devnm, ibase, cpu);
985 if (access(full_name, R_OK) != 0)
986 break;
987
988 add_input_file(cpu, dip->devnm, full_name);
989 cpu++;
990 }
991
992 if (!cpu) {
993 fatal(NULL, ERR_ARGS, "No traces found for %s\n", dip->devnm);
994 /*NOTREACHED*/
995 }
996
997 rem_input_dev(dip);
998 }
999
1000
1001 /**
1002 * find_input_files - Find input files for all devices
1003 */
find_input_files(void)1004 static void find_input_files(void)
1005 {
1006 struct list_head *p, *q;
1007
1008 list_for_each_safe(p, q, &input_devs) {
1009 __find_input_files(list_entry(p, struct dev_info, head));
1010 }
1011 }
1012
1013 /*
1014 * ========================================================================
1015 * ==== RECLAIM ROUTINES ==================================================
1016 * ========================================================================
1017 */
1018
/**
 * reap_wait_aios - Wait for and return number of outstanding AIOs
 * @tip: Per-thread information
 *
 * Blocks on tip->cond (with reap_wait set) while no AIOs are outstanding.
 *
 * Will return 0 if we are done, else the outstanding count that was
 * sampled under the mutex.
 */
static int reap_wait_aios(struct thr_info *tip)
{
	/* Local count; note that it shadows the file-scope 'naios' */
	int naios = 0;

	if (!is_reap_done(tip)) {
		pthread_mutex_lock(&tip->mutex);
		while (tip->naios_out == 0) {
			/* Flag that the reclaim side is waiting for work */
			tip->reap_wait = 1;
			if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
				fatal("pthread_cond_wait", ERR_SYSCALL,
					"nfree_current cond wait failed\n");
				/*NOTREACHED*/
			}
		}
		naios = tip->naios_out;
		pthread_mutex_unlock(&tip->mutex);
	}
	assert(is_reap_done(tip) || naios > 0);

	/* Re-check: the done state may have changed while we waited */
	return is_reap_done(tip) ? 0 : naios;
}
1045
/**
 * reclaim_ios - Reclaim AIOs completed, recycle IOCBs
 * @tip:	Per-thread information
 * @naios_out:	Number of AIOs we have outstanding (min)
 *
 * Collects between 1 and @naios_out completion events, moves each
 * completed IOCB packet back to the free list, updates the counters and
 * wakes the submit thread if it is waiting for free IOCBs. Repeats while
 * AIOs counted on entry are still outstanding.
 *
 * A completion whose result differs from the requested byte count
 * terminates the program via fatal().
 */
static void reclaim_ios(struct thr_info *tip, long naios_out)
{
	long i, ndone;
	/* Sized once from the value passed in; naios_out only shrinks below */
	struct io_event *evp, events[naios_out];

again:
	/* This checks the file-scope 'naios', not the naios_out parameter */
	assert(naios > 0);
	for (;;) {
		ndone = io_getevents(tip->ctx, 1, naios_out, events, NULL);
		if (ndone > 0)
			break;

		/*
		 * NOTE(review): this relies on errno; confirm how the
		 * io_getevents in use reports failure.
		 */
		if (errno && errno != EINTR) {
			fatal("io_getevents", ERR_SYSCALL,
				"io_getevents failed\n");
			/*NOTREACHED*/
		}
	}
	assert(0 < ndone && ndone <= naios_out);

	pthread_mutex_lock(&tip->mutex);
	for (i = 0, evp = events; i < ndone; i++, evp++) {
		/* iocb_setup() stored the packet pointer in the iocb's data */
		struct iocb_pkt *iocbp = evp->data;

		if (evp->res != iocbp->iocb.u.c.nbytes) {
			fatal(NULL, ERR_SYSCALL,
				"Event failure %ld/%ld\t(%ld + %ld)\n",
				(long)evp->res, (long)evp->res2,
				(long)iocbp->iocb.u.c.offset / nb_sec,
				(long)iocbp->iocb.u.c.nbytes / nb_sec);
			/*NOTREACHED*/
		}

		list_move_tail(&iocbp->head, &tip->free_iocbs);
	}

	tip->naios_free += ndone;
	tip->naios_out -= ndone;
	/* Never ask for more events than fit in events[] */
	naios_out = minl(naios_out, tip->naios_out);

	if (tip->send_wait) {
		tip->send_wait = 0;
		pthread_cond_signal(&tip->cond);
	}
	pthread_mutex_unlock(&tip->mutex);

	/*
	 * Short cut: If we /know/ there are some more AIOs, go handle them
	 */
	if (naios_out)
		goto again;
}
1103
1104 /**
1105 * replay_rec - Worker thread to reclaim AIOs
1106 * @arg: Pointer to thread information
1107 */
replay_rec(void * arg)1108 static void *replay_rec(void *arg)
1109 {
1110 long naios_out;
1111 struct thr_info *tip = arg;
1112
1113 while ((naios_out = reap_wait_aios(tip)) > 0)
1114 reclaim_ios(tip, naios_out);
1115
1116 assert(tip->send_done);
1117 tip->reap_done = 1;
1118 set_reclaim_done();
1119
1120 return NULL;
1121 }
1122
1123 /*
1124 * ========================================================================
1125 * ==== REPLAY ROUTINES ===================================================
1126 * ========================================================================
1127 */
1128
1129 /**
1130 * next_bunch - Retrieve next bunch of AIOs to process
1131 * @tip: Per-thread information
1132 * @bunch: Bunch information
1133 *
1134 * Returns TRUE if we recovered a bunch of IOs, else hit EOF
1135 */
next_bunch(struct thr_info * tip,struct io_bunch * bunch)1136 static int next_bunch(struct thr_info *tip, struct io_bunch *bunch)
1137 {
1138 size_t count, result;
1139
1140 result = read(tip->ifd, &bunch->hdr, sizeof(bunch->hdr));
1141 if (result != sizeof(bunch->hdr)) {
1142 if (result == 0)
1143 return 0;
1144
1145 fatal(tip->file_name, ERR_SYSCALL, "Short hdr(%ld)\n",
1146 (long)result);
1147 /*NOTREACHED*/
1148 }
1149 assert(bunch->hdr.npkts <= BT_MAX_PKTS);
1150
1151 count = bunch->hdr.npkts * sizeof(struct io_pkt);
1152 result = read(tip->ifd, &bunch->pkts, count);
1153 if (result != count) {
1154 fatal(tip->file_name, ERR_SYSCALL, "Short pkts(%ld/%ld)\n",
1155 (long)result, (long)count);
1156 /*NOTREACHED*/
1157 }
1158
1159 return 1;
1160 }
1161
1162 /**
1163 * nfree_current - Returns current number of AIOs that are free
1164 *
1165 * Will wait for available ones...
1166 *
1167 * Returns 0 if we have some condition that causes us to exit
1168 */
nfree_current(struct thr_info * tip)1169 static int nfree_current(struct thr_info *tip)
1170 {
1171 int nfree = 0;
1172
1173 pthread_mutex_lock(&tip->mutex);
1174 while (!is_send_done(tip) && ((nfree = tip->naios_free) == 0)) {
1175 tip->send_wait = 1;
1176 if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1177 fatal("pthread_cond_wait", ERR_SYSCALL,
1178 "nfree_current cond wait failed\n");
1179 /*NOTREACHED*/
1180 }
1181 }
1182 pthread_mutex_unlock(&tip->mutex);
1183
1184 return nfree;
1185 }
1186
1187 /**
1188 * stall - Stall for the number of nanoseconds requested
1189 *
1190 * We may be late, in which case we just return.
1191 */
stall(struct thr_info * tip,long long oclock)1192 static void stall(struct thr_info *tip, long long oclock)
1193 {
1194 struct timespec req;
1195 long long dreal, tclock = gettime() - rgenesis;
1196
1197 oclock /= acc_factor;
1198
1199 if (verbose > 1)
1200 fprintf(tip->vfp, " stall(%lld.%09lld, %lld.%09lld)\n",
1201 du64_to_sec(oclock), du64_to_nsec(oclock),
1202 du64_to_sec(tclock), du64_to_nsec(tclock));
1203
1204 while (!is_send_done(tip) && tclock < oclock) {
1205 dreal = oclock - tclock;
1206 req.tv_sec = dreal / (1000 * 1000 * 1000);
1207 req.tv_nsec = dreal % (1000 * 1000 * 1000);
1208
1209 if (verbose > 1) {
1210 fprintf(tip->vfp, "++ stall(%lld.%09lld) ++\n",
1211 (long long)req.tv_sec,
1212 (long long)req.tv_nsec);
1213 }
1214
1215 if (nanosleep(&req, NULL) < 0 && signal_done)
1216 break;
1217
1218 tclock = gettime() - rgenesis;
1219 }
1220 }
1221
1222 /**
1223 * iocbs_map - Map a set of AIOs onto a set of IOCBs
1224 * @tip: Per-thread information
1225 * @list: List of AIOs created
1226 * @pkts: AIOs to map
1227 * @ntodo: Number of AIOs to map
1228 */
iocbs_map(struct thr_info * tip,struct iocb ** list,struct io_pkt * pkts,int ntodo)1229 static void iocbs_map(struct thr_info *tip, struct iocb **list,
1230 struct io_pkt *pkts, int ntodo)
1231 {
1232 int i;
1233 struct io_pkt *pkt;
1234
1235 assert(0 < ntodo && ntodo <= naios);
1236
1237 pthread_mutex_lock(&tip->mutex);
1238 assert(ntodo <= list_len(&tip->free_iocbs));
1239 for (i = 0, pkt = pkts; i < ntodo; i++, pkt++) {
1240 __u32 rw = pkt->rw;
1241 struct iocb_pkt *iocbp;
1242
1243 if (!pkt->rw && !write_enabled)
1244 rw = 1;
1245
1246 if (verbose > 1)
1247 fprintf(tip->vfp, "\t%10llu + %10llu %c%c\n",
1248 (unsigned long long)pkt->sector,
1249 (unsigned long long)pkt->nbytes / nb_sec,
1250 rw ? 'R' : 'W',
1251 (rw == 1 && pkt->rw == 0) ? '!' : ' ');
1252
1253 iocbp = list_entry(tip->free_iocbs.next, struct iocb_pkt, head);
1254 iocb_setup(iocbp, rw, pkt->nbytes, pkt->sector * nb_sec);
1255
1256 list_move_tail(&iocbp->head, &tip->used_iocbs);
1257 list[i] = &iocbp->iocb;
1258 }
1259
1260 tip->naios_free -= ntodo;
1261 assert(tip->naios_free >= 0);
1262 pthread_mutex_unlock(&tip->mutex);
1263 }
1264
1265 /**
1266 * process_bunch - Process a bunch of requests
1267 * @tip: Per-thread information
1268 * @bunch: Bunch to process
1269 */
process_bunch(struct thr_info * tip,struct io_bunch * bunch)1270 static void process_bunch(struct thr_info *tip, struct io_bunch *bunch)
1271 {
1272 __u64 i = 0;
1273 struct iocb *list[bunch->hdr.npkts];
1274
1275 assert(0 < bunch->hdr.npkts && bunch->hdr.npkts <= BT_MAX_PKTS);
1276 while (!is_send_done(tip) && (i < bunch->hdr.npkts)) {
1277 long ndone;
1278 int ntodo = min(nfree_current(tip), bunch->hdr.npkts - i);
1279
1280 assert(0 < ntodo && ntodo <= naios);
1281 iocbs_map(tip, list, &bunch->pkts[i], ntodo);
1282 if (!no_stalls)
1283 stall(tip, bunch->hdr.time_stamp - genesis);
1284
1285 if (ntodo) {
1286 if (verbose > 1)
1287 fprintf(tip->vfp, "submit(%d)\n", ntodo);
1288 ndone = io_submit(tip->ctx, ntodo, list);
1289 if (ndone != (long)ntodo) {
1290 fatal("io_submit", ERR_SYSCALL,
1291 "%d: io_submit(%d:%ld) failed (%s)\n",
1292 tip->cpu, ntodo, ndone,
1293 strerror(labs(ndone)));
1294 /*NOTREACHED*/
1295 }
1296
1297 pthread_mutex_lock(&tip->mutex);
1298 tip->naios_out += ndone;
1299 assert(tip->naios_out <= naios);
1300 if (tip->reap_wait) {
1301 tip->reap_wait = 0;
1302 pthread_cond_signal(&tip->cond);
1303 }
1304 pthread_mutex_unlock(&tip->mutex);
1305
1306 i += ndone;
1307 assert(i <= bunch->hdr.npkts);
1308 }
1309 }
1310 }
1311
1312 /**
1313 * reset_input_file - Reset the input file for the next iteration
1314 * @tip: Thread information
1315 *
1316 * We also do a dummy read of the file header to get us to the first bunch.
1317 */
reset_input_file(struct thr_info * tip)1318 static void reset_input_file(struct thr_info *tip)
1319 {
1320 struct io_file_hdr hdr;
1321
1322 lseek(tip->ifd, 0, 0);
1323
1324 if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
1325 fatal(tip->file_name, ERR_ARGS, "Header reread failed\n");
1326 /*NOTREACHED*/
1327 }
1328 }
1329
1330 /**
1331 * replay_sub - Worker thread to submit AIOs that are being replayed
1332 */
replay_sub(void * arg)1333 static void *replay_sub(void *arg)
1334 {
1335 unsigned int i;
1336 char *mdev;
1337 char path[MAXPATHLEN];
1338 struct io_bunch bunch;
1339 struct thr_info *tip = arg;
1340 int oflags;
1341
1342 pin_to_cpu(tip);
1343
1344 mdev = map_dev(tip->devnm);
1345 sprintf(path, "/dev/%s", mdev);
1346 /*
1347 * convert underscores to slashes to
1348 * restore device names that have larger paths
1349 */
1350 for (i = 0; i < strlen(mdev); i++)
1351 if (path[strlen("/dev/") + i] == '_')
1352 path[strlen("/dev/") + i] = '/';
1353 #ifdef O_NOATIME
1354 oflags = O_NOATIME;
1355 #else
1356 oflags = 0;
1357 #endif
1358 tip->ofd = open(path, O_RDWR | O_DIRECT | oflags);
1359 if (tip->ofd < 0) {
1360 fatal(path, ERR_SYSCALL, "Failed device open\n");
1361 /*NOTREACHED*/
1362 }
1363
1364 set_replay_ready();
1365 while (!is_send_done(tip) && tip->iterations--) {
1366 wait_iter_start();
1367 if (verbose > 1)
1368 fprintf(tip->vfp, "\n=== %d ===\n", tip->iterations);
1369 while (!is_send_done(tip) && next_bunch(tip, &bunch))
1370 process_bunch(tip, &bunch);
1371 set_iter_done();
1372 reset_input_file(tip);
1373 }
1374 tip->send_done = 1;
1375 set_replay_done();
1376
1377 return NULL;
1378 }
1379
1380 /*
1381 * ========================================================================
1382 * ==== COMMAND LINE ARGUMENT HANDLING ====================================
1383 * ========================================================================
1384 */
1385
/*
 * Option summary text.  Every option that takes a value shows its
 * placeholder in both the short and the long form.
 */
static char usage_str[] =
	"\n"
	"\t[ -c <cpus>   : --cpus=<cpus>           ] Default: 1\n"
	"\t[ -d <dir>    : --input-directory=<dir> ] Default: .\n"
	"\t[ -F          : --find-records          ] Default: Off\n"
	"\t[ -h          : --help                  ] Default: Off\n"
	"\t[ -i <base>   : --input-base=<base>     ] Default: replay\n"
	"\t[ -I <iters>  : --iterations=<iters>    ] Default: 1\n"
	"\t[ -M <file>   : --map-devs=<file>       ] Default: None\n"
	"\t[ -N          : --no-stalls             ] Default: Off\n"
	"\t[ -x <factor> : --acc-factor=<factor>   ] Default: 1\n"
	"\t[ -v          : --verbose               ] Default: Off\n"
	"\t[ -V          : --version               ] Default: Off\n"
	"\t[ -W          : --write-enable          ] Default: Off\n"
	"\t<dev...>                                  Default: None\n"
	"\n";
1402
/*
 * Short option string and the matching long option table for
 * getopt_long().  Every long option maps onto its short option character
 * through .val; .flag is left NULL (implicitly) for all of them.
 *
 * NOTE(review): S_OPTS accepts "-t <arg>", but there is no long form for it
 * and handle_args() has no case for it, so it lands in the default branch.
 */
#define S_OPTS	"c:d:Fhi:I:M:Nx:t:vVW"
static struct option l_opts[] = {
	{ .name = "cpus",            .has_arg = required_argument, .val = 'c' },
	{ .name = "input-directory", .has_arg = required_argument, .val = 'd' },
	{ .name = "find-records",    .has_arg = no_argument,       .val = 'F' },
	{ .name = "help",            .has_arg = no_argument,       .val = 'h' },
	{ .name = "input-base",      .has_arg = required_argument, .val = 'i' },
	{ .name = "iterations",      .has_arg = required_argument, .val = 'I' },
	{ .name = "map-devs",        .has_arg = required_argument, .val = 'M' },
	{ .name = "no-stalls",       .has_arg = no_argument,       .val = 'N' },
	{ .name = "acc-factor",      .has_arg = required_argument, .val = 'x' },
	{ .name = "verbose",         .has_arg = no_argument,       .val = 'v' },
	{ .name = "version",         .has_arg = no_argument,       .val = 'V' },
	{ .name = "write-enable",    .has_arg = no_argument,       .val = 'W' },
	{ .name = NULL }	/* terminator */
};
1481
1482 /**
1483 * handle_args: Parse passed in argument list
1484 * @argc: Number of arguments in argv
1485 * @argv: Arguments passed in
1486 *
1487 * Does rudimentary parameter verification as well.
1488 */
handle_args(int argc,char * argv[])1489 static void handle_args(int argc, char *argv[])
1490 {
1491 int c;
1492 int r;
1493
1494 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1495 switch (c) {
1496 case 'c':
1497 cpus_to_use = atoi(optarg);
1498 if (cpus_to_use <= 0 || cpus_to_use > ncpus) {
1499 fatal(NULL, ERR_ARGS,
1500 "Invalid number of cpus %d (0<x<%d)\n",
1501 cpus_to_use, ncpus);
1502 /*NOTREACHED*/
1503 }
1504 break;
1505
1506 case 'd':
1507 idir = optarg;
1508 if (access(idir, R_OK | X_OK) != 0) {
1509 fatal(idir, ERR_ARGS,
1510 "Invalid input directory specified\n");
1511 /*NOTREACHED*/
1512 }
1513 break;
1514
1515 case 'F':
1516 find_records = 1;
1517 break;
1518
1519 case 'h':
1520 usage();
1521 exit(0);
1522 /*NOTREACHED*/
1523
1524 case 'i':
1525 ibase = optarg;
1526 break;
1527
1528 case 'I':
1529 def_iterations = atoi(optarg);
1530 if (def_iterations <= 0) {
1531 fprintf(stderr,
1532 "Invalid number of iterations %d\n",
1533 def_iterations);
1534 exit(ERR_ARGS);
1535 /*NOTREACHED*/
1536 }
1537 break;
1538
1539 case 'M':
1540 read_map_devs(optarg);
1541 break;
1542
1543 case 'N':
1544 no_stalls = 1;
1545 break;
1546
1547 case 'x':
1548 r = sscanf(optarg,"%u",&acc_factor);
1549 if (r!=1) {
1550 fprintf(stderr,
1551 "Invalid acceleration factor\n");
1552 exit(ERR_ARGS);
1553 /*NOTREACHED*/
1554 }
1555 break;
1556
1557 case 'V':
1558 fprintf(stderr, "btreplay -- version %s\n",
1559 my_btversion);
1560 exit(0);
1561 /*NOTREACHED*/
1562
1563 case 'v':
1564 verbose++;
1565 break;
1566
1567 case 'W':
1568 write_enabled = 1;
1569 break;
1570
1571 default:
1572 usage();
1573 fatal(NULL, ERR_ARGS,
1574 "Invalid command line argument %c\n", c);
1575 /*NOTREACHED*/
1576 }
1577 }
1578
1579 while (optind < argc)
1580 add_input_dev(argv[optind++]);
1581
1582 if (find_records)
1583 find_input_devs(idir);
1584
1585 if (list_len(&input_devs) == 0) {
1586 fatal(NULL, ERR_ARGS, "Missing required input dev name(s)\n");
1587 /*NOTREACHED*/
1588 }
1589
1590 if (cpus_to_use < 0)
1591 cpus_to_use = ncpus;
1592 }
1593
1594 /*
1595 * ========================================================================
1596 * ==== MAIN ROUTINE ======================================================
1597 * ========================================================================
1598 */
1599
1600 /**
1601 * set_signal_done - Signal handler, catches signals & sets signal_done
1602 */
set_signal_done(int signum)1603 static void set_signal_done(__attribute__((__unused__))int signum)
1604 {
1605 signal_done = 1;
1606 }
1607
1608 /**
1609 * main -
1610 * @argc: Number of arguments
1611 * @argv: Array of arguments
1612 */
main(int argc,char * argv[])1613 int main(int argc, char *argv[])
1614 {
1615 int i;
1616 struct list_head *p;
1617
1618 pgsize = getpagesize();
1619 assert(pgsize > 0);
1620
1621 setup_signal(SIGINT, set_signal_done);
1622 setup_signal(SIGTERM, set_signal_done);
1623
1624 get_ncpus();
1625 handle_args(argc, argv);
1626 find_input_files();
1627
1628 nfiles = list_len(&input_files);
1629 __list_for_each(p, &input_files) {
1630 tip_init(list_entry(p, struct thr_info, head));
1631 }
1632
1633 wait_replays_ready();
1634 for (i = 0; i < def_iterations; i++) {
1635 rgenesis = gettime();
1636 start_iter();
1637 if (verbose)
1638 fprintf(stderr, "I");
1639 wait_iters_done();
1640 }
1641
1642 wait_replays_done();
1643 wait_reclaims_done();
1644
1645 if (verbose)
1646 fprintf(stderr, "\n");
1647
1648 rem_input_files();
1649 release_map_devs();
1650
1651 return 0;
1652 }
1653