1 /*
2 * version of copy command using async i/o
3 * From: Stephen Hemminger <shemminger@osdl.org>
4 * Modified by Daniel McNeil <daniel@osdl.org> for testing aio.
5 * - added -a alignment
6 * - added -b blksize option
 * - added -s size option
8 * - added -f open_flag option
9 * - added -w (no write) option (reads from source only)
10 * - added -n (num aio) option
 * - added -z (zero dest) option (writes zeros to dest only)
12 * - added -D delay_ms option
13 *
14 * Copy file by using a async I/O state machine.
15 * 1. Start read request
16 * 2. When read completes turn it into a write request
17 * 3. When write completes decrement counter and free resources
18 *
19 *
 * Usage: aiocp [-a align] [-b blksize] [-n num_aio] [-w] [-z] [-s filesize]
 *        [-d] [-D delay] [-f DIRECT|TRUNC|CREAT|SYNC|LARGEFILE] src dest
22 */
23
24 #define _GNU_SOURCE
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/param.h>
31 #include <fcntl.h>
32 #include <errno.h>
33 #include <stdlib.h>
34 #include <mntent.h>
35 #include <sys/select.h>
36 #include <sys/mount.h>
37 #include "config.h"
38
39 #if HAVE_LIBAIO_H
40
41 #include <libaio.h>
42
/* Defaults for the -b (block size) and -n (number of iocbs) options. */
#define AIO_BLKSIZE (64*1024)
#define AIO_MAXIO 32

static int aio_blksize = AIO_BLKSIZE; /* bytes per request, set by -b */
static int aio_maxio = AIO_MAXIO; /* iocbs allocated / max requests in flight, set by -n */

static int busy = 0; // # of I/O's in flight
static int tocopy = 0; // # of blocks left to copy
static int srcfd; // source fd
static int srcfd2; // source fd - end of file non-sector
static int dstfd = -1; // destination file descriptor
static int dstfd2 = -1; // Handle end of file for non-sector size
static const char *dstname = NULL; /* destination path; NULL when -w is given */
static const char *srcname = NULL; /* source path; NULL when -z is given */
static int source_open_flag = O_RDONLY; /* open flags on source file */
static int dest_open_flag = O_WRONLY; /* open flags on dest file */
static int no_write; /* do not write (-w): completed reads are just recycled */
static int zero; /* write zero's only (-z): no source file is opened */

static int debug; /* verbosity, incremented once per -d */
static int count_io_q_waits; /* how many times io_wait_run() was called from main() */

/*
 * Pool of unused iocbs, used as a stack: alloc_iocb() takes the entry at
 * index iocb_free_count - 1 and free_iocb() stores at index iocb_free_count.
 */
struct iocb **iocb_free; /* array of pointers to iocb */
int iocb_free_count; /* current free count */
int alignment = 512; /* buffer alignment passed to posix_memalign(), set by -a */

struct timeval delay; /* delay between i/o submissions; only tv_usec is set (by -D) */
70
/*
 * dev_block_size_by_path() - find the block size of the device holding @path.
 *
 * Scans /proc/mounts for the longest mount point that is a path-component
 * prefix of @path, opens the device listed for that mount and queries it
 * with the BLKSSZGET ioctl.
 *
 * @path is compared textually against the mount points, so a relative path
 * never matches any entry.
 *
 * Returns the size reported by BLKSSZGET, or 0 when @path is NULL, no mount
 * entry matches, or the device cannot be opened or queried.
 */
static int dev_block_size_by_path(const char *path)
{
	FILE *f;
	struct mntent *mnt;
	size_t prefix_len, prefix_max = 0;
	char dev_name[1024];
	int fd, size = 0;

	if (!path)
		return 0;

	f = setmntent("/proc/mounts", "r");
	if (!f) {
		fprintf(stderr, "Failed to open /proc/mounts\n");
		return 0;
	}

	while ((mnt = getmntent(f))) {
		/* Skip pseudo fs */
		if (mnt->mnt_fsname[0] != '/')
			continue;

		prefix_len = strlen(mnt->mnt_dir);

		/* Only a strictly longer match can replace the current best. */
		if (prefix_len <= prefix_max)
			continue;

		if (strncmp(path, mnt->mnt_dir, prefix_len))
			continue;

		/*
		 * The match must end on a path component boundary, otherwise
		 * a mount at "/mnt/f" would claim "/mnt/foo/file". A mount
		 * point that itself ends in '/' (the root) always qualifies.
		 */
		if (mnt->mnt_dir[prefix_len - 1] != '/' &&
		    path[prefix_len] != '\0' && path[prefix_len] != '/')
			continue;

		prefix_max = prefix_len;
		/* snprintf() always terminates; overlong names are truncated */
		snprintf(dev_name, sizeof(dev_name), "%s", mnt->mnt_fsname);
	}

	endmntent(f);

	if (!prefix_max) {
		fprintf(stderr, "Path '%s' not found in /proc/mounts\n", path);
		return 0;
	}

	printf("Path '%s' is on device '%s'\n", path, dev_name);

	fd = open(dev_name, O_RDONLY);
	/* open() reports failure with -1; descriptor 0 is a valid result */
	if (fd < 0) {
		fprintf(stderr, "open('%s'): %s\n", dev_name, strerror(errno));
		return 0;
	}

	if (ioctl(fd, BLKSSZGET, &size)) {
		fprintf(stderr, "ioctl(BLKSSZGET): %s\n", strerror(errno));
		close(fd);
		return 0;
	}

	close(fd);
	printf("'%s' has block size %i\n", dev_name, size);

	return size;
}
129
init_iocb(int n,int iosize)130 int init_iocb(int n, int iosize)
131 {
132 void *buf;
133 int i;
134
135 if ((iocb_free = malloc(n * sizeof(struct iocb *))) == 0) {
136 return -1;
137 }
138
139 for (i = 0; i < n; i++) {
140 if (!
141 (iocb_free[i] = malloc(sizeof(struct iocb))))
142 return -1;
143 if (posix_memalign(&buf, alignment, iosize))
144 return -1;
145 if (debug > 1) {
146 printf("buf allocated at 0x%p, align:%d\n",
147 buf, alignment);
148 }
149 if (zero) {
150 /*
151 * We are writing zero's to dstfd
152 */
153 memset(buf, 0, iosize);
154 }
155 io_prep_pread(iocb_free[i], -1, buf, iosize, 0);
156 }
157 iocb_free_count = i;
158 return 0;
159 }
160
alloc_iocb(void)161 static struct iocb *alloc_iocb(void)
162 {
163 if (!iocb_free_count)
164 return 0;
165 return iocb_free[--iocb_free_count];
166 }
167
free_iocb(struct iocb * io)168 void free_iocb(struct iocb *io)
169 {
170 iocb_free[iocb_free_count++] = io;
171 }
172
173 /*
174 * io_wait_run() - wait for an io_event and then call the callback.
175 */
io_wait_run(io_context_t ctx,struct timespec * to)176 int io_wait_run(io_context_t ctx, struct timespec *to)
177 {
178 struct io_event events[aio_maxio];
179 struct io_event *ep;
180 int ret, n;
181
182 /*
183 * get up to aio_maxio events at a time.
184 */
185 ret = n = io_getevents(ctx, 1, aio_maxio, events, to);
186
187 /*
188 * Call the callback functions for each event.
189 */
190 for (ep = events; n-- > 0; ep++) {
191 io_callback_t cb = (io_callback_t) ep->data;
192 struct iocb *iocb = ep->obj;
193
194 if (debug > 1) {
195 fprintf(stderr, "ev:%p iocb:%p res:%ld res2:%ld\n",
196 ep, iocb, ep->res, ep->res2);
197 }
198 cb(ctx, iocb, ep->res, ep->res2);
199 }
200 return ret;
201 }
202
203 /* Fatal error handler */
io_error(const char * func,int rc)204 static void io_error(const char *func, int rc)
205 {
206 if (rc == -ENOSYS)
207 fprintf(stderr, "AIO not in this kernel\n");
208 else if (rc < 0)
209 fprintf(stderr, "%s: %s\n", func, strerror(-rc));
210 else
211 fprintf(stderr, "%s: error %d\n", func, rc);
212
213 if (dstfd > 0)
214 close(dstfd);
215 if (dstname && dest_open_flag & O_CREAT)
216 unlink(dstname);
217 exit(1);
218 }
219
220 /*
221 * Write complete callback.
222 * Adjust counts and free resources
223 */
wr_done(io_context_t ctx,struct iocb * iocb,long res,long res2)224 static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
225 {
226 if (res2 != 0) {
227 io_error("aio write", res2);
228 }
229 if (res != iocb->u.c.nbytes) {
230 fprintf(stderr, "write missed bytes expect %lu got %ld\n",
231 iocb->u.c.nbytes, res);
232 exit(1);
233 }
234 --tocopy;
235 --busy;
236 free_iocb(iocb);
237 if (debug)
238 write(2, "w", 1);
239 }
240
241 /*
242 * Read complete callback.
243 * Change read iocb into a write iocb and start it.
244 */
rd_done(io_context_t ctx,struct iocb * iocb,long res,long res2)245 static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
246 {
247 /* library needs accessors to look at iocb? */
248 int iosize = iocb->u.c.nbytes;
249 char *buf = iocb->u.c.buf;
250 off_t offset = iocb->u.c.offset;
251
252 if (res2 != 0)
253 io_error("aio read", res2);
254 if (res != iosize) {
255 fprintf(stderr, "read missing bytes expect %lu got %ld\n",
256 iocb->u.c.nbytes, res);
257 exit(1);
258 }
259
260 /* turn read into write */
261 if (no_write) {
262 --tocopy;
263 --busy;
264 free_iocb(iocb);
265 } else {
266 int fd;
267 if (iocb->aio_fildes == srcfd)
268 fd = dstfd;
269 else
270 fd = dstfd2;
271 io_prep_pwrite(iocb, fd, buf, iosize, offset);
272 io_set_callback(iocb, wr_done);
273 if (1 != (res = io_submit(ctx, 1, &iocb)))
274 io_error("io_submit write", res);
275 }
276 if (debug)
277 write(2, "r", 1);
278 if (debug > 1)
279 printf("%d", iosize);
280 }
281
/*
 * usage() - print the command synopsis on stderr and exit with status 1.
 */
static void usage(void)
{
	static const char help[] =
		"Usage: aiocp [-a align] [-s size] [-b blksize] [-n num_io]"
		" [-f open_flag] SOURCE DEST\n"
		"This copies from SOURCE to DEST using AIO.\n\n"
		"Usage: aiocp [options] -w SOURCE\n"
		"This does sequential AIO reads (no writes).\n\n"
		"Usage: aiocp [options] -z DEST\n"
		"This does sequential AIO writes of zeros.\n";

	fputs(help, stderr);
	exit(1);
}
295
/*
 * scale_by_kmg() - apply a binary size suffix to @value.
 *
 * 'k'/'K' multiplies by 1024, 'm'/'M' by 1024^2 and 'g'/'G' by 1024^3.
 * A '\0' suffix returns @value unchanged. Any other character calls
 * usage(), which prints the synopsis and exits.
 */
long long scale_by_kmg(long long value, char scale)
{
	long long factor;

	if (scale == '\0')
		return value;

	switch (scale) {
	case 'k':
	case 'K':
		factor = 1024LL;
		break;
	case 'm':
	case 'M':
		factor = 1024LL * 1024;
		break;
	case 'g':
	case 'G':
		factor = 1024LL * 1024 * 1024;
		break;
	default:
		usage();
		return value;
	}

	return value * factor;
}
320
main(int argc,char * const * argv)321 int main(int argc, char *const *argv)
322 {
323 struct stat st;
324 off_t length = 0, offset = 0;
325 off_t leftover = 0;
326 io_context_t myctx;
327 int c;
328 extern char *optarg;
329 extern int optind, opterr, optopt;
330
331 while ((c = getopt(argc, argv, "a:b:df:n:s:wzD:")) != -1) {
332 char *endp;
333
334 switch (c) {
335 case 'a': /* alignment of data buffer */
336 alignment = strtol(optarg, &endp, 0);
337 alignment = (long)scale_by_kmg((long long)alignment,
338 *endp);
339 break;
340 case 'f': /* use these open flags */
341 if (strcmp(optarg, "LARGEFILE") == 0 ||
342 strcmp(optarg, "O_LARGEFILE") == 0) {
343 source_open_flag |= O_LARGEFILE;
344 dest_open_flag |= O_LARGEFILE;
345 } else if (strcmp(optarg, "TRUNC") == 0 ||
346 strcmp(optarg, "O_TRUNC") == 0) {
347 dest_open_flag |= O_TRUNC;
348 } else if (strcmp(optarg, "SYNC") == 0 ||
349 strcmp(optarg, "O_SYNC") == 0) {
350 dest_open_flag |= O_SYNC;
351 } else if (strcmp(optarg, "DIRECT") == 0 ||
352 strcmp(optarg, "O_DIRECT") == 0) {
353 source_open_flag |= O_DIRECT;
354 dest_open_flag |= O_DIRECT;
355 } else if (strncmp(optarg, "CREAT", 5) == 0 ||
356 strncmp(optarg, "O_CREAT", 5) == 0) {
357 dest_open_flag |= O_CREAT;
358 }
359 break;
360 case 'd':
361 debug++;
362 break;
363 case 'D':
364 delay.tv_usec = atoi(optarg);
365 break;
366 case 'b': /* block size */
367 aio_blksize = strtol(optarg, &endp, 0);
368 aio_blksize =
369 (long)scale_by_kmg((long long)aio_blksize, *endp);
370 break;
371
372 case 'n': /* num io */
373 aio_maxio = strtol(optarg, &endp, 0);
374 break;
375 case 's': /* size to transfer */
376 length = strtoll(optarg, &endp, 0);
377 length = scale_by_kmg(length, *endp);
378 break;
379 case 'w': /* no write */
380 no_write = 1;
381 break;
382 case 'z': /* write zero's */
383 zero = 1;
384 break;
385
386 default:
387 usage();
388 }
389 }
390
391 argc -= optind;
392 argv += optind;
393
394 if (argc < 1) {
395 usage();
396 }
397 if (!zero) {
398 if ((srcfd = open(srcname = *argv, source_open_flag)) < 0) {
399 perror(srcname);
400 exit(1);
401 }
402 argv++;
403 argc--;
404 if (fstat(srcfd, &st) < 0) {
405 perror("fstat");
406 exit(1);
407 }
408 if (length == 0)
409 length = st.st_size;
410 }
411
412 if (!no_write) {
413 /*
414 * We are either copying or writing zeros to dstname
415 */
416 if (argc < 1) {
417 usage();
418 }
419 if ((dstfd = open(dstname = *argv, dest_open_flag, 0666)) < 0) {
420 perror(dstname);
421 exit(1);
422 }
423 if (zero) {
424 /*
425 * get size of dest, if we are zeroing it.
426 * TODO: handle devices.
427 */
428 if (fstat(dstfd, &st) < 0) {
429 perror("fstat");
430 exit(1);
431 }
432 if (length == 0)
433 length = st.st_size;
434 }
435 }
436 /*
437 * O_DIRECT cannot handle non-sector sizes
438 */
439 if (dest_open_flag & O_DIRECT) {
440 int src_alignment = dev_block_size_by_path(srcname);
441 int dst_alignment = dev_block_size_by_path(dstname);
442
443 /*
444 * Given we expect the block sizes to be multiple of 2 the
445 * larger is always divideable by the smaller, so we only need
446 * to care about maximum.
447 */
448 if (src_alignment > dst_alignment)
449 dst_alignment = src_alignment;
450
451 if (alignment < dst_alignment) {
452 alignment = dst_alignment;
453 printf("Forcing aligment to %i\n", alignment);
454 }
455
456 if (aio_blksize % alignment) {
457 printf("Block size is not multiple of drive block size\n");
458 printf("Skipping the test!\n");
459 exit(0);
460 }
461
462 leftover = length % alignment;
463 if (leftover) {
464 int flag;
465
466 length -= leftover;
467 if (!zero) {
468 flag = source_open_flag & ~O_DIRECT;
469 srcfd2 = open(srcname, flag);
470 if (srcfd2 < 0) {
471 perror(srcname);
472 exit(1);
473 }
474 }
475 if (!no_write) {
476 flag = (O_SYNC | dest_open_flag) &
477 ~(O_DIRECT | O_CREAT);
478 dstfd2 = open(dstname, flag);
479 if (dstfd2 < 0) {
480 perror(dstname);
481 exit(1);
482 }
483 }
484 }
485 }
486
487 /* initialize state machine */
488 memset(&myctx, 0, sizeof(myctx));
489 io_queue_init(aio_maxio, &myctx);
490 tocopy = howmany(length, aio_blksize);
491
492 if (init_iocb(aio_maxio, aio_blksize) < 0) {
493 fprintf(stderr, "Error allocating the i/o buffers\n");
494 exit(1);
495 }
496
497 while (tocopy > 0) {
498 int i, rc;
499 /* Submit as many reads as once as possible upto aio_maxio */
500 int n = MIN(MIN(aio_maxio - busy, aio_maxio),
501 howmany(length - offset, aio_blksize));
502 if (n > 0) {
503 struct iocb *ioq[n];
504
505 for (i = 0; i < n; i++) {
506 struct iocb *io = alloc_iocb();
507 int iosize = MIN(length - offset, aio_blksize);
508
509 if (zero) {
510 /*
511 * We are writing zero's to dstfd
512 */
513 io_prep_pwrite(io, dstfd, io->u.c.buf,
514 iosize, offset);
515 io_set_callback(io, wr_done);
516 } else {
517 io_prep_pread(io, srcfd, io->u.c.buf,
518 iosize, offset);
519 io_set_callback(io, rd_done);
520 }
521 ioq[i] = io;
522 offset += iosize;
523 }
524
525 rc = io_submit(myctx, n, ioq);
526 if (rc < 0)
527 io_error("io_submit", rc);
528
529 busy += n;
530 if (debug > 1)
531 printf("io_submit(%d) busy:%d\n", n, busy);
532 if (delay.tv_usec) {
533 struct timeval t = delay;
534 (void)select(0, 0, 0, 0, &t);
535 }
536 }
537
538 /*
539 * We have submitted all the i/o requests. Wait for at least one to complete
540 * and call the callbacks.
541 */
542 count_io_q_waits++;
543 rc = io_wait_run(myctx, 0);
544 if (rc < 0)
545 io_error("io_wait_run", rc);
546
547 if (debug > 1) {
548 printf("io_wait_run: rc == %d\n", rc);
549 printf("busy:%d aio_maxio:%d tocopy:%d\n",
550 busy, aio_maxio, tocopy);
551 }
552 }
553
554 if (leftover) {
555 /* non-sector size end of file */
556 struct iocb *io = alloc_iocb();
557 int rc;
558 if (zero) {
559 /*
560 * We are writing zero's to dstfd2
561 */
562 io_prep_pwrite(io, dstfd2, io->u.c.buf,
563 leftover, offset);
564 io_set_callback(io, wr_done);
565 } else {
566 io_prep_pread(io, srcfd2, io->u.c.buf,
567 leftover, offset);
568 io_set_callback(io, rd_done);
569 }
570 rc = io_submit(myctx, 1, &io);
571 if (rc < 0)
572 io_error("io_submit", rc);
573 count_io_q_waits++;
574 rc = io_wait_run(myctx, 0);
575 if (rc < 0)
576 io_error("io_wait_run", rc);
577 }
578
579 if (srcfd != -1)
580 close(srcfd);
581 if (dstfd != -1)
582 close(dstfd);
583 exit(0);
584 }
585
586 /*
587 * Results look like:
588 * [alanm@toolbox ~/MOT3]$ ../taio -d kernel-source-2.4.8-0.4g.ppc.rpm abc
589 * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww
590 * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr
591 * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww
592 * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww
593 * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr
594 * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww
595 * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr
596 * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww
597 * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw
598 */
599
600 #else
601
/*
 * Fallback entry point for builds without libaio: report that and fail.
 */
int main(void)
{
	fputs("System doesn't have libaio support.\n", stderr);
	return 1;
}
607
608 #endif
609