1 /*
2  * version of copy command using async i/o
3  * From:	Stephen Hemminger <shemminger@osdl.org>
4  * Modified by Daniel McNeil <daniel@osdl.org> for testing aio.
5  *	- added -a alignment
6  *	- added -b blksize option
 *	- added -s size option
8  *	- added -f open_flag option
9  *	- added -w (no write) option (reads from source only)
10  *	- added -n (num aio) option
 *	- added -z (zero dest) option (writes zeros to dest only)
 *	- added -D delay option (the value is stored in tv_usec, i.e. it is
 *	  interpreted as microseconds)
13  *
14  * Copy file by using a async I/O state machine.
15  * 1. Start read request
16  * 2. When read completes turn it into a write request
17  * 3. When write completes decrement counter and free resources
18  *
19  *
 * Usage: aiocp [-b blksize] [-n num_aio] [-w] [-z] [-s filesize]
21  *		[-f DIRECT|TRUNC|CREAT|SYNC|LARGEFILE] src dest
22  */
23 
24 #define _GNU_SOURCE
25 
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/param.h>
31 #include <fcntl.h>
32 #include <errno.h>
33 #include <stdlib.h>
34 #include <mntent.h>
35 #include <sys/select.h>
36 #include <sys/mount.h>
37 #include "config.h"
38 
39 #if HAVE_LIBAIO_H
40 
41 #include <libaio.h>
42 
/* Built-in defaults; overridden at run time by the -b and -n options. */
#define AIO_BLKSIZE	(64*1024)
#define AIO_MAXIO	32

static int aio_blksize = AIO_BLKSIZE;	/* bytes per i/o request (-b) */
static int aio_maxio = AIO_MAXIO;	/* max requests kept in flight (-n) */

static int busy = 0;		// # of I/O's in flight
static int tocopy = 0;		// # of blocks left to copy
static int srcfd;		// source fd
static int srcfd2;		// source fd without O_DIRECT, for the unaligned tail
static int dstfd = -1;		// destination file descriptor
static int dstfd2 = -1;		// destination fd without O_DIRECT, for the unaligned tail
static const char *dstname = NULL;	/* destination path, NULL until opened */
static const char *srcname = NULL;	/* source path, NULL until opened */
static int source_open_flag = O_RDONLY;	/* open flags on source file */
static int dest_open_flag = O_WRONLY;	/* open flags on dest file */
static int no_write;		/* -w: read the source only, do not write */
static int zero;		/* -z: write zeros to the destination only */

static int debug;		/* verbosity, incremented by every -d */
static int count_io_q_waits;	/* how many times io_wait_run() was called */

struct iocb **iocb_free;	/* stack of pointers to currently unused iocbs */
int iocb_free_count;		/* number of valid entries in iocb_free */
int alignment = 512;		/* buffer alignment (-a) */

struct timeval delay;		/* pause after each batch submit (-D sets tv_usec) */
70 
/*
 * dev_block_size_by_path() - block size of the device that holds @path.
 *
 * Scans /proc/mounts for the device-backed mount (fsname starting with '/')
 * whose mount point is the longest prefix of @path, opens that device and
 * asks it for its block size with the BLKSSZGET ioctl.
 *
 * @path is compared textually against the mount points, so it should be an
 * absolute path; a relative path never matches.
 *
 * Returns the size reported by BLKSSZGET, or 0 when @path is NULL,
 * /proc/mounts cannot be read, no mount point matches, or the device cannot
 * be opened or queried. Progress and errors are printed to stdout/stderr.
 */
static int dev_block_size_by_path(const char *path)
{
	FILE *f;
	struct mntent *mnt;
	size_t prefix_len, prefix_max = 0;
	char dev_name[1024];
	int fd, size = 0;

	if (!path)
		return 0;

	f = setmntent("/proc/mounts", "r");
	if (!f) {
		fprintf(stderr, "Failed to open /proc/mounts\n");
		return 0;
	}

	while ((mnt = getmntent(f))) {
		/* Skip pseudo fs */
		if (mnt->mnt_fsname[0] != '/')
			continue;

		prefix_len = strlen(mnt->mnt_dir);

		/* only a strictly longer matching prefix can replace the best */
		if (prefix_len <= prefix_max ||
		    strncmp(path, mnt->mnt_dir, prefix_len))
			continue;

		/*
		 * The match has to end on a path component boundary,
		 * otherwise "/mnt/a" would claim "/mnt/ab/file". The mount
		 * point "/" already ends with the separator.
		 */
		if (mnt->mnt_dir[prefix_len - 1] != '/' &&
		    path[prefix_len] != '\0' && path[prefix_len] != '/')
			continue;

		prefix_max = prefix_len;
		/* snprintf always NUL-terminates, truncating if needed */
		snprintf(dev_name, sizeof(dev_name), "%s", mnt->mnt_fsname);
	}

	endmntent(f);

	if (!prefix_max) {
		fprintf(stderr, "Path '%s' not found in /proc/mounts\n", path);
		return 0;
	}

	printf("Path '%s' is on device '%s'\n", path, dev_name);

	fd = open(dev_name, O_RDONLY);
	/* open() reports failure with -1; 0 is a valid descriptor */
	if (fd < 0) {
		fprintf(stderr, "open('%s'): %s\n", dev_name, strerror(errno));
		return 0;
	}

	if (ioctl(fd, BLKSSZGET, &size)) {
		fprintf(stderr, "ioctl(BLKSSZGET): %s\n", strerror(errno));
		close(fd);
		return 0;
	}

	close(fd);
	printf("'%s' has block size %i\n", dev_name, size);

	return size;
}
129 
init_iocb(int n,int iosize)130 int init_iocb(int n, int iosize)
131 {
132 	void *buf;
133 	int i;
134 
135 	if ((iocb_free = malloc(n * sizeof(struct iocb *))) == 0) {
136 		return -1;
137 	}
138 
139 	for (i = 0; i < n; i++) {
140 		if (!
141 		    (iocb_free[i] = malloc(sizeof(struct iocb))))
142 			return -1;
143 		if (posix_memalign(&buf, alignment, iosize))
144 			return -1;
145 		if (debug > 1) {
146 			printf("buf allocated at 0x%p, align:%d\n",
147 			       buf, alignment);
148 		}
149 		if (zero) {
150 			/*
151 			 * We are writing zero's to dstfd
152 			 */
153 			memset(buf, 0, iosize);
154 		}
155 		io_prep_pread(iocb_free[i], -1, buf, iosize, 0);
156 	}
157 	iocb_free_count = i;
158 	return 0;
159 }
160 
alloc_iocb(void)161 static struct iocb *alloc_iocb(void)
162 {
163 	if (!iocb_free_count)
164 		return 0;
165 	return iocb_free[--iocb_free_count];
166 }
167 
free_iocb(struct iocb * io)168 void free_iocb(struct iocb *io)
169 {
170 	iocb_free[iocb_free_count++] = io;
171 }
172 
173 /*
174  * io_wait_run() - wait for an io_event and then call the callback.
175  */
io_wait_run(io_context_t ctx,struct timespec * to)176 int io_wait_run(io_context_t ctx, struct timespec *to)
177 {
178 	struct io_event events[aio_maxio];
179 	struct io_event *ep;
180 	int ret, n;
181 
182 	/*
183 	 * get up to aio_maxio events at a time.
184 	 */
185 	ret = n = io_getevents(ctx, 1, aio_maxio, events, to);
186 
187 	/*
188 	 * Call the callback functions for each event.
189 	 */
190 	for (ep = events; n-- > 0; ep++) {
191 		io_callback_t cb = (io_callback_t) ep->data;
192 		struct iocb *iocb = ep->obj;
193 
194 		if (debug > 1) {
195 			fprintf(stderr, "ev:%p iocb:%p res:%ld res2:%ld\n",
196 				ep, iocb, ep->res, ep->res2);
197 		}
198 		cb(ctx, iocb, ep->res, ep->res2);
199 	}
200 	return ret;
201 }
202 
203 /* Fatal error handler */
io_error(const char * func,int rc)204 static void io_error(const char *func, int rc)
205 {
206 	if (rc == -ENOSYS)
207 		fprintf(stderr, "AIO not in this kernel\n");
208 	else if (rc < 0)
209 		fprintf(stderr, "%s: %s\n", func, strerror(-rc));
210 	else
211 		fprintf(stderr, "%s: error %d\n", func, rc);
212 
213 	if (dstfd > 0)
214 		close(dstfd);
215 	if (dstname && dest_open_flag & O_CREAT)
216 		unlink(dstname);
217 	exit(1);
218 }
219 
220 /*
221  * Write complete callback.
222  * Adjust counts and free resources
223  */
wr_done(io_context_t ctx,struct iocb * iocb,long res,long res2)224 static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
225 {
226 	if (res2 != 0) {
227 		io_error("aio write", res2);
228 	}
229 	if (res != iocb->u.c.nbytes) {
230 		fprintf(stderr, "write missed bytes expect %lu got %ld\n",
231 			iocb->u.c.nbytes, res);
232 		exit(1);
233 	}
234 	--tocopy;
235 	--busy;
236 	free_iocb(iocb);
237 	if (debug)
238 		write(2, "w", 1);
239 }
240 
241 /*
242  * Read complete callback.
243  * Change read iocb into a write iocb and start it.
244  */
rd_done(io_context_t ctx,struct iocb * iocb,long res,long res2)245 static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
246 {
247 	/* library needs accessors to look at iocb? */
248 	int iosize = iocb->u.c.nbytes;
249 	char *buf = iocb->u.c.buf;
250 	off_t offset = iocb->u.c.offset;
251 
252 	if (res2 != 0)
253 		io_error("aio read", res2);
254 	if (res != iosize) {
255 		fprintf(stderr, "read missing bytes expect %lu got %ld\n",
256 			iocb->u.c.nbytes, res);
257 		exit(1);
258 	}
259 
260 	/* turn read into write */
261 	if (no_write) {
262 		--tocopy;
263 		--busy;
264 		free_iocb(iocb);
265 	} else {
266 		int fd;
267 		if (iocb->aio_fildes == srcfd)
268 			fd = dstfd;
269 		else
270 			fd = dstfd2;
271 		io_prep_pwrite(iocb, fd, buf, iosize, offset);
272 		io_set_callback(iocb, wr_done);
273 		if (1 != (res = io_submit(ctx, 1, &iocb)))
274 			io_error("io_submit write", res);
275 	}
276 	if (debug)
277 		write(2, "r", 1);
278 	if (debug > 1)
279 		printf("%d", iosize);
280 }
281 
/*
 * usage() - print the command line synopsis to stderr and exit with
 * status 1.
 */
static void usage(void)
{
	static const char help[] =
		"Usage: aiocp [-a align] [-s size] [-b blksize] [-n num_io]"
		" [-f open_flag] SOURCE DEST\n"
		"This copies from SOURCE to DEST using AIO.\n\n"
		"Usage: aiocp [options] -w SOURCE\n"
		"This does sequential AIO reads (no writes).\n\n"
		"Usage: aiocp [options] -z DEST\n"
		"This does sequential AIO writes of zeros.\n";

	fputs(help, stderr);
	exit(1);
}
295 
/*
 * scale_by_kmg() - apply a size suffix to @value.
 *
 * 'k'/'K' multiplies by 1024, 'm'/'M' by 1024 twice and 'g'/'G' by 1024
 * three times. A '\0' suffix returns @value unchanged. Any other suffix
 * calls usage().
 */
long long scale_by_kmg(long long value, char scale)
{
	int steps;	/* how many times to multiply by 1024 */

	if (scale == '\0')
		return value;

	if (scale == 'k' || scale == 'K') {
		steps = 1;
	} else if (scale == 'm' || scale == 'M') {
		steps = 2;
	} else if (scale == 'g' || scale == 'G') {
		steps = 3;
	} else {
		usage();
		return value;
	}

	while (steps-- > 0)
		value *= 1024;

	return value;
}
320 
main(int argc,char * const * argv)321 int main(int argc, char *const *argv)
322 {
323 	struct stat st;
324 	off_t length = 0, offset = 0;
325 	off_t leftover = 0;
326 	io_context_t myctx;
327 	int c;
328 	extern char *optarg;
329 	extern int optind, opterr, optopt;
330 
331 	while ((c = getopt(argc, argv, "a:b:df:n:s:wzD:")) != -1) {
332 		char *endp;
333 
334 		switch (c) {
335 		case 'a':	/* alignment of data buffer */
336 			alignment = strtol(optarg, &endp, 0);
337 			alignment = (long)scale_by_kmg((long long)alignment,
338 						       *endp);
339 			break;
340 		case 'f':	/* use these open flags */
341 			if (strcmp(optarg, "LARGEFILE") == 0 ||
342 			    strcmp(optarg, "O_LARGEFILE") == 0) {
343 				source_open_flag |= O_LARGEFILE;
344 				dest_open_flag |= O_LARGEFILE;
345 			} else if (strcmp(optarg, "TRUNC") == 0 ||
346 				   strcmp(optarg, "O_TRUNC") == 0) {
347 				dest_open_flag |= O_TRUNC;
348 			} else if (strcmp(optarg, "SYNC") == 0 ||
349 				   strcmp(optarg, "O_SYNC") == 0) {
350 				dest_open_flag |= O_SYNC;
351 			} else if (strcmp(optarg, "DIRECT") == 0 ||
352 				   strcmp(optarg, "O_DIRECT") == 0) {
353 				source_open_flag |= O_DIRECT;
354 				dest_open_flag |= O_DIRECT;
355 			} else if (strncmp(optarg, "CREAT", 5) == 0 ||
356 				   strncmp(optarg, "O_CREAT", 5) == 0) {
357 				dest_open_flag |= O_CREAT;
358 			}
359 			break;
360 		case 'd':
361 			debug++;
362 			break;
363 		case 'D':
364 			delay.tv_usec = atoi(optarg);
365 			break;
366 		case 'b':	/* block size */
367 			aio_blksize = strtol(optarg, &endp, 0);
368 			aio_blksize =
369 			    (long)scale_by_kmg((long long)aio_blksize, *endp);
370 			break;
371 
372 		case 'n':	/* num io */
373 			aio_maxio = strtol(optarg, &endp, 0);
374 			break;
375 		case 's':	/* size to transfer */
376 			length = strtoll(optarg, &endp, 0);
377 			length = scale_by_kmg(length, *endp);
378 			break;
379 		case 'w':	/* no write */
380 			no_write = 1;
381 			break;
382 		case 'z':	/* write zero's */
383 			zero = 1;
384 			break;
385 
386 		default:
387 			usage();
388 		}
389 	}
390 
391 	argc -= optind;
392 	argv += optind;
393 
394 	if (argc < 1) {
395 		usage();
396 	}
397 	if (!zero) {
398 		if ((srcfd = open(srcname = *argv, source_open_flag)) < 0) {
399 			perror(srcname);
400 			exit(1);
401 		}
402 		argv++;
403 		argc--;
404 		if (fstat(srcfd, &st) < 0) {
405 			perror("fstat");
406 			exit(1);
407 		}
408 		if (length == 0)
409 			length = st.st_size;
410 	}
411 
412 	if (!no_write) {
413 		/*
414 		 * We are either copying or writing zeros to dstname
415 		 */
416 		if (argc < 1) {
417 			usage();
418 		}
419 		if ((dstfd = open(dstname = *argv, dest_open_flag, 0666)) < 0) {
420 			perror(dstname);
421 			exit(1);
422 		}
423 		if (zero) {
424 			/*
425 			 * get size of dest, if we are zeroing it.
426 			 * TODO: handle devices.
427 			 */
428 			if (fstat(dstfd, &st) < 0) {
429 				perror("fstat");
430 				exit(1);
431 			}
432 			if (length == 0)
433 				length = st.st_size;
434 		}
435 	}
436 	/*
437 	 * O_DIRECT cannot handle non-sector sizes
438 	 */
439 	if (dest_open_flag & O_DIRECT) {
440 		int src_alignment = dev_block_size_by_path(srcname);
441 		int dst_alignment = dev_block_size_by_path(dstname);
442 
443 		/*
444 		 * Given we expect the block sizes to be multiple of 2 the
445 		 * larger is always divideable by the smaller, so we only need
446 		 * to care about maximum.
447 		 */
448 		if (src_alignment > dst_alignment)
449 			dst_alignment = src_alignment;
450 
451 		if (alignment < dst_alignment) {
452 			alignment = dst_alignment;
453 			printf("Forcing aligment to %i\n", alignment);
454 		}
455 
456 		if (aio_blksize % alignment) {
457 			printf("Block size is not multiple of drive block size\n");
458 			printf("Skipping the test!\n");
459 			exit(0);
460 		}
461 
462 		leftover = length % alignment;
463 		if (leftover) {
464 			int flag;
465 
466 			length -= leftover;
467 			if (!zero) {
468 				flag = source_open_flag & ~O_DIRECT;
469 				srcfd2 = open(srcname, flag);
470 				if (srcfd2 < 0) {
471 					perror(srcname);
472 					exit(1);
473 				}
474 			}
475 			if (!no_write) {
476 				flag = (O_SYNC | dest_open_flag) &
477 				    ~(O_DIRECT | O_CREAT);
478 				dstfd2 = open(dstname, flag);
479 				if (dstfd2 < 0) {
480 					perror(dstname);
481 					exit(1);
482 				}
483 			}
484 		}
485 	}
486 
487 	/* initialize state machine */
488 	memset(&myctx, 0, sizeof(myctx));
489 	io_queue_init(aio_maxio, &myctx);
490 	tocopy = howmany(length, aio_blksize);
491 
492 	if (init_iocb(aio_maxio, aio_blksize) < 0) {
493 		fprintf(stderr, "Error allocating the i/o buffers\n");
494 		exit(1);
495 	}
496 
497 	while (tocopy > 0) {
498 		int i, rc;
499 		/* Submit as many reads as once as possible upto aio_maxio */
500 		int n = MIN(MIN(aio_maxio - busy, aio_maxio),
501 			    howmany(length - offset, aio_blksize));
502 		if (n > 0) {
503 			struct iocb *ioq[n];
504 
505 			for (i = 0; i < n; i++) {
506 				struct iocb *io = alloc_iocb();
507 				int iosize = MIN(length - offset, aio_blksize);
508 
509 				if (zero) {
510 					/*
511 					 * We are writing zero's to dstfd
512 					 */
513 					io_prep_pwrite(io, dstfd, io->u.c.buf,
514 						       iosize, offset);
515 					io_set_callback(io, wr_done);
516 				} else {
517 					io_prep_pread(io, srcfd, io->u.c.buf,
518 						      iosize, offset);
519 					io_set_callback(io, rd_done);
520 				}
521 				ioq[i] = io;
522 				offset += iosize;
523 			}
524 
525 			rc = io_submit(myctx, n, ioq);
526 			if (rc < 0)
527 				io_error("io_submit", rc);
528 
529 			busy += n;
530 			if (debug > 1)
531 				printf("io_submit(%d) busy:%d\n", n, busy);
532 			if (delay.tv_usec) {
533 				struct timeval t = delay;
534 				(void)select(0, 0, 0, 0, &t);
535 			}
536 		}
537 
538 		/*
539 		 * We have submitted all the i/o requests. Wait for at least one to complete
540 		 * and call the callbacks.
541 		 */
542 		count_io_q_waits++;
543 		rc = io_wait_run(myctx, 0);
544 		if (rc < 0)
545 			io_error("io_wait_run", rc);
546 
547 		if (debug > 1) {
548 			printf("io_wait_run: rc == %d\n", rc);
549 			printf("busy:%d aio_maxio:%d tocopy:%d\n",
550 			       busy, aio_maxio, tocopy);
551 		}
552 	}
553 
554 	if (leftover) {
555 		/* non-sector size end of file */
556 		struct iocb *io = alloc_iocb();
557 		int rc;
558 		if (zero) {
559 			/*
560 			 * We are writing zero's to dstfd2
561 			 */
562 			io_prep_pwrite(io, dstfd2, io->u.c.buf,
563 				       leftover, offset);
564 			io_set_callback(io, wr_done);
565 		} else {
566 			io_prep_pread(io, srcfd2, io->u.c.buf,
567 				      leftover, offset);
568 			io_set_callback(io, rd_done);
569 		}
570 		rc = io_submit(myctx, 1, &io);
571 		if (rc < 0)
572 			io_error("io_submit", rc);
573 		count_io_q_waits++;
574 		rc = io_wait_run(myctx, 0);
575 		if (rc < 0)
576 			io_error("io_wait_run", rc);
577 	}
578 
579 	if (srcfd != -1)
580 		close(srcfd);
581 	if (dstfd != -1)
582 		close(dstfd);
583 	exit(0);
584 }
585 
586 /*
587  * Results look like:
588  * [alanm@toolbox ~/MOT3]$ ../taio -d kernel-source-2.4.8-0.4g.ppc.rpm abc
589  * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww
590  * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr
591  * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww
592  * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww
593  * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr
594  * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww
595  * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr
596  * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww
597  * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw
598  */
599 
600 #else
601 
/*
 * Fallback entry point for builds without libaio: report that the test
 * cannot run and return 1.
 */
int main(void)
{
	fputs("System doesn't have libaio support.\n", stderr);
	return 1;
}
607 
608 #endif
609