1 /*
2  * version of copy command using async i/o
3  * From:	Stephen Hemminger <shemminger@osdl.org>
4  * Modified by Daniel McNeil <daniel@osdl.org> for testing aio.
5  *	- added -a alignment
6  *	- added -b blksize option
 *	- added -s size option
8  *	- added -f open_flag option
9  *	- added -w (no write) option (reads from source only)
10  *	- added -n (num aio) option
 *	- added -z (zero dest) option (writes zeros to dest only)
12  *	- added -D delay_ms option
13  *
14  * Copy file by using a async I/O state machine.
15  * 1. Start read request
16  * 2. When read completes turn it into a write request
17  * 3. When write completes decrement counter and free resources
18  *
19  *
 * Usage: aiocp [-a align] [-b blksize] [-n num_aio] [-w] [-z] [-s filesize]
 *		[-d] [-D delay] [-f DIRECT|TRUNC|CREAT|SYNC|LARGEFILE] src dest
22  */
23 
24 #define _GNU_SOURCE
25 
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/param.h>
31 #include <fcntl.h>
32 #include <errno.h>
33 #include <stdlib.h>
34 #include <mntent.h>
35 #include <sys/select.h>
36 #include <sys/mount.h>
37 
38 #include "config.h"
39 #include "tst_res_flags.h"
40 
41 #ifdef HAVE_LIBAIO
42 #include <libaio.h>
43 
/* Defaults used when -b / -n are not given on the command line. */
#define AIO_BLKSIZE	(64*1024)
#define AIO_MAXIO	32

static int aio_blksize = AIO_BLKSIZE;	/* bytes per request (-b) */
static int aio_maxio = AIO_MAXIO;	/* iocb pool / queue size (-n) */

static int busy = 0;		// # of I/O's in flight
static int tocopy = 0;		// # of blocks left to copy
static int srcfd;		// source fd
static int srcfd2;		// source fd opened without O_DIRECT, for the unaligned tail
static int dstfd = -1;		// destination file descriptor
static int dstfd2 = -1;		// dest fd opened without O_DIRECT, for the unaligned tail
static const char *dstname = NULL;	/* destination path, NULL until opened */
static const char *srcname = NULL;	/* source path, NULL until opened */
static int source_open_flag = O_RDONLY;	/* open flags on source file */
static int dest_open_flag = O_WRONLY;	/* open flags on dest file */
static int no_write;		/* -w: do not write, only read the source */
static int zero;		/* -z: write zeros only, no source is read */

static int debug;		/* verbosity, bumped by each -d */
static int count_io_q_waits;	/* how many times io_wait_run() was called */

struct iocb **iocb_free;	/* stack of pointers to currently unused iocbs */
int iocb_free_count;		/* number of valid entries in iocb_free */
int alignment = 512;		/* data buffer alignment in bytes (-a) */

struct timeval delay;		/* pause after each submit batch; tv_usec set by -D */
71 
/*
 * dev_block_size_by_path() - sector size of the block device backing a path.
 *
 * Scans /proc/mounts for the longest mount point that is a path-component
 * prefix of @path, opens the device node named by that entry and queries it
 * with the BLKSSZGET ioctl.  Progress is printed to stdout, failures to
 * stderr.
 *
 * Returns the sector size in bytes, or 0 when @path is NULL, when no mount
 * entry backed by a device node matches, or when the device cannot be opened
 * or queried.
 */
static int dev_block_size_by_path(const char *path)
{
	FILE *f;
	struct mntent *mnt;
	size_t prefix_len, prefix_max = 0;
	char dev_name[1024];
	int fd, size;

	if (!path)
		return 0;

	f = setmntent("/proc/mounts", "r");
	if (!f) {
		fprintf(stderr, "Failed to open /proc/mounts\n");
		return 0;
	}

	while ((mnt = getmntent(f))) {
		/* Skip pseudo fs */
		if (mnt->mnt_fsname[0] != '/')
			continue;

		prefix_len = strlen(mnt->mnt_dir);

		/* Only a strictly longer (hence non-empty) match can win. */
		if (prefix_len <= prefix_max)
			continue;
		if (strncmp(path, mnt->mnt_dir, prefix_len))
			continue;

		/*
		 * The match has to end on a path component boundary,
		 * otherwise "/mnt" would be picked for "/mnt2/file".
		 * A mount point ending in '/' (the root) always qualifies.
		 */
		if (mnt->mnt_dir[prefix_len - 1] != '/' &&
		    path[prefix_len] != '/' && path[prefix_len] != '\0')
			continue;

		prefix_max = prefix_len;
		/* snprintf() truncates if needed and always terminates. */
		snprintf(dev_name, sizeof(dev_name), "%s", mnt->mnt_fsname);
	}

	endmntent(f);

	if (!prefix_max) {
		fprintf(stderr, "Path '%s' not found in /proc/mounts\n", path);
		return 0;
	}

	printf("Path '%s' is on device '%s'\n", path, dev_name);

	/* open() reports failure with -1; 0 is a valid descriptor. */
	fd = open(dev_name, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "open('%s'): %s\n", dev_name, strerror(errno));
		return 0;
	}

	if (ioctl(fd, BLKSSZGET, &size)) {
		fprintf(stderr, "ioctl(BLKSSZGET): %s\n", strerror(errno));
		close(fd);
		return 0;
	}

	close(fd);
	printf("'%s' has block size %i\n", dev_name, size);

	return size;
}
130 
init_iocb(int n,int iosize)131 int init_iocb(int n, int iosize)
132 {
133 	void *buf;
134 	int i;
135 
136 	if ((iocb_free = malloc(n * sizeof(struct iocb *))) == 0) {
137 		return -1;
138 	}
139 
140 	for (i = 0; i < n; i++) {
141 		if (!
142 		    (iocb_free[i] = malloc(sizeof(struct iocb))))
143 			return -1;
144 		if (posix_memalign(&buf, alignment, iosize))
145 			return -1;
146 		if (debug > 1) {
147 			printf("buf allocated at 0x%p, align:%d\n",
148 			       buf, alignment);
149 		}
150 		if (zero) {
151 			/*
152 			 * We are writing zero's to dstfd
153 			 */
154 			memset(buf, 0, iosize);
155 		}
156 		io_prep_pread(iocb_free[i], -1, buf, iosize, 0);
157 	}
158 	iocb_free_count = i;
159 	return 0;
160 }
161 
alloc_iocb(void)162 static struct iocb *alloc_iocb(void)
163 {
164 	if (!iocb_free_count)
165 		return 0;
166 	return iocb_free[--iocb_free_count];
167 }
168 
free_iocb(struct iocb * io)169 void free_iocb(struct iocb *io)
170 {
171 	iocb_free[iocb_free_count++] = io;
172 }
173 
174 /*
175  * io_wait_run() - wait for an io_event and then call the callback.
176  */
io_wait_run(io_context_t ctx,struct timespec * to)177 int io_wait_run(io_context_t ctx, struct timespec *to)
178 {
179 	struct io_event events[aio_maxio];
180 	struct io_event *ep;
181 	int ret, n;
182 
183 	/*
184 	 * get up to aio_maxio events at a time.
185 	 */
186 	ret = n = io_getevents(ctx, 1, aio_maxio, events, to);
187 
188 	/*
189 	 * Call the callback functions for each event.
190 	 */
191 	for (ep = events; n-- > 0; ep++) {
192 		io_callback_t cb = (io_callback_t) ep->data;
193 		struct iocb *iocb = ep->obj;
194 
195 		if (debug > 1) {
196 			fprintf(stderr, "ev:%p iocb:%p res:%ld res2:%ld\n",
197 				ep, iocb, ep->res, ep->res2);
198 		}
199 		cb(ctx, iocb, ep->res, ep->res2);
200 	}
201 	return ret;
202 }
203 
204 /* Fatal error handler */
io_error(const char * func,int rc)205 static void io_error(const char *func, int rc)
206 {
207 	if (rc == -ENOSYS)
208 		fprintf(stderr, "AIO not in this kernel\n");
209 	else if (rc < 0)
210 		fprintf(stderr, "%s: %s\n", func, strerror(-rc));
211 	else
212 		fprintf(stderr, "%s: error %d\n", func, rc);
213 
214 	if (dstfd > 0)
215 		close(dstfd);
216 	if (dstname && dest_open_flag & O_CREAT)
217 		unlink(dstname);
218 	exit(1);
219 }
220 
221 /*
222  * Write complete callback.
223  * Adjust counts and free resources
224  */
wr_done(io_context_t ctx,struct iocb * iocb,long res,long res2)225 static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
226 {
227 	if (res2 != 0) {
228 		io_error("aio write", res2);
229 	}
230 	if (res != iocb->u.c.nbytes) {
231 		fprintf(stderr, "write missed bytes expect %lu got %ld\n",
232 			iocb->u.c.nbytes, res);
233 		exit(1);
234 	}
235 	--tocopy;
236 	--busy;
237 	free_iocb(iocb);
238 	if (debug)
239 		write(2, "w", 1);
240 }
241 
242 /*
243  * Read complete callback.
244  * Change read iocb into a write iocb and start it.
245  */
rd_done(io_context_t ctx,struct iocb * iocb,long res,long res2)246 static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
247 {
248 	/* library needs accessors to look at iocb? */
249 	int iosize = iocb->u.c.nbytes;
250 	char *buf = iocb->u.c.buf;
251 	off_t offset = iocb->u.c.offset;
252 
253 	if (res2 != 0)
254 		io_error("aio read", res2);
255 	if (res != iosize) {
256 		fprintf(stderr, "read missing bytes expect %lu got %ld\n",
257 			iocb->u.c.nbytes, res);
258 		exit(1);
259 	}
260 
261 	/* turn read into write */
262 	if (no_write) {
263 		--tocopy;
264 		--busy;
265 		free_iocb(iocb);
266 	} else {
267 		int fd;
268 		if (iocb->aio_fildes == srcfd)
269 			fd = dstfd;
270 		else
271 			fd = dstfd2;
272 		io_prep_pwrite(iocb, fd, buf, iosize, offset);
273 		io_set_callback(iocb, wr_done);
274 		if (1 != (res = io_submit(ctx, 1, &iocb)))
275 			io_error("io_submit write", res);
276 	}
277 	if (debug)
278 		write(2, "r", 1);
279 	if (debug > 1)
280 		printf("%d", iosize);
281 }
282 
/*
 * usage() - print the command line synopsis to stderr and exit with 1.
 */
static void usage(void)
{
	static const char help[] =
		"Usage: aiocp [-a align] [-s size] [-b blksize] [-n num_io]"
		" [-f open_flag] SOURCE DEST\n"
		"This copies from SOURCE to DEST using AIO.\n\n"
		"Usage: aiocp [options] -w SOURCE\n"
		"This does sequential AIO reads (no writes).\n\n"
		"Usage: aiocp [options] -z DEST\n"
		"This does sequential AIO writes of zeros.\n";

	fputs(help, stderr);
	exit(1);
}
296 
/*
 * scale_by_kmg() - apply a kilo/mega/giga suffix to a number.
 *
 * @scale is the character following the digits: 'k'/'K', 'm'/'M' and
 * 'g'/'G' multiply @value by 1024 once, twice and three times, a NUL
 * leaves it untouched.  Any other character ends the program via usage().
 */
long long scale_by_kmg(long long value, char scale)
{
	int steps = 0;

	if (scale == 'g' || scale == 'G')
		steps = 3;
	else if (scale == 'm' || scale == 'M')
		steps = 2;
	else if (scale == 'k' || scale == 'K')
		steps = 1;
	else if (scale != '\0')
		usage();

	while (steps-- > 0)
		value *= 1024;

	return value;
}
321 
main(int argc,char * const * argv)322 int main(int argc, char *const *argv)
323 {
324 	struct stat st;
325 	off_t length = 0, offset = 0;
326 	off_t leftover = 0;
327 	io_context_t myctx;
328 	int c;
329 	extern char *optarg;
330 	extern int optind, opterr, optopt;
331 
332 	while ((c = getopt(argc, argv, "a:b:df:n:s:wzD:")) != -1) {
333 		char *endp;
334 
335 		switch (c) {
336 		case 'a':	/* alignment of data buffer */
337 			alignment = strtol(optarg, &endp, 0);
338 			alignment = (long)scale_by_kmg((long long)alignment,
339 						       *endp);
340 			break;
341 		case 'f':	/* use these open flags */
342 			if (strcmp(optarg, "LARGEFILE") == 0 ||
343 			    strcmp(optarg, "O_LARGEFILE") == 0) {
344 				source_open_flag |= O_LARGEFILE;
345 				dest_open_flag |= O_LARGEFILE;
346 			} else if (strcmp(optarg, "TRUNC") == 0 ||
347 				   strcmp(optarg, "O_TRUNC") == 0) {
348 				dest_open_flag |= O_TRUNC;
349 			} else if (strcmp(optarg, "SYNC") == 0 ||
350 				   strcmp(optarg, "O_SYNC") == 0) {
351 				dest_open_flag |= O_SYNC;
352 			} else if (strcmp(optarg, "DIRECT") == 0 ||
353 				   strcmp(optarg, "O_DIRECT") == 0) {
354 				source_open_flag |= O_DIRECT;
355 				dest_open_flag |= O_DIRECT;
356 			} else if (strncmp(optarg, "CREAT", 5) == 0 ||
357 				   strncmp(optarg, "O_CREAT", 5) == 0) {
358 				dest_open_flag |= O_CREAT;
359 			}
360 			break;
361 		case 'd':
362 			debug++;
363 			break;
364 		case 'D':
365 			delay.tv_usec = atoi(optarg);
366 			break;
367 		case 'b':	/* block size */
368 			aio_blksize = strtol(optarg, &endp, 0);
369 			aio_blksize =
370 			    (long)scale_by_kmg((long long)aio_blksize, *endp);
371 			break;
372 
373 		case 'n':	/* num io */
374 			aio_maxio = strtol(optarg, &endp, 0);
375 			break;
376 		case 's':	/* size to transfer */
377 			length = strtoll(optarg, &endp, 0);
378 			length = scale_by_kmg(length, *endp);
379 			break;
380 		case 'w':	/* no write */
381 			no_write = 1;
382 			break;
383 		case 'z':	/* write zero's */
384 			zero = 1;
385 			break;
386 
387 		default:
388 			usage();
389 		}
390 	}
391 
392 	argc -= optind;
393 	argv += optind;
394 
395 	if (argc < 1) {
396 		usage();
397 	}
398 	if (!zero) {
399 		if ((srcfd = open(srcname = *argv, source_open_flag)) < 0) {
400 			perror(srcname);
401 			exit(1);
402 		}
403 		argv++;
404 		argc--;
405 		if (fstat(srcfd, &st) < 0) {
406 			perror("fstat");
407 			exit(1);
408 		}
409 		if (length == 0)
410 			length = st.st_size;
411 	}
412 
413 	if (!no_write) {
414 		/*
415 		 * We are either copying or writing zeros to dstname
416 		 */
417 		if (argc < 1) {
418 			usage();
419 		}
420 		if ((dstfd = open(dstname = *argv, dest_open_flag, 0666)) < 0) {
421 			perror(dstname);
422 			exit(1);
423 		}
424 		if (zero) {
425 			/*
426 			 * get size of dest, if we are zeroing it.
427 			 * TODO: handle devices.
428 			 */
429 			if (fstat(dstfd, &st) < 0) {
430 				perror("fstat");
431 				exit(1);
432 			}
433 			if (length == 0)
434 				length = st.st_size;
435 		}
436 	}
437 	/*
438 	 * O_DIRECT cannot handle non-sector sizes
439 	 */
440 	if (dest_open_flag & O_DIRECT) {
441 		int src_alignment = dev_block_size_by_path(srcname);
442 		int dst_alignment = dev_block_size_by_path(dstname);
443 
444 		/*
445 		 * Given we expect the block sizes to be multiple of 2 the
446 		 * larger is always divideable by the smaller, so we only need
447 		 * to care about maximum.
448 		 */
449 		if (src_alignment > dst_alignment)
450 			dst_alignment = src_alignment;
451 
452 		if (alignment < dst_alignment) {
453 			alignment = dst_alignment;
454 			printf("Forcing aligment to %i\n", alignment);
455 		}
456 
457 		if (aio_blksize % alignment) {
458 			printf("Block size is not multiple of drive block size\n");
459 			printf("Skipping the test!\n");
460 			exit(0);
461 		}
462 
463 		leftover = length % alignment;
464 		if (leftover) {
465 			int flag;
466 
467 			length -= leftover;
468 			if (!zero) {
469 				flag = source_open_flag & ~O_DIRECT;
470 				srcfd2 = open(srcname, flag);
471 				if (srcfd2 < 0) {
472 					perror(srcname);
473 					exit(1);
474 				}
475 			}
476 			if (!no_write) {
477 				flag = (O_SYNC | dest_open_flag) &
478 				    ~(O_DIRECT | O_CREAT);
479 				dstfd2 = open(dstname, flag);
480 				if (dstfd2 < 0) {
481 					perror(dstname);
482 					exit(1);
483 				}
484 			}
485 		}
486 	}
487 
488 	/* initialize state machine */
489 	memset(&myctx, 0, sizeof(myctx));
490 	io_queue_init(aio_maxio, &myctx);
491 	tocopy = howmany(length, aio_blksize);
492 
493 	if (init_iocb(aio_maxio, aio_blksize) < 0) {
494 		fprintf(stderr, "Error allocating the i/o buffers\n");
495 		exit(1);
496 	}
497 
498 	while (tocopy > 0) {
499 		int i, rc;
500 		/* Submit as many reads as once as possible upto aio_maxio */
501 		int n = MIN(MIN(aio_maxio - busy, aio_maxio),
502 			    howmany(length - offset, aio_blksize));
503 		if (n > 0) {
504 			struct iocb *ioq[n];
505 
506 			for (i = 0; i < n; i++) {
507 				struct iocb *io = alloc_iocb();
508 				int iosize = MIN(length - offset, aio_blksize);
509 
510 				if (zero) {
511 					/*
512 					 * We are writing zero's to dstfd
513 					 */
514 					io_prep_pwrite(io, dstfd, io->u.c.buf,
515 						       iosize, offset);
516 					io_set_callback(io, wr_done);
517 				} else {
518 					io_prep_pread(io, srcfd, io->u.c.buf,
519 						      iosize, offset);
520 					io_set_callback(io, rd_done);
521 				}
522 				ioq[i] = io;
523 				offset += iosize;
524 			}
525 
526 			rc = io_submit(myctx, n, ioq);
527 			if (rc < 0)
528 				io_error("io_submit", rc);
529 
530 			busy += n;
531 			if (debug > 1)
532 				printf("io_submit(%d) busy:%d\n", n, busy);
533 			if (delay.tv_usec) {
534 				struct timeval t = delay;
535 				(void)select(0, 0, 0, 0, &t);
536 			}
537 		}
538 
539 		/*
540 		 * We have submitted all the i/o requests. Wait for at least one to complete
541 		 * and call the callbacks.
542 		 */
543 		count_io_q_waits++;
544 		rc = io_wait_run(myctx, 0);
545 		if (rc < 0)
546 			io_error("io_wait_run", rc);
547 
548 		if (debug > 1) {
549 			printf("io_wait_run: rc == %d\n", rc);
550 			printf("busy:%d aio_maxio:%d tocopy:%d\n",
551 			       busy, aio_maxio, tocopy);
552 		}
553 	}
554 
555 	if (leftover) {
556 		/* non-sector size end of file */
557 		struct iocb *io = alloc_iocb();
558 		int rc;
559 		if (zero) {
560 			/*
561 			 * We are writing zero's to dstfd2
562 			 */
563 			io_prep_pwrite(io, dstfd2, io->u.c.buf,
564 				       leftover, offset);
565 			io_set_callback(io, wr_done);
566 		} else {
567 			io_prep_pread(io, srcfd2, io->u.c.buf,
568 				      leftover, offset);
569 			io_set_callback(io, rd_done);
570 		}
571 		rc = io_submit(myctx, 1, &io);
572 		if (rc < 0)
573 			io_error("io_submit", rc);
574 		count_io_q_waits++;
575 		rc = io_wait_run(myctx, 0);
576 		if (rc < 0)
577 			io_error("io_wait_run", rc);
578 	}
579 
580 	if (srcfd != -1)
581 		close(srcfd);
582 	if (dstfd != -1)
583 		close(dstfd);
584 	exit(0);
585 }
586 
587 /*
588  * Results look like:
589  * [alanm@toolbox ~/MOT3]$ ../taio -d kernel-source-2.4.8-0.4g.ppc.rpm abc
590  * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww
591  * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr
592  * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww
593  * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww
594  * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr
595  * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww
596  * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr
597  * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww
598  * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw
599  */
600 
601 #else
main(void)602 int main(void)
603 {
604 	fprintf(stderr, "test requires libaio and it's development packages\n");
605 	return TCONF;
606 }
607 #endif
608