1 /*
2  * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc.,
21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
31  */
32 /*
33  * doio -	a general purpose io initiator with system call and
34  *		write logging.  See doio.h for the structure which defines
35  *		what doio requests should look like.
36  *
37  *		Currently doio can handle read,write,reada,writea,ssread,
38  *		sswrite, and many varieties of listio requests.
39  *		For disk io, if the O_SSD flag is set doio will allocate
40  *		the appropriate amount of ssd and do the transfer - thus, doio
41  *		can handle all of the primitive types of file io.
42  *
43  * programming
44  * notes:
45  * -----------
46  *	messages should generally be printed using doio_fprintf().
47  *
48  */
49 
50 #include <stdio.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <stdlib.h>
54 #include <signal.h>
55 #include <string.h>
56 #include <ctype.h>
57 #include <unistd.h>
58 #include <time.h>
59 #include <stdarg.h>
60 #include <sys/stat.h>
61 #include <sys/param.h>
62 #include <sys/types.h>
63 #include <sys/sysmacros.h>
64 #ifdef CRAY
65 #include <sys/iosw.h>
66 #endif
67 #ifdef sgi
68 #include <aio.h>		/* for aio_read,write */
69 #include <inttypes.h>		/* for uint64_t type */
70 #include <siginfo.h>		/* signal handlers & SA_SIGINFO */
71 #endif
72 #ifndef CRAY
73 #include <sys/uio.h>		/* for struct iovec (readv) */
74 #include <sys/mman.h>		/* for mmap(2) */
75 #include <sys/ipc.h>		/* for i/o buffer in shared memory */
76 #include <sys/shm.h>		/* for i/o buffer in shared memory */
77 #endif
78 #include <sys/wait.h>
79 #ifdef CRAY
80 #include <sys/listio.h>
81 #include <sys/panic.h>
82 #endif
83 #include <sys/time.h>		/* for delays */
84 
85 #include "doio.h"
86 #include "write_log.h"
87 #include "random_range.h"
88 #include "string_to_tokens.h"
89 #include "pattern.h"
90 
91 #define	NMEMALLOC	32
92 #define	MEM_DATA	1	/* data space                           */
93 #define	MEM_SHMEM	2	/* System V shared memory               */
94 #define	MEM_T3ESHMEM	3	/* T3E Shared Memory                    */
95 #define	MEM_MMAP	4	/* mmap(2)                              */
96 
97 #define	MEMF_PRIVATE	0001
98 #define	MEMF_AUTORESRV	0002
99 #define	MEMF_LOCAL	0004
100 #define	MEMF_SHARED	0010
101 
102 #define	MEMF_FIXADDR	0100
103 #define	MEMF_ADDR	0200
104 #define	MEMF_AUTOGROW	0400
105 #define	MEMF_FILE	01000	/* regular file -- unlink on close      */
106 #define	MEMF_MPIN	010000	/* use mpin(2) to lock pages in memory */
107 
/*
 * Descriptor for one memory allocation.  The file-scope array Memalloc[]
 * provides NMEMALLOC of these.
 * NOTE(review): presumably one entry per allocation strategy counted by
 * Nmemalloc -- confirm against parse_memalloc()/alloc_mem().
 */
struct memalloc {
	int memtype;		/* presumably one of the MEM_* values -- confirm */
	int flags;		/* presumably a mask of MEMF_* bits -- confirm */
	int nblks;
	char *name;
	void *space;		/* memory address of allocated space */
	int fd;			/* FD open for mmapping */
	int size;
} Memalloc[NMEMALLOC];
117 
118 /*
119  * Structure for maintaining open file test descriptors.  Used by
120  * alloc_fd().
121  */
122 
struct fd_cache {
	char c_file[MAX_FNAME_LENGTH + 1];	/* file name buffer */
	int c_oflags;		/* presumably the open flags for c_fd -- confirm */
	int c_fd;
	long c_rtc;		/* NOTE(review): meaning not visible here -- confirm */
#ifdef sgi
	int c_memalign;		/* from F_DIOINFO */
	int c_miniosz;
	int c_maxiosz;
#endif
#ifndef CRAY
	void *c_memaddr;	/* mmapped address */
	int c_memlen;		/* length of above region */
#endif
};
138 
139 /*
140  * Name-To-Value map
141  * Used to map cmdline arguments to values
142  */
struct smap {
	char *string;		/* symbolic name; the tables in this file end
				 * with either a NULL or an "unknown" entry */
	int value;		/* value associated with the name */
};
147 
/*
 * Bookkeeping for one asynchronous I/O operation.  Aio_Info[] holds
 * MAX_AIO of these.
 */
struct aio_info {
	int busy;
	int id;			/* presumably the aio_id used by aio_slot() -- confirm */
	int fd;
	int strategy;		/* presumably an A_* completion strategy -- confirm */
	volatile int done;
#ifdef CRAY
	struct iosw iosw;
#endif
#ifdef sgi
	aiocb_t aiocb;
	int aio_ret;		/* from aio_return */
	int aio_errno;		/* from aio_error */
#endif
	int sig;
	int signalled;
	struct sigaction osa;	/* presumably the saved disposition of sig -- confirm */
};
166 
167 /* ---------------------------------------------------------------------------
168  *
169  * A new paradigm of doing the r/w system call where there is a "stub"
170  * function that builds the info for the system call, then does the system
171  * call; this is called by code that is common to all system calls and does
172  * the syscall return checking, async I/O wait, iosw check, etc.
173  *
174  * Flags:
175  *	WRITE, ASYNC, SSD/SDS,
176  *	FILE_LOCK, WRITE_LOG, VERIFY_DATA,
177  */
178 
/* Result record; the sy_* stubs declared in this file return a pointer to one. */
struct status {
	int rval;		/* syscall return */
	int err;		/* errno */
	int *aioid;		/* list of async I/O structures */
};
184 
/*
 * Description of one system-call "stub".  The function pointers are
 * declared with empty parameter lists, so their arguments are not
 * checked by the compiler.
 */
struct syscall_info {
	char *sy_name;
	int sy_type;
	struct status *(*sy_syscall) ();	/* returns a struct status */
	int (*sy_buffer) ();
	char *(*sy_format) ();
	int sy_flags;		/* presumably a mask of SY_* bits -- confirm */
	int sy_bits;
};
194 
195 #define	SY_WRITE		00001
196 #define	SY_ASYNC		00010
197 #define	SY_IOSW			00020
198 #define	SY_SDS			00100
199 
200 #ifndef O_SSD
201 #define O_SSD 0			/* so code compiles on a CRAY2 */
202 #endif
203 
204 #ifdef sgi
205 #define UINT64_T uint64_t
206 #else
207 #define UINT64_T unsigned long
208 #endif
209 
210 #ifndef O_PARALLEL
211 #define O_PARALLEL 0		/* so O_PARALLEL may be used in expressions */
212 #endif
213 
214 #define PPID_CHECK_INTERVAL 5	/* check ppid every <-- iterations */
215 #define	MAX_AIO		256	/* maximum number of async I/O ops */
216 #ifdef _CRAYMPP
217 #define	MPP_BUMP	16	/* page un-alignment for MPP */
218 #else
219 #define	MPP_BUMP	0
220 #endif
221 
222 #define	SYSERR strerror(errno)
223 
224 /*
225  * getopt() string of supported cmdline arguments.
226  */
227 
228 #define OPTS	"aC:d:ehm:n:kr:w:vU:V:M:N:"
229 
230 #define DEF_RELEASE_INTERVAL	0
231 
232 /*
233  * Flags set in parse_cmdline() to indicate which options were selected
234  * on the cmdline.
235  */
236 
int a_opt = 0;			/* abort on data compare errors     */
int e_opt = 0;			/* exec() after fork()'ing          */
int C_opt = 0;			/* Data Check Type                  */
int d_opt = 0;			/* delay between operations         */
int k_opt = 0;			/* lock file regions during writes  */
int m_opt = 0;			/* generate periodic messages       */
int n_opt = 0;			/* nprocs                           */
int r_opt = 0;			/* resource release interval        */
int w_opt = 0;			/* file write log file              */
int v_opt = 0;			/* verify writes if set             */
int U_opt = 0;			/* upanic() on various conditions   */
int V_opt = 0;			/* over-ride default validation fd type */
int M_opt = 0;			/* data buffer allocation types     */
char TagName[40];		/* name of this doio (see Monster)  */
251 
252 /*
253  * Misc globals initialized in parse_cmdline()
254  */
255 
256 char *Prog = NULL;		/* set up in parse_cmdline()                */
257 int Upanic_Conditions;		/* set by args to -U                        */
258 int Release_Interval;		/* arg to -r                                */
259 int Nprocs;			/* arg to -n                                */
260 char *Write_Log;		/* arg to -w                                */
261 char *Infile;			/* input file (defaults to stdin)           */
262 int *Children;			/* pids of child procs                      */
263 int Nchildren = 0;
264 int Nsiblings = 0;		/* tfork'ed siblings                        */
265 int Execd = 0;
266 int Message_Interval = 0;
267 int Npes = 0;			/* non-zero if built as an mpp multi-pe app */
268 int Vpe = -1;			/* Virtual pe number if Npes >= 0           */
269 int Reqno = 1;			/* request # - used in some error messages  */
270 int Reqskipcnt = 0;		/* count of I/O requests that are skipped   */
271 int Validation_Flags;
272 char *(*Data_Check) ();		/* function to call for data checking       */
273 int (*Data_Fill) ();		/* function to call for data filling        */
274 int Nmemalloc = 0;		/* number of memory allocation strategies   */
275 int delayop = 0;		/* delay between operations - type of delay */
276 int delaytime = 0;		/* delay between operations - how long      */
277 
278 struct wlog_file Wlog;
279 
280 int active_mmap_rw = 0;		/* Indicates that mmapped I/O is occurring. */
281 			    /* Used by sigbus_action() in the child doio. */
282 int havesigint = 0;
283 
284 #define SKIP_REQ	-2	/* skip I/O request */
285 
286 /*
287  * Global file descriptors
288  */
289 
290 int Wfd_Append;			/* for appending to the write-log       */
291 int Wfd_Random;			/* for overlaying write-log entries     */
292 
293 #define FD_ALLOC_INCR	32	/* allocate this many fd_map structs    */
294 				/* at a time */
295 
296 /*
297  * Globals for tracking Sds and Core usage
298  */
299 
300 char *Memptr;			/* ptr to core buffer space             */
301 int Memsize;			/* # bytes pointed to by Memptr         */
302 				/* maintained by alloc_mem()            */
303 
304 int Sdsptr;			/* sds offset (always 0)                */
305 int Sdssize;			/* # bytes of allocated sds space       */
306 				/* Maintained by alloc_sds()            */
307 char Host[16];
308 char Pattern[128];
309 int Pattern_Length;
310 
311 /*
312  * Signal handlers, and related globals
313  */
314 
315 char *syserrno(int err);
316 void doio(void);
317 void doio_delay(void);
318 char *format_oflags(int oflags);
319 char *format_strat(int strategy);
320 char *format_rw(struct io_req *ioreq, int fd, void *buffer,
321 		int signo, char *pattern, void *iosw);
#ifdef CRAY
/* Formats a ssread/sswrite request; only built on CRAY. */
char *format_sds(struct io_req *ioreq, void *buffer, int sds, char *pattern);
#endif /* CRAY */
325 
326 int do_read(struct io_req *req);
327 int do_write(struct io_req *req);
328 int lock_file_region(char *fname, int fd, int type, int start, int nbytes);
329 
330 #ifdef CRAY
331 char *format_listio(struct io_req *ioreq, int lcmd,
332 		    struct listreq *list, int nent, int fd, char *pattern);
333 #endif /* CRAY */
334 
335 int do_listio(struct io_req *req);
336 
337 #if defined(_CRAY1) || defined(CRAY)
338 int do_ssdio(struct io_req *req);
339 #endif /* defined(_CRAY1) || defined(CRAY) */
340 
341 char *fmt_ioreq(struct io_req *ioreq, struct syscall_info *sy, int fd);
342 
343 #ifdef CRAY
344 struct status *sy_listio(struct io_req *req, struct syscall_info *sysc,
345 			 int fd, char *addr);
346 int listio_mem(struct io_req *req, int offset, int fmstride,
347 	       int *min, int *max);
348 char *fmt_listio(struct io_req *req, struct syscall_info *sy,
349 		 int fd, char *addr);
350 #endif /* CRAY */
351 
352 #ifdef sgi
353 struct status *sy_pread(struct io_req *req, struct syscall_info *sysc,
354 			int fd, char *addr);
355 struct status *sy_pwrite(struct io_req *req, struct syscall_info *sysc,
356 			 int fd, char *addr);
357 char *fmt_pread(struct io_req *req, struct syscall_info *sy,
358 		int fd, char *addr);
359 #endif /* sgi */
360 
361 #ifndef CRAY
362 struct status *sy_readv(struct io_req *req, struct syscall_info *sysc,
363 			int fd, char *addr);
364 struct status *sy_writev(struct io_req *req, struct syscall_info *sysc,
365 			 int fd, char *addr);
366 struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc,
367 		      int fd, char *addr, int rw);
368 char *fmt_readv(struct io_req *req, struct syscall_info *sy,
369 		int fd, char *addr);
370 #endif /* !CRAY */
371 
#ifdef sgi
/* Asynchronous read/write stubs and their formatter; only built on sgi. */
struct status *sy_aread(struct io_req *req, struct syscall_info *sysc,
			int fd, char *addr);
struct status *sy_awrite(struct io_req *req, struct syscall_info *sysc,
			 int fd, char *addr);
struct status *sy_arw(struct io_req *req, struct syscall_info *sysc,
		      int fd, char *addr, int rw);
char *fmt_aread(struct io_req *req, struct syscall_info *sy,
		int fd, char *addr);
#endif /* sgi */
382 
383 #ifndef CRAY
384 struct status *sy_mmread(struct io_req *req, struct syscall_info *sysc,
385 			 int fd, char *addr);
386 struct status *sy_mmwrite(struct io_req *req, struct syscall_info *sysc,
387 			  int fd, char *addr);
388 struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc,
389 		       int fd, char *addr, int rw);
390 char *fmt_mmrw(struct io_req *req, struct syscall_info *sy, int fd, char *addr);
391 #endif /* !CRAY */
392 
393 int do_rw(struct io_req *req);
394 
395 #ifdef sgi
396 int do_fcntl(struct io_req *req);
397 #endif /* sgi */
398 
399 #ifndef CRAY
400 int do_sync(struct io_req *req);
401 #endif /* !CRAY */
402 
403 int doio_pat_fill(char *addr, int mem_needed, char *Pattern,
404 		  int Pattern_Length, int shift);
405 char *doio_pat_check(char *buf, int offset, int length,
406 		     char *pattern, int pattern_length, int patshift);
407 char *check_file(char *file, int offset, int length, char *pattern,
408 		 int pattern_length, int patshift, int fsa);
409 int doio_fprintf(FILE * stream, char *format, ...);
410 int alloc_mem(int nbytes);
411 
412 #if defined(_CRAY1) || defined(CRAY)
413 int alloc_sds(int nbytes);
414 #endif /* defined(_CRAY1) || defined(CRAY) */
415 
416 int alloc_fd(char *file, int oflags);
417 struct fd_cache *alloc_fdcache(char *file, int oflags);
418 
419 #ifdef sgi
420 void signal_info(int sig, siginfo_t * info, void *v);
421 void cleanup_handler(int sig, siginfo_t * info, void *v);
422 void die_handler(int sig, siginfo_t * info, void *v);
423 void sigbus_handler(int sig, siginfo_t * info, void *v);
424 #else /* !sgi */
425 void cleanup_handler(int sig);
426 void die_handler(int sig);
427 
428 #ifndef CRAY
429 void sigbus_handler(int sig);
430 #endif /* !CRAY */
431 #endif /* sgi */
432 
433 void noop_handler(int sig);
434 void sigint_handler(int sig);
435 void aio_handler(int sig);
436 void dump_aio(void);
437 
438 #ifdef sgi
439 void cb_handler(sigval_t val);
440 #endif /* sgi */
441 
442 struct aio_info *aio_slot(int aio_id);
443 int aio_register(int fd, int strategy, int sig);
444 int aio_unregister(int aio_id);
445 
446 #ifndef __linux__
447 int aio_wait(int aio_id);
448 #endif /* !__linux__ */
449 
450 char *hms(time_t t);
451 int aio_done(struct aio_info *ainfo);
452 void doio_upanic(int mask);
453 int parse_cmdline(int argc, char **argv, char *opts);
454 
455 #ifndef CRAY
456 void parse_memalloc(char *arg);
457 void dump_memalloc(void);
458 #endif /* !CRAY */
459 
460 void parse_delay(char *arg);
461 int usage(FILE * stream);
462 void help(FILE * stream);
463 
464 /*
465  * Upanic conditions, and a map from symbolics to values
466  */
467 
468 #define U_CORRUPTION	0001	/* upanic on data corruption    */
469 #define U_IOSW	    	0002	/* upanic on bad iosw           */
470 #define U_RVAL	    	0004	/* upanic on bad rval           */
471 
472 #define U_ALL	    	(U_CORRUPTION | U_IOSW | U_RVAL)
473 
/* Symbolic name -> U_* mask table; terminated by a NULL string entry. */
struct smap Upanic_Args[] = {
	{"corruption", U_CORRUPTION},
	{"iosw", U_IOSW},
	{"rval", U_RVAL},
	{"all", U_ALL},		/* every U_* condition OR'ed together */
	{NULL, 0}
};
481 
482 struct aio_info Aio_Info[MAX_AIO];
483 
484 /* -C data-fill/check type */
485 #define	C_DEFAULT	1
/* Name -> C_* value table; terminated by a NULL string entry. */
struct smap checkmap[] = {
	{"default", C_DEFAULT},
	{NULL, 0},
};
490 
491 /* -d option delay types */
492 #define	DELAY_SELECT	1
493 #define	DELAY_SLEEP	2
494 #define	DELAY_SGINAP	3
495 #define	DELAY_ALARM	4
496 #define	DELAY_ITIMER	5	/* POSIX timer                          */
497 
/*
 * Name -> DELAY_* value table; terminated by a NULL string entry.
 * "sginap" is only present on sgi builds, and DELAY_ITIMER has no
 * entry in this table.
 */
struct smap delaymap[] = {
	{"select", DELAY_SELECT},
	{"sleep", DELAY_SLEEP},
#ifdef sgi
	{"sginap", DELAY_SGINAP},
#endif
	{"alarm", DELAY_ALARM},
	{NULL, 0},
};
507 
508 /******
509 *
510 * strerror() does similar actions.
511 
512 char *
513 syserrno(int err)
514 {
515     static char sys_errno[10];
516     sprintf(sys_errno, "%d", errno);
517     return(sys_errno);
518 }
519 
520 ******/
521 
main(int argc,char ** argv)522 int main(int argc, char **argv)
523 {
524 	int i, pid, stat, ex_stat;
525 #ifdef CRAY
526 	sigset_t omask;
527 #elif defined(linux)
528 	sigset_t omask, block_mask;
529 #else
530 	int omask;
531 #endif
532 	struct sigaction sa;
533 
534 	umask(0);		/* force new file modes to known values */
535 #if _CRAYMPP
536 	Npes = sysconf(_SC_CRAY_NPES);	/* must do this before parse_cmdline */
537 	Vpe = sysconf(_SC_CRAY_VPE);
538 #endif
539 
540 	TagName[0] = '\0';
541 	parse_cmdline(argc, argv, OPTS);
542 
543 	random_range_seed(getpid());	/* initialize random number generator */
544 
545 	/*
546 	 * If this is a re-exec of doio, jump directly into the doio function.
547 	 */
548 
549 	if (Execd) {
550 		doio();
551 		exit(E_SETUP);
552 	}
553 
554 	/*
555 	 * Stop on all but a few signals...
556 	 */
557 	sigemptyset(&sa.sa_mask);
558 	sa.sa_handler = sigint_handler;
559 	sa.sa_flags = SA_RESETHAND;	/* sigint is ignored after the */
560 	/* first time */
561 	for (i = 1; i <= NSIG; i++) {
562 		switch (i) {
563 #ifdef SIGRECOVERY
564 		case SIGRECOVERY:
565 			break;
566 #endif
567 #ifdef SIGCKPT
568 		case SIGCKPT:
569 #endif
570 #ifdef SIGRESTART
571 		case SIGRESTART:
572 #endif
573 		case SIGTSTP:
574 		case SIGSTOP:
575 		case SIGCONT:
576 		case SIGCHLD:
577 		case SIGBUS:
578 		case SIGSEGV:
579 		case SIGQUIT:
580 			break;
581 		default:
582 			sigaction(i, &sa, NULL);
583 		}
584 	}
585 
586 	/*
587 	 * If we're logging write operations, make a dummy call to wlog_open
588 	 * to initialize the write history file.  This call must be done in
589 	 * the parent, to ensure that the history file exists and/or has
590 	 * been truncated before any children attempt to open it, as the doio
591 	 * children are not allowed to truncate the file.
592 	 */
593 
594 	if (w_opt) {
595 		strcpy(Wlog.w_file, Write_Log);
596 
597 		if (wlog_open(&Wlog, 1, 0666) < 0) {
598 			doio_fprintf(stderr,
599 				     "Could not create/truncate write log %s\n",
600 				     Write_Log);
601 			exit(2);
602 		}
603 
604 		wlog_close(&Wlog);
605 	}
606 
607 	/*
608 	 * Malloc space for the children pid array.  Initialize all entries
609 	 * to -1.
610 	 */
611 
612 	Children = malloc(sizeof(int) * Nprocs);
613 	for (i = 0; i < Nprocs; i++) {
614 		Children[i] = -1;
615 	}
616 
617 	sigemptyset(&block_mask);
618 	sigaddset(&block_mask, SIGCHLD);
619 	sigprocmask(SIG_BLOCK, &block_mask, &omask);
620 
621 	/*
622 	 * Fork Nprocs.  This [parent] process is a watchdog, to notify the
623 	 * invoker of procs which exit abnormally, and to make sure that all
624 	 * child procs get cleaned up.  If the -e option was used, we will also
625 	 * re-exec.  This is mostly for unicos/mk on mpp's, to ensure that not
626 	 * all of the doio's don't end up in the same pe.
627 	 *
628 	 * Note - if Nprocs is 1, or this doio is a multi-pe app (Npes > 1),
629 	 * jump directly to doio().  multi-pe apps can't fork(), and there is
630 	 * no reason to fork() for 1 proc.
631 	 */
632 
633 	if (Nprocs == 1 || Npes > 1) {
634 		doio();
635 		exit(0);
636 	} else {
637 		for (i = 0; i < Nprocs; i++) {
638 			if ((pid = fork()) == -1) {
639 				doio_fprintf(stderr,
640 					     "(parent) Could not fork %d children:  %s (%d)\n",
641 					     i + 1, SYSERR, errno);
642 				exit(E_SETUP);
643 			}
644 
645 			Children[Nchildren] = pid;
646 			Nchildren++;
647 
648 			if (pid == 0) {
649 				if (e_opt) {
650 					char *exec_path;
651 
652 					exec_path = argv[0];
653 					argv[0] = malloc(strlen(exec_path) + 2);
654 					sprintf(argv[0], "-%s", exec_path);
655 
656 					execvp(exec_path, argv);
657 					doio_fprintf(stderr,
658 						     "(parent) Could not execvp %s:  %s (%d)\n",
659 						     exec_path, SYSERR, errno);
660 					exit(E_SETUP);
661 				} else {
662 					doio();
663 					exit(E_SETUP);
664 				}
665 			}
666 		}
667 
668 		/*
669 		 * Parent spins on wait(), until all children exit.
670 		 */
671 
672 		ex_stat = E_NORMAL;
673 
674 		while (Nprocs) {
675 			if ((pid = wait(&stat)) == -1) {
676 				if (errno == EINTR)
677 					continue;
678 			}
679 
680 			for (i = 0; i < Nchildren; i++)
681 				if (Children[i] == pid)
682 					Children[i] = -1;
683 
684 			Nprocs--;
685 
686 			if (WIFEXITED(stat)) {
687 				switch (WEXITSTATUS(stat)) {
688 				case E_NORMAL:
689 					/* noop */
690 					break;
691 
692 				case E_INTERNAL:
693 					doio_fprintf(stderr,
694 						     "(parent) pid %d exited because of an internal error\n",
695 						     pid);
696 					ex_stat |= E_INTERNAL;
697 					break;
698 
699 				case E_SETUP:
700 					doio_fprintf(stderr,
701 						     "(parent) pid %d exited because of a setup error\n",
702 						     pid);
703 					ex_stat |= E_SETUP;
704 					break;
705 
706 				case E_COMPARE:
707 					doio_fprintf(stderr,
708 						     "(parent) pid %d exited because of data compare errors\n",
709 						     pid);
710 
711 					ex_stat |= E_COMPARE;
712 
713 					if (a_opt)
714 						kill(0, SIGINT);
715 
716 					break;
717 
718 				case E_USAGE:
719 					doio_fprintf(stderr,
720 						     "(parent) pid %d exited because of a usage error\n",
721 						     pid);
722 
723 					ex_stat |= E_USAGE;
724 					break;
725 
726 				default:
727 					doio_fprintf(stderr,
728 						     "(parent) pid %d exited with unknown status %d\n",
729 						     pid, WEXITSTATUS(stat));
730 					ex_stat |= E_INTERNAL;
731 					break;
732 				}
733 			} else if (WIFSIGNALED(stat)
734 				   && WTERMSIG(stat) != SIGINT) {
735 				doio_fprintf(stderr,
736 					     "(parent) pid %d terminated by signal %d\n",
737 					     pid, WTERMSIG(stat));
738 
739 				ex_stat |= E_SIGNAL;
740 			}
741 
742 			fflush(NULL);
743 		}
744 	}
745 
746 	exit(ex_stat);
747 
748 }				/* main */
749 
750 /*
751  * main doio function.  Each doio child starts here, and never returns.
752  */
753 
doio(void)754 void doio(void)
755 {
756 	int rval, i, infd, nbytes;
757 	char *cp;
758 	struct io_req ioreq;
759 	struct sigaction sa, def_action, ignore_action, exit_action;
760 #ifndef CRAY
761 	struct sigaction sigbus_action;
762 #endif
763 
764 	Memsize = Sdssize = 0;
765 
766 	/*
767 	 * Initialize the Pattern - write-type syscalls will replace Pattern[1]
768 	 * with the pattern passed in the request.  Make sure that
769 	 * strlen(Pattern) is not mod 16 so that out of order words will be
770 	 * detected.
771 	 */
772 
773 	gethostname(Host, sizeof(Host));
774 	if ((cp = strchr(Host, '.')) != NULL)
775 		*cp = '\0';
776 
777 	Pattern_Length = sprintf(Pattern, "-:%d:%s:%s*", getpid(), Host, Prog);
778 
779 	if (!(Pattern_Length % 16)) {
780 		Pattern_Length = sprintf(Pattern, "-:%d:%s:%s**",
781 					 getpid(), Host, Prog);
782 	}
783 
784 	/*
785 	 * Open a couple of descriptors for the write-log file.  One descriptor
786 	 * is for appending, one for random access.  Write logging is done for
787 	 * file corruption detection.  The program doio_check is capable of
788 	 * doing corruption detection based on a doio write-log.
789 	 */
790 
791 	if (w_opt) {
792 
793 		strcpy(Wlog.w_file, Write_Log);
794 
795 		if (wlog_open(&Wlog, 0, 0666) == -1) {
796 			doio_fprintf(stderr,
797 				     "Could not open write log file (%s): wlog_open() failed\n",
798 				     Write_Log);
799 			exit(E_SETUP);
800 		}
801 	}
802 
803 	/*
804 	 * Open the input stream - either a file or stdin
805 	 */
806 
807 	if (Infile == NULL) {
808 		infd = 0;
809 	} else {
810 		if ((infd = open(Infile, O_RDWR)) == -1) {
811 			doio_fprintf(stderr,
812 				     "Could not open input file (%s):  %s (%d)\n",
813 				     Infile, SYSERR, errno);
814 			exit(E_SETUP);
815 		}
816 	}
817 
818 	/*
819 	 * Define a set of signals that should never be masked.  Receipt of
820 	 * these signals generally indicates a programming error, and we want
821 	 * a corefile at the point of error.  We put SIGQUIT in this list so
822 	 * that ^\ will force a user core dump.
823 	 *
824 	 * Note:  the handler for these should be SIG_DFL, all of them
825 	 * produce a corefile as the default action.
826 	 */
827 
828 	ignore_action.sa_handler = SIG_IGN;
829 	ignore_action.sa_flags = 0;
830 	sigemptyset(&ignore_action.sa_mask);
831 
832 	def_action.sa_handler = SIG_DFL;
833 	def_action.sa_flags = 0;
834 	sigemptyset(&def_action.sa_mask);
835 
836 #ifdef sgi
837 	exit_action.sa_sigaction = cleanup_handler;
838 	exit_action.sa_flags = SA_SIGINFO;
839 	sigemptyset(&exit_action.sa_mask);
840 
841 	sa.sa_sigaction = die_handler;
842 	sa.sa_flags = SA_SIGINFO;
843 	sigemptyset(&sa.sa_mask);
844 
845 	sigbus_action.sa_sigaction = sigbus_handler;
846 	sigbus_action.sa_flags = SA_SIGINFO;
847 	sigemptyset(&sigbus_action.sa_mask);
848 #else
849 	exit_action.sa_handler = cleanup_handler;
850 	exit_action.sa_flags = 0;
851 	sigemptyset(&exit_action.sa_mask);
852 
853 	sa.sa_handler = die_handler;
854 	sa.sa_flags = 0;
855 	sigemptyset(&sa.sa_mask);
856 
857 #ifndef CRAY
858 	sigbus_action.sa_handler = sigbus_handler;
859 	sigbus_action.sa_flags = 0;
860 	sigemptyset(&sigbus_action.sa_mask);
861 #endif
862 #endif
863 
864 	for (i = 1; i <= NSIG; i++) {
865 		switch (i) {
866 			/* Signals to terminate program on */
867 		case SIGINT:
868 			sigaction(i, &exit_action, NULL);
869 			break;
870 
871 #ifndef CRAY
872 			/* This depends on active_mmap_rw */
873 		case SIGBUS:
874 			sigaction(i, &sigbus_action, NULL);
875 			break;
876 #endif
877 
878 			/* Signals to Ignore... */
879 		case SIGSTOP:
880 		case SIGCONT:
881 #ifdef SIGRECOVERY
882 		case SIGRECOVERY:
883 #endif
884 			sigaction(i, &ignore_action, NULL);
885 			break;
886 
887 			/* Signals to trap & report & die */
888 			/*case SIGTRAP: */
889 			/*case SIGABRT: */
890 #ifdef SIGERR			/* cray only signals */
891 		case SIGERR:
892 		case SIGBUFIO:
893 		case SIGINFO:
894 #endif
895 			/*case SIGFPE: */
896 		case SIGURG:
897 		case SIGHUP:
898 		case SIGTERM:
899 		case SIGPIPE:
900 		case SIGIO:
901 		case SIGUSR1:
902 		case SIGUSR2:
903 			sigaction(i, &sa, NULL);
904 			break;
905 
906 			/* Default Action for all other signals */
907 		default:
908 			sigaction(i, &def_action, NULL);
909 			break;
910 		}
911 	}
912 
913 	/*
914 	 * Main loop - each doio proc does this until the read returns eof (0).
915 	 * Call the appropriate io function based on the request type.
916 	 */
917 
918 	while ((nbytes = read(infd, (char *)&ioreq, sizeof(ioreq)))) {
919 
920 		/*
921 		 * Periodically check our ppid.  If it is 1, the child exits to
922 		 * help clean up in the case that the main doio process was
923 		 * killed.
924 		 */
925 
926 		if (Reqno && ((Reqno % PPID_CHECK_INTERVAL) == 0)) {
927 			if (getppid() == 1) {
928 				doio_fprintf(stderr,
929 					     "Parent doio process has exited\n");
930 				alloc_mem(-1);
931 				exit(E_SETUP);
932 			}
933 		}
934 
935 		if (nbytes == -1) {
936 			doio_fprintf(stderr,
937 				     "read of %d bytes from input failed:  %s (%d)\n",
938 				     sizeof(ioreq), SYSERR, errno);
939 			alloc_mem(-1);
940 			exit(E_SETUP);
941 		}
942 
943 		if (nbytes != sizeof(ioreq)) {
944 			doio_fprintf(stderr,
945 				     "read wrong # bytes from input stream, expected %d, got %d\n",
946 				     sizeof(ioreq), nbytes);
947 			alloc_mem(-1);
948 			exit(E_SETUP);
949 		}
950 
951 		if (ioreq.r_magic != DOIO_MAGIC) {
952 			doio_fprintf(stderr,
953 				     "got a bad magic # from input stream.  Expected 0%o, got 0%o\n",
954 				     DOIO_MAGIC, ioreq.r_magic);
955 			alloc_mem(-1);
956 			exit(E_SETUP);
957 		}
958 
959 		/*
960 		 * If we're on a Release_Interval multiple, relase all ssd and
961 		 * core space, and close all fd's in Fd_Map[].
962 		 */
963 
964 		if (Reqno && Release_Interval && !(Reqno % Release_Interval)) {
965 			if (Memsize) {
966 #ifdef NOTDEF
967 				sbrk(-1 * Memsize);
968 #else
969 				alloc_mem(-1);
970 #endif
971 			}
972 #ifdef _CRAY1
973 			if (Sdssize) {
974 				ssbreak(-1 * btoc(Sdssize));
975 				Sdsptr = 0;
976 				Sdssize = 0;
977 			}
978 #endif /* _CRAY1 */
979 
980 			alloc_fd(NULL, 0);
981 		}
982 
983 		switch (ioreq.r_type) {
984 		case READ:
985 		case READA:
986 			rval = do_read(&ioreq);
987 			break;
988 
989 		case WRITE:
990 		case WRITEA:
991 			rval = do_write(&ioreq);
992 			break;
993 
994 		case READV:
995 		case AREAD:
996 		case PREAD:
997 		case LREAD:
998 		case LREADA:
999 		case LSREAD:
1000 		case LSREADA:
1001 		case WRITEV:
1002 		case AWRITE:
1003 		case PWRITE:
1004 		case MMAPR:
1005 		case MMAPW:
1006 		case LWRITE:
1007 		case LWRITEA:
1008 		case LSWRITE:
1009 		case LSWRITEA:
1010 		case LEREAD:
1011 		case LEREADA:
1012 		case LEWRITE:
1013 		case LEWRITEA:
1014 			rval = do_rw(&ioreq);
1015 			break;
1016 
1017 #ifdef CRAY
1018 		case SSREAD:
1019 		case SSWRITE:
1020 			rval = do_ssdio(&ioreq);
1021 			break;
1022 
1023 		case LISTIO:
1024 			rval = do_listio(&ioreq);
1025 			break;
1026 #endif
1027 
1028 #ifdef sgi
1029 		case RESVSP:
1030 		case UNRESVSP:
1031 #ifdef F_FSYNC
1032 		case DFFSYNC:
1033 #endif
1034 			rval = do_fcntl(&ioreq);
1035 			break;
1036 #endif /* sgi */
1037 
1038 #ifndef CRAY
1039 		case FSYNC2:
1040 		case FDATASYNC:
1041 			rval = do_sync(&ioreq);
1042 			break;
1043 #endif
1044 		default:
1045 			doio_fprintf(stderr,
1046 				     "Don't know how to handle io request type %d\n",
1047 				     ioreq.r_type);
1048 			alloc_mem(-1);
1049 			exit(E_SETUP);
1050 		}
1051 
1052 		if (rval == SKIP_REQ) {
1053 			Reqskipcnt++;
1054 		} else if (rval != 0) {
1055 			alloc_mem(-1);
1056 			doio_fprintf(stderr,
1057 				     "doio(): operation %d returned != 0\n",
1058 				     ioreq.r_type);
1059 			exit(E_SETUP);
1060 		}
1061 
1062 		if (Message_Interval && Reqno % Message_Interval == 0) {
1063 			doio_fprintf(stderr,
1064 				     "Info:  %d requests done (%d skipped) by this process\n",
1065 				     Reqno, Reqskipcnt);
1066 		}
1067 
1068 		Reqno++;
1069 
1070 		if (delayop != 0)
1071 			doio_delay();
1072 	}
1073 
1074 	/*
1075 	 * Child exits normally
1076 	 */
1077 	alloc_mem(-1);
1078 	exit(E_NORMAL);
1079 
1080 }				/* doio */
1081 
doio_delay(void)1082 void doio_delay(void)
1083 {
1084 	struct timeval tv_delay;
1085 	struct sigaction sa_al, sa_old;
1086 	sigset_t al_mask;
1087 
1088 	switch (delayop) {
1089 	case DELAY_SELECT:
1090 		tv_delay.tv_sec = delaytime / 1000000;
1091 		tv_delay.tv_usec = delaytime % 1000000;
1092 		/*doio_fprintf(stdout, "delay_select: %d %d\n",
1093 		   tv_delay.tv_sec, tv_delay.tv_usec); */
1094 		select(0, NULL, NULL, NULL, &tv_delay);
1095 		break;
1096 
1097 	case DELAY_SLEEP:
1098 		sleep(delaytime);
1099 		break;
1100 
1101 #ifdef sgi
1102 	case DELAY_SGINAP:
1103 		sginap(delaytime);
1104 		break;
1105 #endif
1106 
1107 	case DELAY_ALARM:
1108 		sa_al.sa_flags = 0;
1109 		sa_al.sa_handler = noop_handler;
1110 		sigemptyset(&sa_al.sa_mask);
1111 		sigaction(SIGALRM, &sa_al, &sa_old);
1112 		sigemptyset(&al_mask);
1113 		alarm(delaytime);
1114 		sigsuspend(&al_mask);
1115 		sigaction(SIGALRM, &sa_old, 0);
1116 		break;
1117 	}
1118 }
1119 
1120 /*
1121  * Format IO requests, returning a pointer to the formatted text.
1122  *
1123  * format_strat	- formats the async i/o completion strategy
1124  * format_rw	- formats a read[a]/write[a] request
1125  * format_sds	- formats a ssread/sswrite request
1126  * format_listio- formats a listio request
1127  *
1128  * ioreq is the doio io request structure.
1129  */
1130 
/*
 * Table pairing a printable name with each request-type constant.
 *
 * The last entry pairs "unknown" with -1; presumably it is the
 * terminator/fallback entry for whatever code scans this table - confirm
 * against the lookup routine, which is not part of this section.
 */
struct smap sysnames[] = {
	{"READ", READ},
	{"WRITE", WRITE},
	{"READA", READA},
	{"WRITEA", WRITEA},
	{"SSREAD", SSREAD},
	{"SSWRITE", SSWRITE},
	{"LISTIO", LISTIO},
	{"LREAD", LREAD},
	{"LREADA", LREADA},
	{"LWRITE", LWRITE},
	{"LWRITEA", LWRITEA},
	{"LSREAD", LSREAD},
	{"LSREADA", LSREADA},
	{"LSWRITE", LSWRITE},
	{"LSWRITEA", LSWRITEA},

	/* Irix System Calls */
	{"PREAD", PREAD},
	{"PWRITE", PWRITE},
	{"AREAD", AREAD},
	{"AWRITE", AWRITE},
	{"LLREAD", LLREAD},
	{"LLAREAD", LLAREAD},
	{"LLWRITE", LLWRITE},
	{"LLAWRITE", LLAWRITE},
	{"RESVSP", RESVSP},
	{"UNRESVSP", UNRESVSP},
	{"DFFSYNC", DFFSYNC},

	/* Irix and Linux System Calls */
	{"READV", READV},
	{"WRITEV", WRITEV},
	{"MMAPR", MMAPR},
	{"MMAPW", MMAPW},
	{"FSYNC2", FSYNC2},
	{"FDATASYNC", FDATASYNC},

	/* NOTE(review): looks like the end-of-table marker - confirm */
	{"unknown", -1},
};
1171 
/*
 * Table pairing a printable name with each async i/o completion strategy
 * constant.  A strategy value of 0 is named "synch".
 *
 * The last entry pairs "unknown" with -1; presumably it is the
 * terminator/fallback entry for whatever code scans this table - confirm
 * against the lookup routine, which is not part of this section.
 */
struct smap aionames[] = {
	{"poll", A_POLL},
	{"signal", A_SIGNAL},
	{"recall", A_RECALL},
	{"recalla", A_RECALLA},
	{"recalls", A_RECALLS},
	{"suspend", A_SUSPEND},
	{"callback", A_CALLBACK},
	{"synch", 0},
	/* NOTE(review): looks like the end-of-table marker - confirm */
	{"unknown", -1},
};
1183 
/*
 * Build a printable list of the open(2) flag names that are set in oflags.
 *
 * The access mode (the low two bits of oflags) always comes first, followed
 * by the name of every other recognised flag that is set.  Every name,
 * including the "O_weird" used for an unrecognised access mode, is followed
 * by a comma, so the names never run into each other.
 *
 * Returns a strdup()'d string which the caller owns and should free(), or
 * NULL if strdup() could not allocate the copy.
 */
char *format_oflags(int oflags)
{
	/* large enough for every name below concatenated */
	char flags[255];

	flags[0] = '\0';
	switch (oflags & 03) {
	case O_RDONLY:
		strcat(flags, "O_RDONLY,");
		break;
	case O_WRONLY:
		strcat(flags, "O_WRONLY,");
		break;
	case O_RDWR:
		strcat(flags, "O_RDWR,");
		break;
	default:
		/* trailing comma keeps the next name from being glued on */
		strcat(flags, "O_weird,");
		break;
	}

	if (oflags & O_EXCL)
		strcat(flags, "O_EXCL,");

	if (oflags & O_SYNC)
		strcat(flags, "O_SYNC,");
#ifdef CRAY
	if (oflags & O_RAW)
		strcat(flags, "O_RAW,");
	if (oflags & O_WELLFORMED)
		strcat(flags, "O_WELLFORMED,");
#ifdef O_SSD
	if (oflags & O_SSD)
		strcat(flags, "O_SSD,");
#endif
	if (oflags & O_LDRAW)
		strcat(flags, "O_LDRAW,");
	if (oflags & O_PARALLEL)
		strcat(flags, "O_PARALLEL,");
	if (oflags & O_BIG)
		strcat(flags, "O_BIG,");
	if (oflags & O_PLACE)
		strcat(flags, "O_PLACE,");
	if (oflags & O_ASYNC)
		strcat(flags, "O_ASYNC,");
#endif

#ifdef sgi
	if (oflags & O_DIRECT)
		strcat(flags, "O_DIRECT,");
	if (oflags & O_DSYNC)
		strcat(flags, "O_DSYNC,");
	if (oflags & O_RSYNC)
		strcat(flags, "O_RSYNC,");
#endif

	return (strdup(flags));
}
1241 
format_strat(int strategy)1242 char *format_strat(int strategy)
1243 {
1244 	char msg[64];
1245 	char *aio_strat;
1246 
1247 	switch (strategy) {
1248 	case A_POLL:
1249 		aio_strat = "POLL";
1250 		break;
1251 	case A_SIGNAL:
1252 		aio_strat = "SIGNAL";
1253 		break;
1254 	case A_RECALL:
1255 		aio_strat = "RECALL";
1256 		break;
1257 	case A_RECALLA:
1258 		aio_strat = "RECALLA";
1259 		break;
1260 	case A_RECALLS:
1261 		aio_strat = "RECALLS";
1262 		break;
1263 	case A_SUSPEND:
1264 		aio_strat = "SUSPEND";
1265 		break;
1266 	case A_CALLBACK:
1267 		aio_strat = "CALLBACK";
1268 		break;
1269 	case 0:
1270 		aio_strat = "<zero>";
1271 		break;
1272 	default:
1273 		sprintf(msg, "<error:%#o>", strategy);
1274 		aio_strat = strdup(msg);
1275 		break;
1276 	}
1277 
1278 	return (aio_strat);
1279 }
1280 
format_rw(struct io_req * ioreq,int fd,void * buffer,int signo,char * pattern,void * iosw)1281 char *format_rw(struct io_req *ioreq, int fd, void *buffer, int signo,
1282 		char *pattern, void *iosw)
1283 {
1284 	static char *errbuf = NULL;
1285 	char *aio_strat, *cp;
1286 	struct read_req *readp = &ioreq->r_data.read;
1287 	struct write_req *writep = &ioreq->r_data.write;
1288 	struct read_req *readap = &ioreq->r_data.read;
1289 	struct write_req *writeap = &ioreq->r_data.write;
1290 
1291 	if (errbuf == NULL)
1292 		errbuf = malloc(32768);
1293 
1294 	cp = errbuf;
1295 	cp += sprintf(cp, "Request number %d\n", Reqno);
1296 
1297 	switch (ioreq->r_type) {
1298 	case READ:
1299 		cp += sprintf(cp, "syscall:  read(%d, %#lo, %d)\n",
1300 			      fd, (unsigned long)buffer, readp->r_nbytes);
1301 		cp +=
1302 		    sprintf(cp,
1303 			    "          fd %d is file %s - open flags are %#o\n",
1304 			    fd, readp->r_file, readp->r_oflags);
1305 		cp +=
1306 		    sprintf(cp, "          read done at file offset %d\n",
1307 			    readp->r_offset);
1308 		break;
1309 
1310 	case WRITE:
1311 		cp += sprintf(cp, "syscall:  write(%d, %#lo, %d)\n",
1312 			      fd, (unsigned long)buffer, writep->r_nbytes);
1313 		cp +=
1314 		    sprintf(cp,
1315 			    "          fd %d is file %s - open flags are %#o\n",
1316 			    fd, writep->r_file, writep->r_oflags);
1317 		cp +=
1318 		    sprintf(cp,
1319 			    "          write done at file offset %d - pattern is %s\n",
1320 			    writep->r_offset, pattern);
1321 		break;
1322 
1323 	case READA:
1324 		aio_strat = format_strat(readap->r_aio_strat);
1325 
1326 		cp += sprintf(cp, "syscall:  reada(%d, %#lo, %d, %#lo, %d)\n",
1327 			      fd, (unsigned long)buffer, readap->r_nbytes,
1328 			      (unsigned long)iosw, signo);
1329 		cp +=
1330 		    sprintf(cp,
1331 			    "          fd %d is file %s - open flags are %#o\n",
1332 			    fd, readap->r_file, readp->r_oflags);
1333 		cp +=
1334 		    sprintf(cp, "          reada done at file offset %d\n",
1335 			    readap->r_offset);
1336 		cp +=
1337 		    sprintf(cp,
1338 			    "          async io completion strategy is %s\n",
1339 			    aio_strat);
1340 		break;
1341 
1342 	case WRITEA:
1343 		aio_strat = format_strat(writeap->r_aio_strat);
1344 
1345 		cp += sprintf(cp, "syscall:  writea(%d, %#lo, %d, %#lo, %d)\n",
1346 			      fd, (unsigned long)buffer, writeap->r_nbytes,
1347 			      (unsigned long)iosw, signo);
1348 		cp +=
1349 		    sprintf(cp,
1350 			    "          fd %d is file %s - open flags are %#o\n",
1351 			    fd, writeap->r_file, writeap->r_oflags);
1352 		cp +=
1353 		    sprintf(cp,
1354 			    "          writea done at file offset %d - pattern is %s\n",
1355 			    writeap->r_offset, pattern);
1356 		cp +=
1357 		    sprintf(cp,
1358 			    "          async io completion strategy is %s\n",
1359 			    aio_strat);
1360 		break;
1361 
1362 	}
1363 
1364 	return errbuf;
1365 }
1366 
1367 #ifdef CRAY
format_sds(struct io_req * ioreq,void * buffer,int sds,char * pattern)1368 char *format_sds(struct io_req *ioreq, void *buffer, int sds, char *pattern)
1369 {
1370 	int i;
1371 	static char *errbuf = NULL;
1372 	char *cp;
1373 
1374 	struct ssread_req *ssreadp = &ioreq->r_data.ssread;
1375 	struct sswrite_req *sswritep = &ioreq->r_data.sswrite;
1376 
1377 	if (errbuf == NULL)
1378 		errbuf = malloc(32768);
1379 
1380 	cp = errbuf;
1381 	cp += sprintf(cp, "Request number %d\n", Reqno);
1382 
1383 	switch (ioreq->r_type) {
1384 	case SSREAD:
1385 		cp += sprintf(cp, "syscall:  ssread(%#o, %#o, %d)\n",
1386 			      buffer, sds, ssreadp->r_nbytes);
1387 		break;
1388 
1389 	case SSWRITE:
1390 		cp +=
1391 		    sprintf(cp,
1392 			    "syscall:  sswrite(%#o, %#o, %d) - pattern was %s\n",
1393 			    buffer, sds, sswritep->r_nbytes, pattern);
1394 		break;
1395 	}
1396 	return errbuf;
1397 }
1398 #endif /* CRAY */
1399 
/*
 * Perform the various sorts of disk reads.
 *
 * The file, open flags, offset and byte count are taken from
 * req->r_data.read.  A descriptor is obtained with alloc_fd(), a buffer
 * with alloc_mem() (or alloc_sds() on CRAY when O_SSD is set), and then
 * the request is carried out according to req->r_type:
 *
 *	READ	- lseek() to the offset followed by read()
 *	READA	- (CRAY only) lseek() followed by reada(), then wait for
 *		  completion and check the byte count in the status word
 *
 * Any other request type performs no i/o and falls through to the final
 * return.
 *
 * Returns 0 on success, -1 if the descriptor could not be obtained, the
 * seek or read failed, or the wrong number of bytes was transferred, and
 * the (negative) alloc_mem() result if the buffer could not be allocated.
 */

int do_read(struct io_req *req)
{
	int fd, offset, nbytes, oflags, rval;
	char *addr, *file;
#ifdef CRAY
	struct aio_info *aiop;
	int aio_id, aio_strat, signo;
#endif
#ifdef sgi
	struct fd_cache *fdc;
#endif

	/*
	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
	 * r_nbytes are at the same offset in the read_req and reada_req
	 * structures.
	 */

	file = req->r_data.read.r_file;
	oflags = req->r_data.read.r_oflags;
	offset = req->r_data.read.r_offset;
	nbytes = req->r_data.read.r_nbytes;

	/*printf("read: %s, %#o, %d %d\n", file, oflags, offset, nbytes); */

	/*
	 * Grab an open file descriptor
	 * Note: must be done before memory allocation so that the direct i/o
	 *      information is available in mem. allocate
	 */

	if ((fd = alloc_fd(file, oflags)) == -1)
		return -1;

	/*
	 * Allocate core or sds - based on the O_SSD flag
	 */

	/*
	 * wtob(n) is the size in bytes of n UINT64_T words.  Being a
	 * preprocessor definition it stays visible to the code that follows
	 * this function in the file as well.
	 */
#ifndef wtob
#define wtob(x)	(x * sizeof(UINT64_T))
#endif

#ifdef CRAY
	if (oflags & O_SSD) {
		if (alloc_sds(nbytes) == -1)
			return -1;

		addr = (char *)Sdsptr;
	} else {
		/* ask for more than nbytes so that addr can be bumped below */
		if ((rval =
		     alloc_mem(nbytes + wtob(1) * 2 +
			       MPP_BUMP * sizeof(UINT64_T))) < 0) {
			return rval;
		}

		addr = Memptr;

		/*
		 * if io is not raw, bump the offset by a random amount
		 * to generate non-word-aligned io.
		 */
		if (!(req->r_data.read.r_uflags & F_WORD_ALIGNED)) {
			addr += random_range(0, wtob(1) - 1, 1, NULL);
		}
	}
#else
#ifdef sgi
	/* get memory alignment for using DIRECT I/O */
	/* NOTE(review): fdc is used without a NULL check - confirm that
	 * alloc_fdcache() cannot fail */
	fdc = alloc_fdcache(file, oflags);

	/* room for the data plus slack for the alignment adjustments below */
	if ((rval = alloc_mem(nbytes + wtob(1) * 2 + fdc->c_memalign)) < 0) {
		return rval;
	}

	addr = Memptr;

	if ((req->r_data.read.r_uflags & F_WORD_ALIGNED)) {
		/*
		 * Force memory alignment for Direct I/O
		 */
		if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
			/* round addr up to the next c_memalign boundary */
			addr +=
			    fdc->c_memalign - ((long)addr % fdc->c_memalign);
		}
	} else {
		/* presumably a random offset within one word, giving
		 * non-word-aligned io as in the CRAY branch - confirm against
		 * random_range() */
		addr += random_range(0, wtob(1) - 1, 1, NULL);
	}
#else
	/* what is !CRAY && !sgi ? */
	/* here the buffer is used exactly as alloc_mem() left it in Memptr */
	if ((rval = alloc_mem(nbytes + wtob(1) * 2)) < 0) {
		return rval;
	}

	addr = Memptr;
#endif /* !CRAY && sgi */
#endif /* CRAY */

	switch (req->r_type) {
	case READ:
		/* move to the desired file position. */
		if (lseek(fd, offset, SEEK_SET) == -1) {
			doio_fprintf(stderr,
				     "lseek(%d, %d, SEEK_SET) failed:  %s (%d)\n",
				     fd, offset, SYSERR, errno);
			return -1;
		}

		/* both a failed read and a short read are treated as errors */
		if ((rval = read(fd, addr, nbytes)) == -1) {
			doio_fprintf(stderr,
				     "read() request failed:  %s (%d)\n%s\n",
				     SYSERR, errno,
				     format_rw(req, fd, addr, -1, NULL, NULL));
			doio_upanic(U_RVAL);
			return -1;
		} else if (rval != nbytes) {
			doio_fprintf(stderr,
				     "read() request returned wrong # of bytes - expected %d, got %d\n%s\n",
				     nbytes, rval,
				     format_rw(req, fd, addr, -1, NULL, NULL));
			doio_upanic(U_RVAL);
			return -1;
		}
		break;

#ifdef CRAY
	case READA:
		/*
		 * Async read
		 */

		/* move to the desired file position. */
		if (lseek(fd, offset, SEEK_SET) == -1) {
			doio_fprintf(stderr,
				     "lseek(%d, %d, SEEK_SET) failed:  %s (%d)\n",
				     fd, offset, SYSERR, errno);
			return -1;
		}

		/* a completion signal is requested only for the A_SIGNAL
		 * strategy */
		aio_strat = req->r_data.read.r_aio_strat;
		signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;

		aio_id = aio_register(fd, aio_strat, signo);
		aiop = aio_slot(aio_id);

		if (reada(fd, addr, nbytes, &aiop->iosw, signo) == -1) {
			doio_fprintf(stderr, "reada() failed: %s (%d)\n%s\n",
				     SYSERR, errno,
				     format_rw(req, fd, addr, signo, NULL,
					       &aiop->iosw));
			aio_unregister(aio_id);
			doio_upanic(U_RVAL);
			rval = -1;
		} else {
			/*
			 * Wait for io to complete
			 */

			aio_wait(aio_id);

			/*
			 * make sure the io completed without error
			 */

			/* only the byte count of the status word is checked */
			if (aiop->iosw.sw_count != nbytes) {
				doio_fprintf(stderr,
					     "Bad iosw from reada()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
					     1, 0, nbytes,
					     aiop->iosw.sw_flag,
					     aiop->iosw.sw_error,
					     aiop->iosw.sw_count,
					     format_rw(req, fd, addr, signo,
						       NULL, &aiop->iosw));
				aio_unregister(aio_id);
				doio_upanic(U_IOSW);
				rval = -1;
			} else {
				aio_unregister(aio_id);
				rval = 0;
			}
		}

		if (rval == -1)
			return rval;
		break;
#endif /* CRAY */
	}

	return 0;		/* if we get here, everything went ok */
}
1593 
1594 /*
1595  * Perform the verious types of disk writes.
1596  */
1597 
do_write(struct io_req * req)1598 int do_write(struct io_req *req)
1599 {
1600 	static int pid = -1;
1601 	int fd, nbytes, oflags, signo;
1602 	int logged_write, rval, got_lock;
1603 	off_t offset, woffset;
1604 	char *addr, pattern, *file, *msg;
1605 	struct wlog_rec wrec;
1606 #ifdef CRAY
1607 	int aio_strat, aio_id;
1608 	struct aio_info *aiop;
1609 #endif
1610 #ifdef sgi
1611 	struct fd_cache *fdc;
1612 #endif
1613 
1614 	woffset = 0;
1615 
1616 	/*
1617 	 * Misc variable setup
1618 	 */
1619 
1620 	signo = 0;
1621 	nbytes = req->r_data.write.r_nbytes;
1622 	offset = req->r_data.write.r_offset;
1623 	pattern = req->r_data.write.r_pattern;
1624 	file = req->r_data.write.r_file;
1625 	oflags = req->r_data.write.r_oflags;
1626 
1627 	/*printf("pwrite: %s, %#o, %d %d\n", file, oflags, offset, nbytes); */
1628 
1629 	/*
1630 	 * Allocate core memory and possibly sds space.  Initialize the data
1631 	 * to be written.
1632 	 */
1633 
1634 	Pattern[0] = pattern;
1635 
1636 	/*
1637 	 * Get a descriptor to do the io on
1638 	 */
1639 
1640 	if ((fd = alloc_fd(file, oflags)) == -1)
1641 		return -1;
1642 
1643 	/*printf("write: %d, %s, %#o, %d %d\n",
1644 	   fd, file, oflags, offset, nbytes); */
1645 
1646 	/*
1647 	 * Allocate SDS space for backdoor write if desired
1648 	 */
1649 
1650 #ifdef CRAY
1651 	if (oflags & O_SSD) {
1652 #ifndef _CRAYMPP
1653 		if ((rval = alloc_mem(nbytes + wtob(1))) < 0) {
1654 			return rval;
1655 		}
1656 
1657 		(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
1658 		/*pattern_fill(Memptr, nbytes, Pattern, Pattern_Length, 0); */
1659 
1660 		if (alloc_sds(nbytes) == -1)
1661 			return -1;
1662 
1663 		if (sswrite((long)Memptr, Sdsptr, btoc(nbytes)) == -1) {
1664 			doio_fprintf(stderr,
1665 				     "sswrite(%d, %d, %d) failed:  %s (%d)\n",
1666 				     (long)Memptr, Sdsptr, btoc(nbytes), SYSERR,
1667 				     errno);
1668 			fflush(stderr);
1669 			return -1;
1670 		}
1671 
1672 		addr = (char *)Sdsptr;
1673 #else
1674 		doio_fprintf(stderr,
1675 			     "Invalid O_SSD flag was generated for MPP system\n");
1676 		fflush(stderr);
1677 		return -1;
1678 #endif /* !CRAYMPP */
1679 	} else {
1680 		if ((rval = alloc_mem(nbytes + wtob(1)) < 0)) {
1681 			return rval;
1682 		}
1683 
1684 		addr = Memptr;
1685 
1686 		/*
1687 		 * if io is not raw, bump the offset by a random amount
1688 		 * to generate non-word-aligned io.
1689 		 */
1690 
1691 		if (!(req->r_data.write.r_uflags & F_WORD_ALIGNED)) {
1692 			addr += random_range(0, wtob(1) - 1, 1, NULL);
1693 		}
1694 
1695 		(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
1696 		if (addr != Memptr)
1697 			memmove(addr, Memptr, nbytes);
1698 	}
1699 #else /* CRAY */
1700 #ifdef sgi
1701 	/* get memory alignment for using DIRECT I/O */
1702 	fdc = alloc_fdcache(file, oflags);
1703 
1704 	if ((rval = alloc_mem(nbytes + wtob(1) * 2 + fdc->c_memalign)) < 0) {
1705 		return rval;
1706 	}
1707 
1708 	addr = Memptr;
1709 
1710 	if ((req->r_data.write.r_uflags & F_WORD_ALIGNED)) {
1711 		/*
1712 		 * Force memory alignment for Direct I/O
1713 		 */
1714 		if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
1715 			addr +=
1716 			    fdc->c_memalign - ((long)addr % fdc->c_memalign);
1717 		}
1718 	} else {
1719 		addr += random_range(0, wtob(1) - 1, 1, NULL);
1720 	}
1721 
1722 	(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
1723 	if (addr != Memptr)
1724 		memmove(addr, Memptr, nbytes);
1725 
1726 #else /* sgi */
1727 	if ((rval = alloc_mem(nbytes + wtob(1) * 2)) < 0) {
1728 		return rval;
1729 	}
1730 
1731 	addr = Memptr;
1732 
1733 	(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
1734 	if (addr != Memptr)
1735 		memmove(addr, Memptr, nbytes);
1736 #endif /* sgi */
1737 #endif /* CRAY */
1738 
1739 	rval = -1;
1740 	got_lock = 0;
1741 	logged_write = 0;
1742 
1743 	if (k_opt) {
1744 		if (lock_file_region(file, fd, F_WRLCK, offset, nbytes) < 0) {
1745 			alloc_mem(-1);
1746 			exit(E_INTERNAL);
1747 		}
1748 
1749 		got_lock = 1;
1750 	}
1751 
1752 	/*
1753 	 * Write a preliminary write-log entry.  This is done so that
1754 	 * doio_check can do corruption detection across an interrupt/crash.
1755 	 * Note that w_done is set to 0.  If doio_check sees this, it
1756 	 * re-creates the file extents as if the write completed, but does not
1757 	 * do any checking - see comments in doio_check for more details.
1758 	 */
1759 
1760 	if (w_opt) {
1761 		if (pid == -1) {
1762 			pid = getpid();
1763 		}
1764 		wrec.w_async = (req->r_type == WRITEA) ? 1 : 0;
1765 		wrec.w_oflags = oflags;
1766 		wrec.w_pid = pid;
1767 		wrec.w_offset = offset;
1768 		wrec.w_nbytes = nbytes;
1769 
1770 		wrec.w_pathlen = strlen(file);
1771 		memcpy(wrec.w_path, file, wrec.w_pathlen);
1772 		wrec.w_hostlen = strlen(Host);
1773 		memcpy(wrec.w_host, Host, wrec.w_hostlen);
1774 		wrec.w_patternlen = Pattern_Length;
1775 		memcpy(wrec.w_pattern, Pattern, wrec.w_patternlen);
1776 
1777 		wrec.w_done = 0;
1778 
1779 		if ((woffset = wlog_record_write(&Wlog, &wrec, -1)) == -1) {
1780 			doio_fprintf(stderr,
1781 				     "Could not append to write-log:  %s (%d)\n",
1782 				     SYSERR, errno);
1783 		} else {
1784 			logged_write = 1;
1785 		}
1786 	}
1787 
1788 	switch (req->r_type) {
1789 	case WRITE:
1790 		/*
1791 		 * sync write
1792 		 */
1793 
1794 		if (lseek(fd, offset, SEEK_SET) == -1) {
1795 			doio_fprintf(stderr,
1796 				     "lseek(%d, %d, SEEK_SET) failed:  %s (%d)\n",
1797 				     fd, offset, SYSERR, errno);
1798 			return -1;
1799 		}
1800 
1801 		rval = write(fd, addr, nbytes);
1802 
1803 		if (rval == -1) {
1804 			doio_fprintf(stderr,
1805 				     "write() failed:  %s (%d)\n%s\n",
1806 				     SYSERR, errno,
1807 				     format_rw(req, fd, addr, -1, Pattern,
1808 					       NULL));
1809 #ifdef sgi
1810 			doio_fprintf(stderr,
1811 				     "write() failed:  %s\n\twrite(%d, %#o, %d)\n\toffset %d, nbytes%%miniou(%d)=%d, oflags=%#o memalign=%d, addr%%memalign=%d\n",
1812 				     strerror(errno),
1813 				     fd, addr, nbytes,
1814 				     offset,
1815 				     fdc->c_miniosz, nbytes % fdc->c_miniosz,
1816 				     oflags, fdc->c_memalign,
1817 				     (long)addr % fdc->c_memalign);
1818 #else
1819 			doio_fprintf(stderr,
1820 				     "write() failed:  %s\n\twrite(%d, %#o, %d)\n\toffset %d, nbytes%%1B=%d, oflags=%#o\n",
1821 				     strerror(errno),
1822 				     fd, addr, nbytes,
1823 				     offset, nbytes % 4096, oflags);
1824 #endif
1825 			doio_upanic(U_RVAL);
1826 		} else if (rval != nbytes) {
1827 			doio_fprintf(stderr,
1828 				     "write() returned wrong # bytes - expected %d, got %d\n%s\n",
1829 				     nbytes, rval,
1830 				     format_rw(req, fd, addr, -1, Pattern,
1831 					       NULL));
1832 			doio_upanic(U_RVAL);
1833 			rval = -1;
1834 		}
1835 
1836 		break;
1837 
1838 #ifdef CRAY
1839 	case WRITEA:
1840 		/*
1841 		 * async write
1842 		 */
1843 		if (lseek(fd, offset, SEEK_SET) == -1) {
1844 			doio_fprintf(stderr,
1845 				     "lseek(%d, %d, SEEK_SET) failed:  %s (%d)\n",
1846 				     fd, offset, SYSERR, errno);
1847 			return -1;
1848 		}
1849 
1850 		aio_strat = req->r_data.write.r_aio_strat;
1851 		signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
1852 
1853 		aio_id = aio_register(fd, aio_strat, signo);
1854 		aiop = aio_slot(aio_id);
1855 
1856 		/*
1857 		 * init iosw and do the async write
1858 		 */
1859 
1860 		if (writea(fd, addr, nbytes, &aiop->iosw, signo) == -1) {
1861 			doio_fprintf(stderr,
1862 				     "writea() failed: %s (%d)\n%s\n",
1863 				     SYSERR, errno,
1864 				     format_rw(req, fd, addr, -1, Pattern,
1865 					       NULL));
1866 			doio_upanic(U_RVAL);
1867 			aio_unregister(aio_id);
1868 			rval = -1;
1869 		} else {
1870 
1871 			/*
1872 			 * Wait for io to complete
1873 			 */
1874 
1875 			aio_wait(aio_id);
1876 
1877 			/*
1878 			 * check that iosw is ok
1879 			 */
1880 
1881 			if (aiop->iosw.sw_count != nbytes) {
1882 				doio_fprintf(stderr,
1883 					     "Bad iosw from writea()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
1884 					     1, 0, nbytes,
1885 					     aiop->iosw.sw_flag,
1886 					     aiop->iosw.sw_error,
1887 					     aiop->iosw.sw_count,
1888 					     format_rw(req, fd, addr, -1,
1889 						       Pattern, &aiop->iosw));
1890 				aio_unregister(aio_id);
1891 				doio_upanic(U_IOSW);
1892 				rval = -1;
1893 			} else {
1894 				aio_unregister(aio_id);
1895 				rval = 0;
1896 			}
1897 		}
1898 		break;
1899 
1900 #endif /* CRAY */
1901 	}
1902 
1903 	/*
1904 	 * Verify that the data was written correctly - check_file() returns
1905 	 * a non-null pointer which contains an error message if there are
1906 	 * problems.
1907 	 */
1908 
1909 	if (v_opt) {
1910 		msg = check_file(file, offset, nbytes, Pattern, Pattern_Length,
1911 				 0, oflags & O_PARALLEL);
1912 		if (msg != NULL) {
1913 			doio_fprintf(stderr, "%s%s\n", msg,
1914 #ifdef CRAY
1915 				     format_rw(req, fd, addr, -1, Pattern,
1916 					       &aiop->iosw)
1917 #else
1918 				     format_rw(req, fd, addr, -1, Pattern, NULL)
1919 #endif
1920 			    );
1921 			doio_upanic(U_CORRUPTION);
1922 			exit(E_COMPARE);
1923 
1924 		}
1925 	}
1926 
1927 	/*
1928 	 * General cleanup ...
1929 	 *
1930 	 * Write extent information to the write-log, so that doio_check can do
1931 	 * corruption detection.  Note that w_done is set to 1, indicating that
1932 	 * the write has been verified as complete.  We don't need to write the
1933 	 * filename on the second logging.
1934 	 */
1935 
1936 	if (w_opt && logged_write) {
1937 		wrec.w_done = 1;
1938 		wlog_record_write(&Wlog, &wrec, woffset);
1939 	}
1940 
1941 	/*
1942 	 * Unlock file region if necessary
1943 	 */
1944 
1945 	if (got_lock) {
1946 		if (lock_file_region(file, fd, F_UNLCK, offset, nbytes) < 0) {
1947 			alloc_mem(-1);
1948 			exit(E_INTERNAL);
1949 		}
1950 	}
1951 
1952 	return ((rval == -1) ? -1 : 0);
1953 }
1954 
1955 /*
1956  * Simple routine to lock/unlock a file using fcntl()
1957  */
1958 
lock_file_region(char * fname,int fd,int type,int start,int nbytes)1959 int lock_file_region(char *fname, int fd, int type, int start, int nbytes)
1960 {
1961 	struct flock flk;
1962 
1963 	flk.l_type = type;
1964 	flk.l_whence = 0;
1965 	flk.l_start = start;
1966 	flk.l_len = nbytes;
1967 
1968 	if (fcntl(fd, F_SETLKW, &flk) < 0) {
1969 		doio_fprintf(stderr,
1970 			     "fcntl(%d, %d, %#o) failed for file %s, lock type %d, offset %d, length %d:  %s (%d), open flags: %#o\n",
1971 			     fd, F_SETLKW, &flk, fname, type,
1972 			     start, nbytes, SYSERR, errno,
1973 			     fcntl(fd, F_GETFL, 0));
1974 		return -1;
1975 	}
1976 
1977 	return 0;
1978 }
1979 
1980 /*
1981  * Perform a listio request.
1982  */
1983 
1984 #ifdef CRAY
format_listio(struct io_req * ioreq,int lcmd,struct listreq * list,int nent,int fd,char * pattern)1985 char *format_listio(struct io_req *ioreq, int lcmd, struct listreq *list,
1986 		    int nent, int fd, char *pattern)
1987 {
1988 	static char *errbuf = NULL;
1989 	struct listio_req *liop = &ioreq->r_data.listio;
1990 	struct listreq *listreq;
1991 	char *cp, *cmd, *opcode, *aio_strat;
1992 	int i;
1993 
1994 	switch (lcmd) {
1995 	case LC_START:
1996 		cmd = "LC_START";
1997 		break;
1998 	case LC_WAIT:
1999 		cmd = "LC_WAIT";
2000 		break;
2001 	default:
2002 		cmd = "???";
2003 		break;
2004 	}
2005 
2006 	if (errbuf == NULL)
2007 		errbuf = malloc(32768);
2008 
2009 	cp = errbuf;
2010 	cp += sprintf(cp, "Request number %d\n", Reqno);
2011 
2012 	cp += sprintf(cp, "syscall:  listio(%s, %#o, %d)\n\n", cmd, list, nent);
2013 
2014 	aio_strat = format_strat(liop->r_aio_strat);
2015 
2016 	for (i = 0; i < nent; i++) {
2017 		cp += sprintf(cp, "struct lioreq for request element %d\n", i);
2018 		cp += sprintf(cp, "----------------------------------------\n");
2019 
2020 		listreq = list + i;
2021 
2022 		switch (listreq->li_opcode) {
2023 		case LO_READ:
2024 			opcode = "LO_READ";
2025 			break;
2026 		case LO_WRITE:
2027 			opcode = "LO_WRITE";
2028 			break;
2029 		default:
2030 			opcode = "???";
2031 			break;
2032 		}
2033 
2034 		cp += sprintf(cp, "          li_opcode =    %s\n", opcode);
2035 		cp +=
2036 		    sprintf(cp, "          li_drvr =      %#o\n",
2037 			    listreq->li_drvr);
2038 		cp +=
2039 		    sprintf(cp, "          li_flags =     %#o\n",
2040 			    listreq->li_flags);
2041 		cp +=
2042 		    sprintf(cp, "          li_offset =    %d\n",
2043 			    listreq->li_offset);
2044 		cp +=
2045 		    sprintf(cp, "          li_fildes =    %d\n",
2046 			    listreq->li_fildes);
2047 		cp +=
2048 		    sprintf(cp, "          li_buf =       %#o\n",
2049 			    listreq->li_buf);
2050 		cp +=
2051 		    sprintf(cp, "          li_nbyte =     %d\n",
2052 			    listreq->li_nbyte);
2053 		cp +=
2054 		    sprintf(cp, "          li_status =    %#o (%d, %d, %d)\n",
2055 			    listreq->li_status, listreq->li_status->sw_flag,
2056 			    listreq->li_status->sw_error,
2057 			    listreq->li_status->sw_count);
2058 		cp +=
2059 		    sprintf(cp, "          li_signo =     %d\n",
2060 			    listreq->li_signo);
2061 		cp +=
2062 		    sprintf(cp, "          li_nstride =   %d\n",
2063 			    listreq->li_nstride);
2064 		cp +=
2065 		    sprintf(cp, "          li_filstride = %d\n",
2066 			    listreq->li_filstride);
2067 		cp +=
2068 		    sprintf(cp, "          li_memstride = %d\n",
2069 			    listreq->li_memstride);
2070 		cp +=
2071 		    sprintf(cp, "          io completion strategy is %s\n",
2072 			    aio_strat);
2073 	}
2074 	return errbuf;
2075 }
2076 #endif /* CRAY */
2077 
/*
 * Perform a listio request (CRAY only; on every other platform this
 * function just returns -1).
 *
 * A single listreq is built from req->r_data.listio and submitted with
 * listio().  For LO_WRITE requests the buffer is first filled through
 * Data_Fill, the affected file region is locked when k_opt is set, and the
 * written data is verified stride by stride with check_file() when v_opt
 * is set (a mismatch terminates the process with E_COMPARE).
 *
 * Returns 0 on success.  Returns -1 if the request has overlapping file
 * strides, the descriptor or lock could not be obtained, listio() failed,
 * the status word reports the wrong byte count, or the region could not be
 * unlocked; returns the (negative) alloc_mem() result if the buffer could
 * not be allocated.
 */
int do_listio(struct io_req *req)
{
#ifdef CRAY
	struct listio_req *lio;
	int fd, signo, i;
	int rval, got_lock;
	int aio_strat, aio_id;
	int min_byte, max_byte;
	int mem_needed;
	int foffset, fstride, mstride, nstrides;
	int lio_rval, lio_errno;
	char *moffset;
	char *addr, *msg;
	sigset_t block_mask, omask;
	struct aio_info *aiop;
	struct listreq lio_req;

	lio = &req->r_data.listio;

	/*
	 * If bytes per stride is less than the stride size, drop the request
	 * since it will cause overlapping strides, and we cannot predict
	 * the order they will complete in.
	 */

	if (lio->r_filestride && abs(lio->r_filestride) < lio->r_nbytes) {
		doio_fprintf(stderr,
			     "do_listio():  Bogus listio request - abs(filestride) [%d] < nbytes [%d]\n",
			     abs(lio->r_filestride), lio->r_nbytes);
		return -1;
	}

	/*
	 * Allocate core memory.  Initialize the data to be written.  Make
	 * sure we get enough, based on the memstride.
	 */

	mem_needed =
	    stride_bounds(0, lio->r_memstride, lio->r_nstrides,
			  lio->r_nbytes, NULL, NULL);

	if ((rval = alloc_mem(mem_needed + wtob(1))) < 0) {
		return rval;
	}

	/*
	 * Set the memory address pointer.  If the io is not raw, adjust
	 * addr by a random amount, so that non-raw io is not necessarily
	 * word aligned.
	 */

	addr = Memptr;

	if (!(lio->r_uflags & F_WORD_ALIGNED)) {
		addr += random_range(0, wtob(1) - 1, 1, NULL);
	}

	if (lio->r_opcode == LO_WRITE) {
		Pattern[0] = lio->r_pattern;
		(*Data_Fill) (Memptr, mem_needed, Pattern, Pattern_Length, 0);
		if (addr != Memptr)
			memmove(addr, Memptr, mem_needed);
	}

	/*
	 * Get a descriptor to do the io on.  No need to do an lseek, as this
	 * is encoded in the listio request.
	 */

	if ((fd = alloc_fd(lio->r_file, lio->r_oflags)) == -1) {
		return -1;
	}

	/* rval stays -1 until the whole request has completed and verified */
	rval = -1;
	got_lock = 0;

	/*
	 * If the opcode is LO_WRITE, lock all regions of the file that
	 * are touched by this listio request.  Currently, we use
	 * stride_bounds() to figure out the min and max bytes affected, and
	 * lock the entire region, regardless of the file stride.
	 */

	if (lio->r_opcode == LO_WRITE && k_opt) {
		stride_bounds(lio->r_offset,
			      lio->r_filestride, lio->r_nstrides,
			      lio->r_nbytes, &min_byte, &max_byte);

		if (lock_file_region(lio->r_file, fd, F_WRLCK,
				     min_byte, (max_byte - min_byte + 1)) < 0) {
			doio_fprintf(stderr,
				     "stride_bounds(%d, %d, %d, %d, ..., ...) set min_byte to %d, max_byte to %d\n",
				     lio->r_offset, lio->r_filestride,
				     lio->r_nstrides, lio->r_nbytes, min_byte,
				     max_byte);
			return -1;
		} else {
			got_lock = 1;
		}
	}

	/*
	 * Register the async request (read or write, per r_opcode).  A
	 * completion signal is requested only for the A_SIGNAL strategy.
	 */

	aio_strat = lio->r_aio_strat;
	signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;

	aio_id = aio_register(fd, aio_strat, signo);
	aiop = aio_slot(aio_id);

	/*
	 * Form the listio request, and make the call.
	 */

	lio_req.li_opcode = lio->r_opcode;
	lio_req.li_drvr = 0;
	lio_req.li_flags = LF_LSEEK;
	lio_req.li_offset = lio->r_offset;
	lio_req.li_fildes = fd;

	/*
	 * With a negative memory stride and more than one stride, the first
	 * transfer uses the last nbytes of the buffer.
	 */
	if (lio->r_memstride >= 0 || lio->r_nstrides <= 1) {
		lio_req.li_buf = addr;
	} else {
		lio_req.li_buf = addr + mem_needed - lio->r_nbytes;
	}

	lio_req.li_nbyte = lio->r_nbytes;
	lio_req.li_status = &aiop->iosw;
	lio_req.li_signo = signo;
	lio_req.li_nstride = lio->r_nstrides;
	lio_req.li_filstride = lio->r_filestride;
	lio_req.li_memstride = lio->r_memstride;

	/*
	 * If signo != 0, block signo while we're in the system call, so that
	 * we don't get interrupted syscall failures.
	 */

	if (signo) {
		sigemptyset(&block_mask);
		sigaddset(&block_mask, signo);
		sigprocmask(SIG_BLOCK, &block_mask, &omask);
	}

	lio_rval = listio(lio->r_cmd, &lio_req, 1);
	lio_errno = errno;

	/*
	 * Restore the signal mask whether or not listio() succeeded, so that
	 * a failed call does not leave signo blocked for the rest of the run.
	 */

	if (signo) {
		sigprocmask(SIG_SETMASK, &omask, NULL);
	}

	if (lio_rval < 0) {
		/* format first, then put back the errno listio() left */
		msg = format_listio(req, lio->r_cmd, &lio_req, 1, fd,
				    Pattern);
		errno = lio_errno;
		doio_fprintf(stderr,
			     "listio() failed: %s (%d)\n%s\n",
			     SYSERR, errno, msg);
		aio_unregister(aio_id);
		doio_upanic(U_RVAL);
		goto lio_done;
	}

	/*
	 * Wait for io to complete
	 */

	aio_wait(aio_id);

	/* a stride count of 0 is treated as a single stride */
	nstrides = lio->r_nstrides ? lio->r_nstrides : 1;
	if (aiop->iosw.sw_count != lio->r_nbytes * nstrides) {
		/* report the same expected count that was just compared */
		doio_fprintf(stderr,
			     "Bad iosw from listio()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
			     1, 0, lio->r_nbytes * nstrides,
			     aiop->iosw.sw_flag,
			     aiop->iosw.sw_error, aiop->iosw.sw_count,
			     format_listio(req, lio->r_cmd, &lio_req, 1, fd,
					   Pattern));
		aio_unregister(aio_id);
		doio_upanic(U_IOSW);
		goto lio_done;
	}

	aio_unregister(aio_id);

	/*
	 * Verify that the data was written correctly - check_file() returns
	 * a non-null pointer which contains an error message if there are
	 * problems.
	 *
	 * For listio, we basically have to make 1 call to check_file for each
	 * stride.
	 */

	if (v_opt && lio_req.li_opcode == LO_WRITE) {
		/* a stride of 0 means the strides are packed back to back */
		fstride = lio->r_filestride ? lio->r_filestride : lio->r_nbytes;
		mstride = lio->r_memstride ? lio->r_memstride : lio->r_nbytes;
		foffset = lio->r_offset;

		if (mstride > 0 || lio->r_nstrides <= 1) {
			moffset = addr;
		} else {
			moffset = addr + mem_needed - lio->r_nbytes;
		}

		for (i = 0; i < lio_req.li_nstride; i++) {
			msg = check_file(lio->r_file,
					 foffset, lio->r_nbytes,
					 Pattern, Pattern_Length,
					 moffset - addr,
					 lio->r_oflags & O_PARALLEL);

			if (msg != NULL) {
				doio_fprintf(stderr, "%s\n%s\n",
					     msg,
					     format_listio(req, lio->r_cmd,
							   &lio_req, 1, fd,
							   Pattern));
				doio_upanic(U_CORRUPTION);
				exit(E_COMPARE);
			}

			moffset += mstride;
			foffset += fstride;
		}

	}

	rval = 0;

lio_done:

	/*
	 * General cleanup ...
	 *
	 */

	/*
	 * Release file locks if necessary
	 */

	if (got_lock) {
		if (lock_file_region(lio->r_file, fd, F_UNLCK,
				     min_byte, (max_byte - min_byte + 1)) < 0) {
			return -1;
		}
	}

	return rval;
#else
	return -1;
#endif
}
2330 
2331 /*
2332  * perform ssread/sswrite operations
2333  */
2334 
2335 #ifdef _CRAY1
2336 
do_ssdio(struct io_req * req)2337 int do_ssdio(struct io_req *req)
2338 {
2339 	int nbytes, nb;
2340 	char errbuf[BSIZE];
2341 
2342 	nbytes = req->r_data.ssread.r_nbytes;
2343 
2344 	/*
2345 	 * Grab core and sds space
2346 	 */
2347 
2348 	if ((nb = alloc_mem(nbytes)) < 0)
2349 		return nb;
2350 
2351 	if (alloc_sds(nbytes) == -1)
2352 		return -1;
2353 
2354 	if (req->r_type == SSWRITE) {
2355 
2356 		/*
2357 		 * Init data and ship it to the ssd
2358 		 */
2359 
2360 		Pattern[0] = req->r_data.sswrite.r_pattern;
2361 		/*pattern_fill(Memptr, nbytes, Pattern, Pattern_Length, 0); */
2362 		(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
2363 
2364 		if (sswrite((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) {
2365 			doio_fprintf(stderr, "sswrite() failed:  %s (%d)\n%s\n",
2366 				     SYSERR, errno,
2367 				     format_sds(req, Memptr, Sdsptr, Pattern));
2368 			doio_upanic(U_RVAL);
2369 			return -1;
2370 		}
2371 	} else {
2372 		/*
2373 		 * read from sds
2374 		 */
2375 
2376 		if (ssread((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) {
2377 			doio_fprintf(stderr, "ssread() failed: %s (%d)\n%s\n",
2378 				     SYSERR, errno,
2379 				     format_sds(req, Memptr, Sdsptr, Pattern));
2380 
2381 			doio_upanic(U_RVAL);
2382 			return -1;
2383 		}
2384 	}
2385 
2386 	/*
2387 	 * Verify data if SSWRITE and v_opt
2388 	 */
2389 
2390 	if (v_opt && req->r_type == SSWRITE) {
2391 		ssread((long)Memptr, (long)Sdsptr, btoc(nbytes));
2392 
2393 		if (pattern_check(Memptr, nbytes, Pattern, Pattern_Length, 0) ==
2394 		    -1) {
2395 			doio_fprintf(stderr,
2396 				     "sds DATA COMPARE ERROR - ABORTING\n%s\n",
2397 				     format_sds(req, Memptr, Sdsptr, Pattern));
2398 
2399 			doio_upanic(U_CORRUPTION);
2400 			exit(E_COMPARE);
2401 		}
2402 	}
2403 }
2404 
2405 #else
2406 
2407 #ifdef CRAY
2408 
/*
 * Stub used on Cray systems that are not _CRAY1: ssread/sswrite requests
 * cannot be serviced here, so report an internal error and terminate the
 * process with E_INTERNAL.  This function never returns.
 */
int do_ssdio(struct io_req *req)
{
	doio_fprintf(stderr,
		     "Internal Error - do_ssdio() called on a non-cray1 system\n");
	/* NOTE(review): alloc_mem(-1) presumably releases the I/O buffer - confirm */
	alloc_mem(-1);
	exit(E_INTERNAL);
}
2416 
2417 #endif /* CRAY */
2418 
2419 #endif /* _CRAY1 */
2420 
fmt_ioreq(struct io_req * ioreq,struct syscall_info * sy,int fd)2421 char *fmt_ioreq(struct io_req *ioreq, struct syscall_info *sy, int fd)
2422 {
2423 	static char *errbuf = NULL;
2424 	char *cp;
2425 	struct rw_req *io;
2426 	struct smap *aname;
2427 #ifdef CRAY
2428 	struct stat sbuf;
2429 #endif
2430 #ifdef sgi
2431 	struct dioattr finfo;
2432 #endif
2433 
2434 	if (errbuf == NULL)
2435 		errbuf = malloc(32768);
2436 
2437 	io = &ioreq->r_data.io;
2438 
2439 	/*
2440 	 * Look up async I/O completion strategy
2441 	 */
2442 	for (aname = aionames;
2443 	     aname->value != -1 && aname->value != io->r_aio_strat; aname++) ;
2444 
2445 	cp = errbuf;
2446 	cp += sprintf(cp, "Request number %d\n", Reqno);
2447 
2448 	cp +=
2449 	    sprintf(cp, "          fd %d is file %s - open flags are %#o %s\n",
2450 		    fd, io->r_file, io->r_oflags, format_oflags(io->r_oflags));
2451 
2452 	if (sy->sy_flags & SY_WRITE) {
2453 		cp +=
2454 		    sprintf(cp,
2455 			    "          write done at file offset %d - pattern is %c (%#o)\n",
2456 			    io->r_offset,
2457 			    (io->r_pattern == '\0') ? '?' : io->r_pattern,
2458 			    io->r_pattern);
2459 	} else {
2460 		cp += sprintf(cp, "          read done at file offset %d\n",
2461 			      io->r_offset);
2462 	}
2463 
2464 	if (sy->sy_flags & SY_ASYNC) {
2465 		cp +=
2466 		    sprintf(cp,
2467 			    "          async io completion strategy is %s\n",
2468 			    aname->string);
2469 	}
2470 
2471 	cp +=
2472 	    sprintf(cp,
2473 		    "          number of requests is %d, strides per request is %d\n",
2474 		    io->r_nent, io->r_nstrides);
2475 
2476 	cp += sprintf(cp, "          i/o byte count = %d\n", io->r_nbytes);
2477 
2478 	cp += sprintf(cp, "          memory alignment is %s\n",
2479 		      (io->
2480 		       r_uflags & F_WORD_ALIGNED) ? "aligned" : "unaligned");
2481 
2482 #ifdef CRAY
2483 	if (io->r_oflags & O_RAW) {
2484 		cp +=
2485 		    sprintf(cp,
2486 			    "          RAW I/O: offset %% 4096 = %d length %% 4096 = %d\n",
2487 			    io->r_offset % 4096, io->r_nbytes % 4096);
2488 		fstat(fd, &sbuf);
2489 		cp +=
2490 		    sprintf(cp,
2491 			    "          optimal file xfer size: small: %d large: %d\n",
2492 			    sbuf.st_blksize, sbuf.st_oblksize);
2493 		cp +=
2494 		    sprintf(cp, "          cblks %d cbits %#o\n", sbuf.st_cblks,
2495 			    sbuf.st_cbits);
2496 	}
2497 #endif
2498 #ifdef sgi
2499 	if (io->r_oflags & O_DIRECT) {
2500 
2501 		if (fcntl(fd, F_DIOINFO, &finfo) == -1) {
2502 			cp +=
2503 			    sprintf(cp,
2504 				    "          Error %s (%d) getting direct I/O info\n",
2505 				    strerror(errno), errno);
2506 			finfo.d_mem = 1;
2507 			finfo.d_miniosz = 1;
2508 			finfo.d_maxiosz = 1;
2509 		}
2510 
2511 		cp +=
2512 		    sprintf(cp,
2513 			    "          DIRECT I/O: offset %% %d = %d length %% %d = %d\n",
2514 			    finfo.d_miniosz, io->r_offset % finfo.d_miniosz,
2515 			    io->r_nbytes, io->r_nbytes % finfo.d_miniosz);
2516 		cp +=
2517 		    sprintf(cp,
2518 			    "          mem alignment 0x%x xfer size: small: %d large: %d\n",
2519 			    finfo.d_mem, finfo.d_miniosz, finfo.d_maxiosz);
2520 	}
2521 #endif
2522 
2523 	return (errbuf);
2524 }
2525 
2526 /*
2527  * Issue listio requests
2528  */
2529 #ifdef CRAY
sy_listio(struct io_req * req,struct syscall_info * sysc,int fd,char * addr)2530 struct status *sy_listio(struct io_req *req, struct syscall_info *sysc, int fd,
2531 			 char *addr)
2532 {
2533 	int offset, nbytes, nstrides, nents, aio_strat;
2534 	int aio_id, signo, o, i, lc;
2535 	char *a;
2536 	struct listreq *lio_req, *l;
2537 	struct aio_info *aiop;
2538 	struct status *status;
2539 
2540 	/*
2541 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
2542 	 * r_nbytes are at the same offset in the read_req and reada_req
2543 	 * structures.
2544 	 */
2545 	offset = req->r_data.io.r_offset;
2546 	nbytes = req->r_data.io.r_nbytes;
2547 	nstrides = req->r_data.io.r_nstrides;
2548 	nents = req->r_data.io.r_nent;
2549 	aio_strat = req->r_data.io.r_aio_strat;
2550 
2551 	lc = (sysc->sy_flags & SY_ASYNC) ? LC_START : LC_WAIT;
2552 
2553 	status = malloc(sizeof(struct status));
2554 	if (status == NULL) {
2555 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2556 			     __FILE__, __LINE__);
2557 		return NULL;
2558 	}
2559 	status->aioid = malloc((nents + 1) * sizeof(int));
2560 	if (status->aioid == NULL) {
2561 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2562 			     __FILE__, __LINE__);
2563 		return NULL;
2564 	}
2565 
2566 	signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
2567 
2568 	lio_req = malloc(nents * sizeof(struct listreq));
2569 	if (lio_req == NULL) {
2570 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2571 			     __FILE__, __LINE__);
2572 		return NULL;
2573 	}
2574 	for (l = lio_req, a = addr, o = offset, i = 0;
2575 	     i < nents; l++, a += nbytes, o += nbytes, i++) {
2576 
2577 		aio_id = aio_register(fd, aio_strat, signo);
2578 		aiop = aio_slot(aio_id);
2579 		status->aioid[i] = aio_id;
2580 
2581 		l->li_opcode = (sysc->sy_flags & SY_WRITE) ? LO_WRITE : LO_READ;
2582 		l->li_offset = o;
2583 		l->li_fildes = fd;
2584 		l->li_buf = a;
2585 		l->li_nbyte = nbytes;
2586 		l->li_status = &aiop->iosw;
2587 		l->li_signo = signo;
2588 		l->li_nstride = nstrides;
2589 		l->li_filstride = 0;
2590 		l->li_memstride = 0;
2591 		l->li_drvr = 0;
2592 		l->li_flags = LF_LSEEK;
2593 	}
2594 
2595 	status->aioid[nents] = -1;	/* end sentinel */
2596 
2597 	if ((status->rval = listio(lc, lio_req, nents)) == -1) {
2598 		status->err = errno;
2599 	}
2600 
2601 	free(lio_req);
2602 	return (status);
2603 }
2604 
2605 /*
2606  * Calculate the size of a request in bytes and min/max boundaries
2607  *
2608  * This assumes filestride & memstride = 0.
2609  */
listio_mem(struct io_req * req,int offset,int fmstride,int * min,int * max)2610 int listio_mem(struct io_req *req, int offset, int fmstride, int *min, int *max)
2611 {
2612 	int i, size;
2613 
2614 	size = stride_bounds(offset, fmstride,
2615 			     req->r_data.io.r_nstrides * req->r_data.io.r_nent,
2616 			     req->r_data.io.r_nbytes, min, max);
2617 	return (size);
2618 }
2619 
fmt_listio(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2620 char *fmt_listio(struct io_req *req, struct syscall_info *sy, int fd,
2621 		 char *addr)
2622 {
2623 	static char *errbuf = NULL;
2624 	char *cp;
2625 	char *c, *opcode;
2626 	int i;
2627 
2628 	if (errbuf == NULL) {
2629 		errbuf = malloc(32768);
2630 		if (errbuf == NULL) {
2631 			doio_fprintf(stderr, "malloc failed, %s/%d\n",
2632 				     __FILE__, __LINE__);
2633 			return NULL;
2634 		}
2635 	}
2636 
2637 	c = (sy->sy_flags & SY_ASYNC) ? "lc_wait" : "lc_start";
2638 
2639 	cp = errbuf;
2640 	cp += sprintf(cp, "syscall:  listio(%s, (?), %d)\n",
2641 		      c, req->r_data.io.r_nent);
2642 
2643 	cp += sprintf(cp, "          data buffer at %#o\n", addr);
2644 
2645 	return (errbuf);
2646 }
2647 #endif /* CRAY */
2648 
2649 #ifdef sgi
sy_pread(struct io_req * req,struct syscall_info * sysc,int fd,char * addr)2650 struct status *sy_pread(struct io_req *req, struct syscall_info *sysc, int fd,
2651 			char *addr)
2652 {
2653 	int rc;
2654 	struct status *status;
2655 
2656 	rc = pread(fd, addr, req->r_data.io.r_nbytes, req->r_data.io.r_offset);
2657 
2658 	status = malloc(sizeof(struct status));
2659 	if (status == NULL) {
2660 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2661 			     __FILE__, __LINE__);
2662 		return NULL;
2663 	}
2664 	status->aioid = NULL;
2665 	status->rval = rc;
2666 	status->err = errno;
2667 
2668 	return (status);
2669 }
2670 
sy_pwrite(struct io_req * req,struct syscall_info * sysc,int fd,char * addr)2671 struct status *sy_pwrite(struct io_req *req, struct syscall_info *sysc, int fd,
2672 			 char *addr)
2673 {
2674 	int rc;
2675 	struct status *status;
2676 
2677 	rc = pwrite(fd, addr, req->r_data.io.r_nbytes, req->r_data.io.r_offset);
2678 
2679 	status = malloc(sizeof(struct status));
2680 	if (status == NULL) {
2681 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2682 			     __FILE__, __LINE__);
2683 		return NULL;
2684 	}
2685 	status->aioid = NULL;
2686 	status->rval = rc;
2687 	status->err = errno;
2688 
2689 	return (status);
2690 }
2691 
fmt_pread(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2692 char *fmt_pread(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
2693 {
2694 	static char *errbuf = NULL;
2695 	char *cp;
2696 
2697 	if (errbuf == NULL) {
2698 		errbuf = malloc(32768);
2699 		if (errbuf == NULL) {
2700 			doio_fprintf(stderr, "malloc failed, %s/%d\n",
2701 				     __FILE__, __LINE__);
2702 			return NULL;
2703 		}
2704 	}
2705 
2706 	cp = errbuf;
2707 	cp += sprintf(cp, "syscall:  %s(%d, 0x%lx, %d)\n",
2708 		      sy->sy_name, fd, addr, req->r_data.io.r_nbytes);
2709 	return (errbuf);
2710 }
2711 #endif /* sgi */
2712 
2713 #ifndef CRAY
/*
 * readv flavour of the vectored I/O entry point: forwards to sy_rwv()
 * with rw = 0 and returns its result unchanged.
 */
struct status *sy_readv(struct io_req *req, struct syscall_info *sysc, int fd,
			char *addr)
{
	/* full prototype so the arguments of the call below are type-checked */
	struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc,
			      int fd, char *addr, int rw);

	return sy_rwv(req, sysc, fd, addr, 0);
}
2720 
/*
 * writev flavour of the vectored I/O entry point: forwards to sy_rwv()
 * with rw = 1 and returns its result unchanged.
 */
struct status *sy_writev(struct io_req *req, struct syscall_info *sysc, int fd,
			 char *addr)
{
	/* full prototype so the arguments of the call below are type-checked */
	struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc,
			      int fd, char *addr, int rw);

	return sy_rwv(req, sysc, fd, addr, 1);
}
2727 
sy_rwv(struct io_req * req,struct syscall_info * sysc,int fd,char * addr,int rw)2728 struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc, int fd,
2729 		      char *addr, int rw)
2730 {
2731 	int rc;
2732 	struct status *status;
2733 	struct iovec iov[2];
2734 
2735 	status = malloc(sizeof(struct status));
2736 	if (status == NULL) {
2737 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2738 			     __FILE__, __LINE__);
2739 		return NULL;
2740 	}
2741 	status->aioid = NULL;
2742 
2743 	/* move to the desired file position. */
2744 	if ((rc = lseek(fd, req->r_data.io.r_offset, SEEK_SET)) == -1) {
2745 		status->rval = rc;
2746 		status->err = errno;
2747 		return (status);
2748 	}
2749 
2750 	iov[0].iov_base = addr;
2751 	iov[0].iov_len = req->r_data.io.r_nbytes;
2752 
2753 	if (rw)
2754 		rc = writev(fd, iov, 1);
2755 	else
2756 		rc = readv(fd, iov, 1);
2757 	status->aioid = NULL;
2758 	status->rval = rc;
2759 	status->err = errno;
2760 	return (status);
2761 }
2762 
fmt_readv(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2763 char *fmt_readv(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
2764 {
2765 	static char errbuf[32768];
2766 	char *cp;
2767 
2768 	cp = errbuf;
2769 	cp += sprintf(cp, "syscall:  %s(%d, (iov on stack), 1)\n",
2770 		      sy->sy_name, fd);
2771 	return (errbuf);
2772 }
2773 #endif /* !CRAY */
2774 
2775 #ifdef sgi
/*
 * Async read entry point: forwards to sy_arw() with rw = 0 and returns
 * its result unchanged.
 */
struct status *sy_aread(struct io_req *req, struct syscall_info *sysc, int fd,
			char *addr)
{
	/* full prototype so the arguments of the call below are type-checked */
	struct status *sy_arw(struct io_req *req, struct syscall_info *sysc,
			      int fd, char *addr, int rw);

	return sy_arw(req, sysc, fd, addr, 0);
}
2782 
/*
 * Async write entry point: forwards to sy_arw() with rw = 1 and returns
 * its result unchanged.
 */
struct status *sy_awrite(struct io_req *req, struct syscall_info *sysc, int fd,
			 char *addr)
{
	/* full prototype so the arguments of the call below are type-checked */
	struct status *sy_arw(struct io_req *req, struct syscall_info *sysc,
			      int fd, char *addr, int rw);

	return sy_arw(req, sysc, fd, addr, 1);
}
2789 
2790 /*
2791   #define sy_aread(A, B, C, D)	sy_arw(A, B, C, D, 0)
2792   #define sy_awrite(A, B, C, D)	sy_arw(A, B, C, D, 1)
2793  */
2794 
sy_arw(struct io_req * req,struct syscall_info * sysc,int fd,char * addr,int rw)2795 struct status *sy_arw(struct io_req *req, struct syscall_info *sysc, int fd,
2796 		      char *addr, int rw)
2797 {
2798 	/* POSIX 1003.1b-1993 Async read */
2799 	struct status *status;
2800 	int rc;
2801 	int aio_id, aio_strat, signo;
2802 	struct aio_info *aiop;
2803 
2804 	status = malloc(sizeof(struct status));
2805 	if (status == NULL) {
2806 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2807 			     __FILE__, __LINE__);
2808 		return NULL;
2809 	}
2810 	aio_strat = req->r_data.io.r_aio_strat;
2811 	signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
2812 
2813 	aio_id = aio_register(fd, aio_strat, signo);
2814 	aiop = aio_slot(aio_id);
2815 
2816 	memset((void *)&aiop->aiocb, 0, sizeof(aiocb_t));
2817 
2818 	aiop->aiocb.aio_fildes = fd;
2819 	aiop->aiocb.aio_nbytes = req->r_data.io.r_nbytes;
2820 	aiop->aiocb.aio_offset = req->r_data.io.r_offset;
2821 	aiop->aiocb.aio_buf = addr;
2822 	aiop->aiocb.aio_reqprio = 0;	/* must be 0 */
2823 	aiop->aiocb.aio_lio_opcode = 0;
2824 
2825 	if (aio_strat == A_SIGNAL) {	/* siginfo(2) stuff */
2826 		aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
2827 		aiop->aiocb.aio_sigevent.sigev_signo = signo;
2828 	} else if (aio_strat == A_CALLBACK) {
2829 		aiop->aiocb.aio_sigevent.sigev_signo = 0;
2830 		aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_CALLBACK;
2831 		aiop->aiocb.aio_sigevent.sigev_func = cb_handler;
2832 		aiop->aiocb.aio_sigevent.sigev_value.sival_int = aio_id;
2833 	} else {
2834 		aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_NONE;
2835 		aiop->aiocb.aio_sigevent.sigev_signo = 0;
2836 	}
2837 
2838 	if (rw)
2839 		rc = aio_write(&aiop->aiocb);
2840 	else
2841 		rc = aio_read(&aiop->aiocb);
2842 
2843 	status->aioid = malloc(2 * sizeof(int));
2844 	if (status->aioid == NULL) {
2845 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2846 			     __FILE__, __LINE__);
2847 		return NULL;
2848 	}
2849 	status->aioid[0] = aio_id;
2850 	status->aioid[1] = -1;
2851 	status->rval = rc;
2852 	status->err = errno;
2853 	return (status);
2854 }
2855 
fmt_aread(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2856 char *fmt_aread(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
2857 {
2858 	static char errbuf[32768];
2859 	char *cp;
2860 
2861 	cp = errbuf;
2862 	cp += sprintf(cp, "syscall:  %s(&aiop->aiocb)\n", sy->sy_name);
2863 	return (errbuf);
2864 }
2865 #endif /* sgi */
2866 
2867 #ifndef CRAY
2868 
/*
 * mmap read entry point: forwards to sy_mmrw() with rw = 0 and returns
 * its result unchanged.
 */
struct status *sy_mmread(struct io_req *req, struct syscall_info *sysc, int fd,
			 char *addr)
{
	/* full prototype so the arguments of the call below are type-checked */
	struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc,
			       int fd, char *addr, int rw);

	return sy_mmrw(req, sysc, fd, addr, 0);
}
2875 
/*
 * mmap write entry point: forwards to sy_mmrw() with rw = 1 and returns
 * its result unchanged.
 */
struct status *sy_mmwrite(struct io_req *req, struct syscall_info *sysc, int fd,
			  char *addr)
{
	/* full prototype so the arguments of the call below are type-checked */
	struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc,
			       int fd, char *addr, int rw);

	return sy_mmrw(req, sysc, fd, addr, 1);
}
2882 
sy_mmrw(struct io_req * req,struct syscall_info * sysc,int fd,char * addr,int rw)2883 struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc, int fd,
2884 		       char *addr, int rw)
2885 {
2886 	/*
2887 	 * mmap read/write
2888 	 * This version is oriented towards mmaping the file to memory
2889 	 * ONCE and keeping it mapped.
2890 	 */
2891 	struct status *status;
2892 	void *mrc = NULL, *memaddr = NULL;
2893 	struct fd_cache *fdc;
2894 	struct stat sbuf;
2895 	int rc;
2896 
2897 	status = malloc(sizeof(struct status));
2898 	if (status == NULL) {
2899 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
2900 			     __FILE__, __LINE__);
2901 		return NULL;
2902 	}
2903 	status->aioid = NULL;
2904 	status->rval = -1;
2905 
2906 	fdc = alloc_fdcache(req->r_data.io.r_file, req->r_data.io.r_oflags);
2907 
2908 	if (v_opt || fdc->c_memaddr == NULL) {
2909 		if (fstat(fd, &sbuf) < 0) {
2910 			doio_fprintf(stderr, "fstat failed, errno=%d\n", errno);
2911 			status->err = errno;
2912 			return (status);
2913 		}
2914 
2915 		fdc->c_memlen = (int)sbuf.st_size;
2916 		mrc = mmap(NULL, (int)sbuf.st_size,
2917 			   rw ? PROT_WRITE | PROT_READ : PROT_READ,
2918 			   MAP_SHARED, fd, 0);
2919 
2920 		if (mrc == MAP_FAILED) {
2921 			doio_fprintf(stderr, "mmap() failed - 0x%lx %d\n",
2922 				     mrc, errno);
2923 			status->err = errno;
2924 			return (status);
2925 		}
2926 
2927 		fdc->c_memaddr = mrc;
2928 	}
2929 
2930 	memaddr = (void *)((char *)fdc->c_memaddr + req->r_data.io.r_offset);
2931 
2932 	active_mmap_rw = 1;
2933 	if (rw)
2934 		memcpy(memaddr, addr, req->r_data.io.r_nbytes);
2935 	else
2936 		memcpy(addr, memaddr, req->r_data.io.r_nbytes);
2937 	if (v_opt)
2938 		msync(fdc->c_memaddr, (int)sbuf.st_size, MS_SYNC);
2939 	active_mmap_rw = 0;
2940 
2941 	status->rval = req->r_data.io.r_nbytes;
2942 	status->err = 0;
2943 
2944 	if (v_opt) {
2945 		rc = munmap(mrc, (int)sbuf.st_size);
2946 	}
2947 
2948 	return (status);
2949 }
2950 
fmt_mmrw(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2951 char *fmt_mmrw(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
2952 {
2953 	static char errbuf[32768];
2954 	char *cp;
2955 	struct fd_cache *fdc;
2956 	void *memaddr;
2957 
2958 	fdc = alloc_fdcache(req->r_data.io.r_file, req->r_data.io.r_oflags);
2959 
2960 	cp = errbuf;
2961 	cp += sprintf(cp, "syscall:  %s(NULL, %d, %s, MAP_SHARED, %d, 0)\n",
2962 		      sy->sy_name,
2963 		      fdc->c_memlen,
2964 		      (sy->sy_flags & SY_WRITE) ? "PROT_WRITE" : "PROT_READ",
2965 		      fd);
2966 
2967 	cp += sprintf(cp, "\tfile is mmaped to: 0x%lx\n",
2968 		      (unsigned long)fdc->c_memaddr);
2969 
2970 	memaddr = (void *)((char *)fdc->c_memaddr + req->r_data.io.r_offset);
2971 
2972 	cp += sprintf(cp, "\tfile-mem=0x%lx, length=%d, buffer=0x%lx\n",
2973 		      (unsigned long)memaddr, req->r_data.io.r_nbytes,
2974 		      (unsigned long)addr);
2975 
2976 	return (errbuf);
2977 }
2978 #endif /* !CRAY */
2979 
2980 struct syscall_info syscalls[] = {
2981 #ifdef CRAY
2982 	{"listio-read-sync", LREAD,
2983 	 sy_listio, NULL, fmt_listio,
2984 	 SY_IOSW},
2985 	{"listio-read-strides-sync", LSREAD,
2986 	 sy_listio, listio_mem, fmt_listio,
2987 	 SY_IOSW},
2988 	{"listio-read-reqs-sync", LEREAD,
2989 	 sy_listio, listio_mem, fmt_listio,
2990 	 SY_IOSW},
2991 	{"listio-read-async", LREADA,
2992 	 sy_listio, NULL, fmt_listio,
2993 	 SY_IOSW | SY_ASYNC},
2994 	{"listio-read-strides-async", LSREADA,
2995 	 sy_listio, listio_mem, fmt_listio,
2996 	 SY_IOSW | SY_ASYNC},
2997 	{"listio-read-reqs-async", LEREADA,
2998 	 sy_listio, listio_mem, fmt_listio,
2999 	 SY_IOSW | SY_ASYNC},
3000 	{"listio-write-sync", LWRITE,
3001 	 sy_listio, listio_mem, fmt_listio,
3002 	 SY_IOSW | SY_WRITE},
3003 	{"listio-write-strides-sync", LSWRITE,
3004 	 sy_listio, listio_mem, fmt_listio,
3005 	 SY_IOSW | SY_WRITE},
3006 	{"listio-write-reqs-sync", LEWRITE,
3007 	 sy_listio, listio_mem, fmt_listio,
3008 	 SY_IOSW | SY_WRITE},
3009 	{"listio-write-async", LWRITEA,
3010 	 sy_listio, listio_mem, fmt_listio,
3011 	 SY_IOSW | SY_WRITE | SY_ASYNC},
3012 	{"listio-write-strides-async", LSWRITEA,
3013 	 sy_listio, listio_mem, fmt_listio,
3014 	 SY_IOSW | SY_WRITE | SY_ASYNC},
3015 	{"listio-write-reqs-async", LEWRITEA,
3016 	 sy_listio, listio_mem, fmt_listio,
3017 	 SY_IOSW | SY_WRITE | SY_ASYNC},
3018 #endif
3019 
3020 #ifdef sgi
3021 	{"aread", AREAD,
3022 	 sy_aread, NULL, fmt_aread,
3023 	 SY_IOSW | SY_ASYNC},
3024 	{"awrite", AWRITE,
3025 	 sy_awrite, NULL, fmt_aread,
3026 	 SY_IOSW | SY_WRITE | SY_ASYNC},
3027 	{"pread", PREAD,
3028 	 sy_pread, NULL, fmt_pread,
3029 	 0},
3030 	{"pwrite", PWRITE,
3031 	 sy_pwrite, NULL, fmt_pread,
3032 	 SY_WRITE},
3033 #endif
3034 
3035 #ifndef CRAY
3036 	{"readv", READV,
3037 	 sy_readv, NULL, fmt_readv,
3038 	 0},
3039 	{"writev", WRITEV,
3040 	 sy_writev, NULL, fmt_readv,
3041 	 SY_WRITE},
3042 	{"mmap-read", MMAPR,
3043 	 sy_mmread, NULL, fmt_mmrw,
3044 	 0},
3045 	{"mmap-write", MMAPW,
3046 	 sy_mmwrite, NULL, fmt_mmrw,
3047 	 SY_WRITE},
3048 #endif
3049 
3050 	{NULL, 0,
3051 	 0, 0, 0,
3052 	 0},
3053 };
3054 
do_rw(struct io_req * req)3055 int do_rw(struct io_req *req)
3056 {
3057 	static int pid = -1;
3058 	int fd, offset, nbytes, nstrides, nents, oflags;
3059 	int rval, mem_needed, i;
3060 	int logged_write, got_lock, pattern;
3061 	off_t woffset;
3062 	int min_byte, max_byte;
3063 	char *addr, *file, *msg;
3064 	struct status *s;
3065 	struct wlog_rec wrec;
3066 	struct syscall_info *sy;
3067 #if defined(CRAY) || defined(sgi)
3068 	struct aio_info *aiop;
3069 	struct iosw *iosw;
3070 #endif
3071 #ifdef sgi
3072 	struct fd_cache *fdc;
3073 #endif
3074 
3075 	woffset = 0;
3076 
3077 	/*
3078 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
3079 	 * r_nbytes are at the same offset in the read_req and reada_req
3080 	 * structures.
3081 	 */
3082 	file = req->r_data.io.r_file;
3083 	oflags = req->r_data.io.r_oflags;
3084 	offset = req->r_data.io.r_offset;
3085 	nbytes = req->r_data.io.r_nbytes;
3086 	nstrides = req->r_data.io.r_nstrides;
3087 	nents = req->r_data.io.r_nent;
3088 	pattern = req->r_data.io.r_pattern;
3089 
3090 	if (nents >= MAX_AIO) {
3091 		doio_fprintf(stderr,
3092 			     "do_rw: too many list requests, %d.  Maximum is %d\n",
3093 			     nents, MAX_AIO);
3094 		return (-1);
3095 	}
3096 
3097 	/*
3098 	 * look up system call info
3099 	 */
3100 	for (sy = syscalls; sy->sy_name != NULL && sy->sy_type != req->r_type;
3101 	     sy++) ;
3102 
3103 	if (sy->sy_name == NULL) {
3104 		doio_fprintf(stderr, "do_rw: unknown r_type %d.\n",
3105 			     req->r_type);
3106 		return (-1);
3107 	}
3108 
3109 	/*
3110 	 * Get an open file descriptor
3111 	 * Note: must be done before memory allocation so that the direct i/o
3112 	 *      information is available in mem. allocate
3113 	 */
3114 
3115 	if ((fd = alloc_fd(file, oflags)) == -1)
3116 		return -1;
3117 
3118 	/*
3119 	 * Allocate core memory and possibly sds space.  Initialize the
3120 	 * data to be written.  Make sure we get enough, based on the
3121 	 * memstride.
3122 	 *
3123 	 * need:
3124 	 *      1 extra word for possible partial-word address "bump"
3125 	 *      1 extra word for dynamic pattern overrun
3126 	 *      MPP_BUMP extra words for T3E non-hw-aligned memory address.
3127 	 */
3128 
3129 	if (sy->sy_buffer != NULL) {
3130 		mem_needed = (*sy->sy_buffer) (req, 0, 0, NULL, NULL);
3131 	} else {
3132 		mem_needed = nbytes;
3133 	}
3134 
3135 #ifdef CRAY
3136 	if ((rval =
3137 	     alloc_mem(mem_needed + wtob(1) * 2 +
3138 		       MPP_BUMP * sizeof(UINT64_T))) < 0) {
3139 		return rval;
3140 	}
3141 #else
3142 #ifdef sgi
3143 	/* get memory alignment for using DIRECT I/O */
3144 	fdc = alloc_fdcache(file, oflags);
3145 
3146 	if ((rval = alloc_mem(mem_needed + wtob(1) * 2 + fdc->c_memalign)) < 0) {
3147 		return rval;
3148 	}
3149 #else
3150 	/* what is !CRAY && !sgi ? */
3151 	if ((rval = alloc_mem(mem_needed + wtob(1) * 2)) < 0) {
3152 		return rval;
3153 	}
3154 #endif /* sgi */
3155 #endif /* CRAY */
3156 
3157 	Pattern[0] = pattern;
3158 
3159 	/*
3160 	 * Allocate SDS space for backdoor write if desired
3161 	 */
3162 
3163 	if (oflags & O_SSD) {
3164 #ifdef CRAY
3165 #ifndef _CRAYMPP
3166 		if (alloc_sds(nbytes) == -1)
3167 			return -1;
3168 
3169 		if (sy->sy_flags & SY_WRITE) {
3170 			/*pattern_fill(Memptr, mem_needed, Pattern, Pattern_Length, 0); */
3171 			(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length,
3172 				      0);
3173 
3174 			if (sswrite((long)Memptr, Sdsptr, btoc(mem_needed)) ==
3175 			    -1) {
3176 				doio_fprintf(stderr,
3177 					     "sswrite(%d, %d, %d) failed:  %s (%d)\n",
3178 					     (long)Memptr, Sdsptr,
3179 					     btoc(mem_needed), SYSERR, errno);
3180 				fflush(stderr);
3181 				return -1;
3182 			}
3183 		}
3184 
3185 		addr = (char *)Sdsptr;
3186 #else
3187 		doio_fprintf(stderr,
3188 			     "Invalid O_SSD flag was generated for MPP system\n");
3189 		fflush(stderr);
3190 		return -1;
3191 #endif /* _CRAYMPP */
3192 #else /* CRAY */
3193 		doio_fprintf(stderr,
3194 			     "Invalid O_SSD flag was generated for non-Cray system\n");
3195 		fflush(stderr);
3196 		return -1;
3197 #endif /* CRAY */
3198 	} else {
3199 		addr = Memptr;
3200 
3201 		/*
3202 		 * if io is not raw, bump the offset by a random amount
3203 		 * to generate non-word-aligned io.
3204 		 *
3205 		 * On MPP systems, raw I/O must start on an 0x80 byte boundary.
3206 		 * For non-aligned I/O, bump the address from 1 to 8 words.
3207 		 */
3208 
3209 		if (!(req->r_data.io.r_uflags & F_WORD_ALIGNED)) {
3210 #ifdef _CRAYMPP
3211 			addr +=
3212 			    random_range(0, MPP_BUMP, 1, NULL) * sizeof(int);
3213 #endif
3214 			addr += random_range(0, wtob(1) - 1, 1, NULL);
3215 		}
3216 #ifdef sgi
3217 		/*
3218 		 * Force memory alignment for Direct I/O
3219 		 */
3220 		if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
3221 			addr +=
3222 			    fdc->c_memalign - ((long)addr % fdc->c_memalign);
3223 		}
3224 #endif
3225 
3226 		/*
3227 		 * FILL must be done on a word-aligned buffer.
3228 		 * Call the fill function with Memptr which is aligned,
3229 		 * then memmove it to the right place.
3230 		 */
3231 		if (sy->sy_flags & SY_WRITE) {
3232 			(*Data_Fill) (Memptr, mem_needed, Pattern,
3233 				      Pattern_Length, 0);
3234 			if (addr != Memptr)
3235 				memmove(addr, Memptr, mem_needed);
3236 		}
3237 	}
3238 
3239 	rval = 0;
3240 	got_lock = 0;
3241 	logged_write = 0;
3242 
3243 	/*
3244 	 * Lock data if this is a write and locking option is set
3245 	 */
3246 	if (sy->sy_flags & SY_WRITE && k_opt) {
3247 		if (sy->sy_buffer != NULL) {
3248 			(*sy->sy_buffer) (req, offset, 0, &min_byte, &max_byte);
3249 		} else {
3250 			min_byte = offset;
3251 			max_byte = offset + (nbytes * nstrides * nents);
3252 		}
3253 
3254 		if (lock_file_region(file, fd, F_WRLCK,
3255 				     min_byte, (max_byte - min_byte + 1)) < 0) {
3256 			doio_fprintf(stderr,
3257 				     "file lock failed:\n%s\n",
3258 				     fmt_ioreq(req, sy, fd));
3259 			doio_fprintf(stderr,
3260 				     "          buffer(req, %d, 0, 0x%x, 0x%x)\n",
3261 				     offset, min_byte, max_byte);
3262 			alloc_mem(-1);
3263 			exit(E_INTERNAL);
3264 		}
3265 
3266 		got_lock = 1;
3267 	}
3268 
3269 	/*
3270 	 * Write a preliminary write-log entry.  This is done so that
3271 	 * doio_check can do corruption detection across an interrupt/crash.
3272 	 * Note that w_done is set to 0.  If doio_check sees this, it
3273 	 * re-creates the file extents as if the write completed, but does not
3274 	 * do any checking - see comments in doio_check for more details.
3275 	 */
3276 
3277 	if (sy->sy_flags & SY_WRITE && w_opt) {
3278 		if (pid == -1) {
3279 			pid = getpid();
3280 		}
3281 
3282 		wrec.w_async = (sy->sy_flags & SY_ASYNC) ? 1 : 0;
3283 		wrec.w_oflags = oflags;
3284 		wrec.w_pid = pid;
3285 		wrec.w_offset = offset;
3286 		wrec.w_nbytes = nbytes;	/* mem_needed -- total length */
3287 
3288 		wrec.w_pathlen = strlen(file);
3289 		memcpy(wrec.w_path, file, wrec.w_pathlen);
3290 		wrec.w_hostlen = strlen(Host);
3291 		memcpy(wrec.w_host, Host, wrec.w_hostlen);
3292 		wrec.w_patternlen = Pattern_Length;
3293 		memcpy(wrec.w_pattern, Pattern, wrec.w_patternlen);
3294 
3295 		wrec.w_done = 0;
3296 
3297 		if ((woffset = wlog_record_write(&Wlog, &wrec, -1)) == -1) {
3298 			doio_fprintf(stderr,
3299 				     "Could not append to write-log:  %s (%d)\n",
3300 				     SYSERR, errno);
3301 		} else {
3302 			logged_write = 1;
3303 		}
3304 	}
3305 
3306 	s = (*sy->sy_syscall) (req, sy, fd, addr);
3307 
3308 	if (s->rval == -1) {
3309 		doio_fprintf(stderr,
3310 			     "%s() request failed:  %s (%d)\n%s\n%s\n",
3311 			     sy->sy_name, SYSERR, errno,
3312 			     fmt_ioreq(req, sy, fd),
3313 			     (*sy->sy_format) (req, sy, fd, addr));
3314 
3315 		doio_upanic(U_RVAL);
3316 
3317 		for (i = 0; i < nents; i++) {
3318 			if (s->aioid == NULL)
3319 				break;
3320 			aio_unregister(s->aioid[i]);
3321 		}
3322 		rval = -1;
3323 	} else {
3324 		/*
3325 		 * If the syscall was async, wait for I/O to complete
3326 		 */
3327 #ifndef __linux__
3328 		if (sy->sy_flags & SY_ASYNC) {
3329 			for (i = 0; i < nents; i++) {
3330 				aio_wait(s->aioid[i]);
3331 			}
3332 		}
3333 #endif
3334 
3335 		/*
3336 		 * Check the syscall how-much-data-written return.  Look
3337 		 * for this in either the return value or the 'iosw'
3338 		 * structure.
3339 		 */
3340 
3341 		if (sy->sy_flags & SY_IOSW) {
3342 #ifdef CRAY
3343 			for (i = 0; i < nents; i++) {
3344 				if (s->aioid == NULL)
3345 					break;	/* >>> error condition? */
3346 				aiop = aio_slot(s->aioid[i]);
3347 				iosw = &aiop->iosw;
3348 				if (iosw->sw_error != 0) {
3349 					doio_fprintf(stderr,
3350 						     "%s() iosw error set: %s\n%s\n%s\n",
3351 						     sy->sy_name,
3352 						     strerror(iosw->sw_error),
3353 						     fmt_ioreq(req, sy, fd),
3354 						     (*sy->sy_format) (req, sy,
3355 								       fd,
3356 								       addr));
3357 					doio_upanic(U_IOSW);
3358 					rval = -1;
3359 				} else if (iosw->sw_count != nbytes * nstrides) {
3360 					doio_fprintf(stderr,
3361 						     "Bad iosw from %s() #%d\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n%s\n",
3362 						     sy->sy_name, i,
3363 						     1, 0, nbytes * nstrides,
3364 						     iosw->sw_flag,
3365 						     iosw->sw_error,
3366 						     iosw->sw_count,
3367 						     fmt_ioreq(req, sy, fd),
3368 						     (*sy->sy_format) (req, sy,
3369 								       fd,
3370 								       addr));
3371 					doio_upanic(U_IOSW);
3372 					rval = -1;
3373 				}
3374 
3375 				aio_unregister(s->aioid[i]);
3376 			}
3377 #endif /* CRAY */
3378 #ifdef sgi
3379 			for (i = 0; s->aioid[i] != -1; i++) {
3380 				if (s->aioid == NULL) {
3381 					doio_fprintf(stderr,
3382 						     "aioid == NULL!\n");
3383 					break;
3384 				}
3385 				aiop = aio_slot(s->aioid[i]);
3386 
3387 				/*
3388 				 * make sure the io completed without error
3389 				 */
3390 				if (aiop->aio_errno != 0) {
3391 					doio_fprintf(stderr,
3392 						     "%s() aio error set: %s (%d)\n%s\n%s\n",
3393 						     sy->sy_name,
3394 						     strerror(aiop->aio_errno),
3395 						     aiop->aio_errno,
3396 						     fmt_ioreq(req, sy, fd),
3397 						     (*sy->sy_format) (req, sy,
3398 								       fd,
3399 								       addr));
3400 					doio_upanic(U_IOSW);
3401 					rval = -1;
3402 				} else if (aiop->aio_ret != nbytes) {
3403 					doio_fprintf(stderr,
3404 						     "Bad aio return from %s() #%d\nExpected (%d,%d), got (%d,%d)\n%s\n%s\n",
3405 						     sy->sy_name, i,
3406 						     0, nbytes,
3407 						     aiop->aio_errno,
3408 						     aiop->aio_ret,
3409 						     fmt_ioreq(req, sy, fd),
3410 						     (*sy->sy_format) (req, sy,
3411 								       fd,
3412 								       addr));
3413 					aio_unregister(s->aioid[i]);
3414 					doio_upanic(U_IOSW);
3415 					return -1;
3416 				} else {
3417 					aio_unregister(s->aioid[i]);
3418 					rval = 0;
3419 				}
3420 			}
3421 #endif /* sgi */
3422 		} else {
3423 
3424 			if (s->rval != mem_needed) {
3425 				doio_fprintf(stderr,
3426 					     "%s() request returned wrong # of bytes - expected %d, got %d\n%s\n%s\n",
3427 					     sy->sy_name, nbytes, s->rval,
3428 					     fmt_ioreq(req, sy, fd),
3429 					     (*sy->sy_format) (req, sy, fd,
3430 							       addr));
3431 				rval = -1;
3432 				doio_upanic(U_RVAL);
3433 			}
3434 		}
3435 	}
3436 
3437 	/*
3438 	 * Verify that the data was written correctly - check_file() returns
3439 	 * a non-null pointer which contains an error message if there are
3440 	 * problems.
3441 	 */
3442 
3443 	if (rval == 0 && sy->sy_flags & SY_WRITE && v_opt) {
3444 		msg = check_file(file, offset, nbytes * nstrides * nents,
3445 				 Pattern, Pattern_Length, 0,
3446 				 oflags & O_PARALLEL);
3447 		if (msg != NULL) {
3448 			doio_fprintf(stderr, "%s\n%s\n%s\n",
3449 				     msg,
3450 				     fmt_ioreq(req, sy, fd),
3451 				     (*sy->sy_format) (req, sy, fd, addr));
3452 			doio_upanic(U_CORRUPTION);
3453 			exit(E_COMPARE);
3454 		}
3455 	}
3456 
3457 	/*
3458 	 * General cleanup ...
3459 	 *
3460 	 * Write extent information to the write-log, so that doio_check can do
3461 	 * corruption detection.  Note that w_done is set to 1, indicating that
3462 	 * the write has been verified as complete.  We don't need to write the
3463 	 * filename on the second logging.
3464 	 */
3465 
3466 	if (w_opt && logged_write) {
3467 		wrec.w_done = 1;
3468 		wlog_record_write(&Wlog, &wrec, woffset);
3469 	}
3470 
3471 	/*
3472 	 * Unlock file region if necessary
3473 	 */
3474 
3475 	if (got_lock) {
3476 		if (lock_file_region(file, fd, F_UNLCK,
3477 				     min_byte, (max_byte - min_byte + 1)) < 0) {
3478 			alloc_mem(-1);
3479 			exit(E_INTERNAL);
3480 		}
3481 	}
3482 
3483 	if (s->aioid != NULL)
3484 		free(s->aioid);
3485 	free(s);
3486 	return (rval == -1) ? -1 : 0;
3487 }
3488 
3489 /*
3490  * fcntl-based requests
3491  *   - F_RESVSP
3492  *   - F_UNRESVSP
3493  *   - F_FSYNC
3494  */
3495 #ifdef sgi
do_fcntl(struct io_req * req)3496 int do_fcntl(struct io_req *req)
3497 {
3498 	int fd, oflags, offset, nbytes;
3499 	int rval, op;
3500 	int got_lock;
3501 	int min_byte, max_byte;
3502 	char *file, *msg;
3503 	struct flock flk;
3504 
3505 	/*
3506 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
3507 	 * r_nbytes are at the same offset in the read_req and reada_req
3508 	 * structures.
3509 	 */
3510 	file = req->r_data.io.r_file;
3511 	oflags = req->r_data.io.r_oflags;
3512 	offset = req->r_data.io.r_offset;
3513 	nbytes = req->r_data.io.r_nbytes;
3514 
3515 	flk.l_type = 0;
3516 	flk.l_whence = SEEK_SET;
3517 	flk.l_start = offset;
3518 	flk.l_len = nbytes;
3519 
3520 	/*
3521 	 * Get an open file descriptor
3522 	 */
3523 
3524 	if ((fd = alloc_fd(file, oflags)) == -1)
3525 		return -1;
3526 
3527 	rval = 0;
3528 	got_lock = 0;
3529 
3530 	/*
3531 	 * Lock data if this is locking option is set
3532 	 */
3533 	if (k_opt) {
3534 		min_byte = offset;
3535 		max_byte = offset + nbytes;
3536 
3537 		if (lock_file_region(file, fd, F_WRLCK,
3538 				     min_byte, (nbytes + 1)) < 0) {
3539 			doio_fprintf(stderr, "file lock failed:\n");
3540 			doio_fprintf(stderr,
3541 				     "          buffer(req, %d, 0, 0x%x, 0x%x)\n",
3542 				     offset, min_byte, max_byte);
3543 			alloc_mem(-1);
3544 			exit(E_INTERNAL);
3545 		}
3546 
3547 		got_lock = 1;
3548 	}
3549 
3550 	switch (req->r_type) {
3551 	case RESVSP:
3552 		op = F_RESVSP;
3553 		msg = "f_resvsp";
3554 		break;
3555 	case UNRESVSP:
3556 		op = F_UNRESVSP;
3557 		msg = "f_unresvsp";
3558 		break;
3559 #ifdef F_FSYNC
3560 	case DFFSYNC:
3561 		op = F_FSYNC;
3562 		msg = "f_fsync";
3563 		break;
3564 #endif
3565 	}
3566 
3567 	rval = fcntl(fd, op, &flk);
3568 
3569 	if (rval == -1) {
3570 		doio_fprintf(stderr,
3571 			     "fcntl %s request failed: %s (%d)\n\tfcntl(%d, %s %d, {%d %lld ==> %lld}\n",
3572 			     msg, SYSERR, errno,
3573 			     fd, msg, op, flk.l_whence,
3574 			     (long long)flk.l_start, (long long)flk.l_len);
3575 
3576 		doio_upanic(U_RVAL);
3577 		rval = -1;
3578 	}
3579 
3580 	/*
3581 	 * Unlock file region if necessary
3582 	 */
3583 
3584 	if (got_lock) {
3585 		if (lock_file_region(file, fd, F_UNLCK,
3586 				     min_byte, (max_byte - min_byte + 1)) < 0) {
3587 			alloc_mem(-1);
3588 			exit(E_INTERNAL);
3589 		}
3590 	}
3591 
3592 	return (rval == -1) ? -1 : 0;
3593 }
3594 #endif /* sgi */
3595 
3596 /*
3597  *  fsync(2) and fdatasync(2)
3598  */
3599 #ifndef CRAY
do_sync(struct io_req * req)3600 int do_sync(struct io_req *req)
3601 {
3602 	int fd, oflags;
3603 	int rval;
3604 	char *file;
3605 
3606 	/*
3607 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
3608 	 * r_nbytes are at the same offset in the read_req and reada_req
3609 	 * structures.
3610 	 */
3611 	file = req->r_data.io.r_file;
3612 	oflags = req->r_data.io.r_oflags;
3613 
3614 	/*
3615 	 * Get an open file descriptor
3616 	 */
3617 
3618 	if ((fd = alloc_fd(file, oflags)) == -1)
3619 		return -1;
3620 
3621 	rval = 0;
3622 	switch (req->r_type) {
3623 	case FSYNC2:
3624 		rval = fsync(fd);
3625 		break;
3626 	case FDATASYNC:
3627 		rval = fdatasync(fd);
3628 		break;
3629 	default:
3630 		rval = -1;
3631 	}
3632 	return (rval == -1) ? -1 : 0;
3633 }
3634 #endif /* !CRAY */
3635 
/*
 * doio_pat_fill - fill a buffer through pattern_fill().
 *
 * addr, mem_needed, Pattern and Pattern_Length are forwarded unchanged.
 * The shift argument is accepted but not forwarded: pattern_fill() is
 * always called with a shift of 0.
 * NOTE(review): it is not visible here whether ignoring shift is
 * intentional - confirm against the callers before changing it.
 *
 * Returns whatever pattern_fill() returns.
 */
int
doio_pat_fill(char *addr, int mem_needed, char *Pattern, int Pattern_Length,
	      int shift)
{
	int filled;

	(void)shift;

	filled = pattern_fill(addr, mem_needed, Pattern, Pattern_Length, 0);
	return filled;
}
3642 
/*
 * doio_pat_check - compare a buffer against the repeating pattern and
 * describe the first difference.
 *
 * buf, length		data to check
 * offset		file offset that buf[0] corresponds to; it is only
 *			used in the text of the report
 * pattern, pattern_length, patshift
 *			expected pattern, its length, and the position in
 *			the pattern that buf[0] is compared with
 *
 * Returns NULL when pattern_check() reports no difference.  Otherwise
 * returns a pointer to a static buffer holding a two-line banner followed,
 * if the byte-by-byte scan finds a mismatch, by the file offset of the
 * first bad byte and up to 32 expected/actual bytes with unprintable
 * characters shown as '.'.  The buffer is reused by later calls.
 */
char *doio_pat_check(char *buf, int offset, int length, char *pattern,
		     int pattern_length, int patshift)
{
	static char errbuf[4096];
	int nb, i, pattern_index;
	char *cp, *bufend, *ep;
	char actual[33], expected[33];

	if (pattern_check(buf, length, pattern, pattern_length, patshift) == 0)
		return NULL;

	/*
	 * The report is a handful of fixed-format lines plus at most two
	 * 32-character samples, so it stays far below sizeof(errbuf).
	 */
	ep = errbuf;
	ep +=
	    sprintf(ep,
		    "Corrupt regions follow - unprintable chars are represented as '.'\n");
	ep +=
	    sprintf(ep,
		    "-----------------------------------------------------------------\n");

	/*
	 * In C the sign of a % b follows a, so a negative patshift would
	 * give a negative index into pattern[]; fold it back into range.
	 */
	pattern_index = patshift % pattern_length;
	if (pattern_index < 0)
		pattern_index += pattern_length;

	bufend = buf + length;

	/* Walk forward to the first byte that differs from the pattern. */
	for (cp = buf; cp < bufend; cp++) {
		if (*cp != pattern[pattern_index])
			break;

		if (++pattern_index == pattern_length)
			pattern_index = 0;
	}

	/* pattern_check() complained but no byte differs: banner only. */
	if (cp == bufend)
		return errbuf;

	nb = bufend - cp;
	if ((unsigned int)nb > sizeof(expected) - 1) {
		nb = sizeof(expected) - 1;
	}

	ep +=
	    sprintf(ep,
		    "corrupt bytes starting at file offset %d\n",
		    offset + (int)(cp - buf));

	/*
	 * Fill in the expected and actual patterns.  The <ctype.h>
	 * functions require a value representable as unsigned char (or
	 * EOF), so the possibly negative char is converted first.
	 */
	memset(expected, 0x00, sizeof(expected));
	memset(actual, 0x00, sizeof(actual));

	for (i = 0; i < nb; i++) {
		expected[i] = pattern[(pattern_index + i) % pattern_length];
		if (!isprint((unsigned char)expected[i])) {
			expected[i] = '.';
		}

		actual[i] = cp[i];
		if (!isprint((unsigned char)actual[i])) {
			actual[i] = '.';
		}
	}

	ep +=
	    sprintf(ep,
		    "    1st %2d expected bytes:  %s\n", nb, expected);
	ep +=
	    sprintf(ep,
		    "    1st %2d actual bytes:    %s\n", nb, actual);

	return errbuf;
}
3720 
3721 /*
3722  * Check the contents of a file beginning at offset, for length bytes.  It
3723  * is assumed that there is a string of pattern bytes in this area of the
3724  * file.  Use normal buffered reads to do the verification.
3725  *
3726  * If there is a data mismatch, write a detailed message into a static buffer
3727  * suitable for the caller to print.  Otherwise return NULL.
3728  *
3729  * The fsa flag is set to non-zero if the buffer should be read back through
3730  * the FSA (unicos/mk).  This implies the file will be opened
3731  * O_PARALLEL|O_RAW|O_WELLFORMED to do the validation.  We must do this because
3732  * FSA will not allow the file to be opened for buffered io if it was
3733  * previously opened for O_PARALLEL io.
3734  */
3735 
check_file(char * file,int offset,int length,char * pattern,int pattern_length,int patshift,int fsa)3736 char *check_file(char *file, int offset, int length, char *pattern,
3737 		 int pattern_length, int patshift, int fsa)
3738 {
3739 	static char errbuf[4096];
3740 	int fd, nb, flags;
3741 	char *buf, *em, *ep;
3742 #ifdef sgi
3743 	struct fd_cache *fdc;
3744 #endif
3745 
3746 	buf = Memptr;
3747 
3748 	if (V_opt) {
3749 		flags = Validation_Flags | O_RDONLY;
3750 	} else {
3751 		flags = O_RDONLY;
3752 		if (fsa) {
3753 #ifdef CRAY
3754 			flags |= O_PARALLEL | O_RAW | O_WELLFORMED;
3755 #endif
3756 		}
3757 	}
3758 
3759 	if ((fd = alloc_fd(file, flags)) == -1) {
3760 		sprintf(errbuf,
3761 			"Could not open file %s with flags %#o (%s) for data comparison:  %s (%d)\n",
3762 			file, flags, format_oflags(flags), SYSERR, errno);
3763 		return errbuf;
3764 	}
3765 
3766 	if (lseek(fd, offset, SEEK_SET) == -1) {
3767 		sprintf(errbuf,
3768 			"Could not lseek to offset %d in %s for verification:  %s (%d)\n",
3769 			offset, file, SYSERR, errno);
3770 		return errbuf;
3771 	}
3772 #ifdef sgi
3773 	/* Irix: Guarantee a properly aligned address on Direct I/O */
3774 	fdc = alloc_fdcache(file, flags);
3775 	if ((flags & O_DIRECT) && ((long)buf % fdc->c_memalign != 0)) {
3776 		buf += fdc->c_memalign - ((long)buf % fdc->c_memalign);
3777 	}
3778 #endif
3779 
3780 	if ((nb = read(fd, buf, length)) == -1) {
3781 #ifdef sgi
3782 		sprintf(errbuf,
3783 			"Could not read %d bytes from %s for verification:  %s (%d)\n\tread(%d, 0x%lx, %d)\n\tbuf %% alignment(%d) = %ld\n",
3784 			length, file, SYSERR, errno,
3785 			fd, buf, length,
3786 			fdc->c_memalign, (long)buf % fdc->c_memalign);
3787 #else
3788 		sprintf(errbuf,
3789 			"Could not read %d bytes from %s for verification:  %s (%d)\n",
3790 			length, file, SYSERR, errno);
3791 
3792 #endif
3793 		return errbuf;
3794 	}
3795 
3796 	if (nb != length) {
3797 		sprintf(errbuf,
3798 			"Read wrong # bytes from %s.  Expected %d, got %d\n",
3799 			file, length, nb);
3800 		return errbuf;
3801 	}
3802 
3803 	if ((em =
3804 	     (*Data_Check) (buf, offset, length, pattern, pattern_length,
3805 			    patshift)) != NULL) {
3806 		ep = errbuf;
3807 		ep += sprintf(ep, "*** DATA COMPARISON ERROR ***\n");
3808 		ep +=
3809 		    sprintf(ep, "check_file(%s, %d, %d, %s, %d, %d) failed\n\n",
3810 			    file, offset, length, pattern, pattern_length,
3811 			    patshift);
3812 		ep +=
3813 		    sprintf(ep, "Comparison fd is %d, with open flags %#o\n",
3814 			    fd, flags);
3815 		strcpy(ep, em);
3816 		return (errbuf);
3817 	}
3818 	return NULL;
3819 }
3820 
3821 /*
3822  * Function to single-thread stdio output.
3823  */
3824 
doio_fprintf(FILE * stream,char * format,...)3825 int doio_fprintf(FILE * stream, char *format, ...)
3826 {
3827 	static int pid = -1;
3828 	char *date;
3829 	int rval;
3830 	struct flock flk;
3831 	va_list arglist;
3832 	struct timeval ts;
3833 	gettimeofday(&ts, NULL);
3834 	date = hms(ts.tv_sec);
3835 
3836 	if (pid == -1) {
3837 		pid = getpid();
3838 	}
3839 
3840 	flk.l_whence = flk.l_start = flk.l_len = 0;
3841 	flk.l_type = F_WRLCK;
3842 	fcntl(fileno(stream), F_SETLKW, &flk);
3843 
3844 	va_start(arglist, format);
3845 	rval = fprintf(stream, "\n%s%s (%5d) %s\n", Prog, TagName, pid, date);
3846 	rval += fprintf(stream, "---------------------\n");
3847 	vfprintf(stream, format, arglist);
3848 	va_end(arglist);
3849 
3850 	fflush(stream);
3851 
3852 	flk.l_type = F_UNLCK;
3853 	fcntl(fileno(stream), F_SETLKW, &flk);
3854 
3855 	return rval;
3856 }
3857 
3858 /*
3859  * Simple function for allocating core memory.  Uses Memsize and Memptr to
3860  * keep track of the current amount allocated.
3861  */
3862 #ifndef CRAY
alloc_mem(int nbytes)3863 int alloc_mem(int nbytes)
3864 {
3865 	char *cp;
3866 	void *addr;
3867 	int me = 0, flags, key, shmid;
3868 	static int mturn = 0;	/* which memory type to use */
3869 	struct memalloc *M;
3870 	char filename[255];
3871 #ifdef __linux__
3872 	struct shmid_ds shm_ds;
3873 #endif
3874 
3875 #ifdef __linux__
3876 	memset(&shm_ds, 0x00, sizeof(struct shmid_ds));
3877 #endif
3878 
3879 	/* nbytes = -1 means "free all allocated memory" */
3880 	if (nbytes == -1) {
3881 
3882 		for (me = 0; me < Nmemalloc; me++) {
3883 			if (Memalloc[me].space == NULL)
3884 				continue;
3885 
3886 			switch (Memalloc[me].memtype) {
3887 			case MEM_DATA:
3888 #ifdef sgi
3889 				if (Memalloc[me].flags & MEMF_MPIN)
3890 					munpin(Memalloc[me].space,
3891 					       Memalloc[me].size);
3892 #endif
3893 				free(Memalloc[me].space);
3894 				Memalloc[me].space = NULL;
3895 				Memptr = NULL;
3896 				Memsize = 0;
3897 				break;
3898 			case MEM_SHMEM:
3899 #ifdef sgi
3900 				if (Memalloc[me].flags & MEMF_MPIN)
3901 					munpin(Memalloc[me].space,
3902 					       Memalloc[me].size);
3903 #endif
3904 				shmdt(Memalloc[me].space);
3905 				Memalloc[me].space = NULL;
3906 #ifdef sgi
3907 				shmctl(Memalloc[me].fd, IPC_RMID);
3908 #else
3909 				shmctl(Memalloc[me].fd, IPC_RMID, &shm_ds);
3910 #endif
3911 				break;
3912 			case MEM_MMAP:
3913 #ifdef sgi
3914 				if (Memalloc[me].flags & MEMF_MPIN)
3915 					munpin(Memalloc[me].space,
3916 					       Memalloc[me].size);
3917 #endif
3918 				munmap(Memalloc[me].space, Memalloc[me].size);
3919 				close(Memalloc[me].fd);
3920 				if (Memalloc[me].flags & MEMF_FILE) {
3921 					unlink(Memalloc[me].name);
3922 				}
3923 				Memalloc[me].space = NULL;
3924 				break;
3925 			default:
3926 				doio_fprintf(stderr,
3927 					     "alloc_mem: HELP! Unknown memory space type %d index %d\n",
3928 					     Memalloc[me].memtype, me);
3929 				break;
3930 			}
3931 		}
3932 		return 0;
3933 	}
3934 
3935 	/*
3936 	 * Select a memory area (currently round-robbin)
3937 	 */
3938 
3939 	if (mturn >= Nmemalloc)
3940 		mturn = 0;
3941 
3942 	M = &Memalloc[mturn];
3943 
3944 	switch (M->memtype) {
3945 	case MEM_DATA:
3946 		if (nbytes > M->size) {
3947 			if (M->space != NULL) {
3948 #ifdef sgi
3949 				if (M->flags & MEMF_MPIN)
3950 					munpin(M->space, M->size);
3951 #endif
3952 				free(M->space);
3953 			}
3954 			M->space = NULL;
3955 			M->size = 0;
3956 		}
3957 
3958 		if (M->space == NULL) {
3959 			if ((cp = malloc(nbytes)) == NULL) {
3960 				doio_fprintf(stderr,
3961 					     "malloc(%d) failed:  %s (%d)\n",
3962 					     nbytes, SYSERR, errno);
3963 				return -1;
3964 			}
3965 #ifdef sgi
3966 			if (M->flags & MEMF_MPIN) {
3967 				if (mpin(cp, nbytes) == -1) {
3968 					doio_fprintf(stderr,
3969 						     "mpin(0x%lx, %d) failed:  %s (%d)\n",
3970 						     cp, nbytes, SYSERR, errno);
3971 				}
3972 			}
3973 #endif
3974 			M->space = (void *)cp;
3975 			M->size = nbytes;
3976 		}
3977 		break;
3978 
3979 	case MEM_MMAP:
3980 		if (nbytes > M->size) {
3981 			if (M->space != NULL) {
3982 #ifdef sgi
3983 				if (M->flags & MEMF_MPIN)
3984 					munpin(M->space, M->size);
3985 #endif
3986 				munmap(M->space, M->size);
3987 				close(M->fd);
3988 				if (M->flags & MEMF_FILE)
3989 					unlink(M->name);
3990 			}
3991 			M->space = NULL;
3992 			M->size = 0;
3993 		}
3994 
3995 		if (M->space == NULL) {
3996 			if (strchr(M->name, '%')) {
3997 				sprintf(filename, M->name, getpid());
3998 				M->name = strdup(filename);
3999 			}
4000 
4001 			if ((M->fd =
4002 			     open(M->name, O_CREAT | O_RDWR, 0666)) == -1) {
4003 				doio_fprintf(stderr,
4004 					     "alloc_mmap: error %d (%s) opening '%s'\n",
4005 					     errno, SYSERR, M->name);
4006 				return (-1);
4007 			}
4008 
4009 			addr = NULL;
4010 			flags = 0;
4011 			M->size = nbytes * 4;
4012 
4013 			/* bias addr if MEMF_ADDR | MEMF_FIXADDR */
4014 			/* >>> how to pick a memory address? */
4015 
4016 			/* bias flags on MEMF_PRIVATE etc */
4017 			if (M->flags & MEMF_PRIVATE)
4018 				flags |= MAP_PRIVATE;
4019 #ifdef sgi
4020 			if (M->flags & MEMF_LOCAL)
4021 				flags |= MAP_LOCAL;
4022 			if (M->flags & MEMF_AUTORESRV)
4023 				flags |= MAP_AUTORESRV;
4024 			if (M->flags & MEMF_AUTOGROW)
4025 				flags |= MAP_AUTOGROW;
4026 #endif
4027 			if (M->flags & MEMF_SHARED)
4028 				flags |= MAP_SHARED;
4029 
4030 /*printf("alloc_mem, about to mmap, fd=%d, name=(%s)\n", M->fd, M->name);*/
4031 			if ((M->space = mmap(addr, M->size,
4032 					     PROT_READ | PROT_WRITE,
4033 					     flags, M->fd, 0))
4034 			    == MAP_FAILED) {
4035 				doio_fprintf(stderr,
4036 					     "alloc_mem: mmap error. errno %d (%s)\n\tmmap(addr 0x%x, size %d, read|write 0x%x, mmap flags 0x%x [%#o], fd %d, 0)\n\tfile %s\n",
4037 					     errno, SYSERR, addr, M->size,
4038 					     PROT_READ | PROT_WRITE, flags,
4039 					     M->flags, M->fd, M->name);
4040 				doio_fprintf(stderr, "\t%s%s%s%s%s",
4041 					     (flags & MAP_PRIVATE) ? "private "
4042 					     : "",
4043 #ifdef sgi
4044 					     (flags & MAP_LOCAL) ? "local " :
4045 					     "",
4046 					     (flags & MAP_AUTORESRV) ?
4047 					     "autoresrv " : "",
4048 					     (flags & MAP_AUTOGROW) ?
4049 					     "autogrow " : "",
4050 #endif
4051 					     (flags & MAP_SHARED) ? "shared" :
4052 					     "");
4053 				return (-1);
4054 			}
4055 		}
4056 		break;
4057 
4058 	case MEM_SHMEM:
4059 		if (nbytes > M->size) {
4060 			if (M->space != NULL) {
4061 #ifdef sgi
4062 				if (M->flags & MEMF_MPIN)
4063 					munpin(M->space, M->size);
4064 #endif
4065 				shmdt(M->space);
4066 #ifdef sgi
4067 				shmctl(M->fd, IPC_RMID);
4068 #else
4069 				shmctl(M->fd, IPC_RMID, &shm_ds);
4070 #endif
4071 			}
4072 			M->space = NULL;
4073 			M->size = 0;
4074 		}
4075 
4076 		if (M->space == NULL) {
4077 			if (!strcmp(M->name, "private")) {
4078 				key = IPC_PRIVATE;
4079 			} else {
4080 				sscanf(M->name, "%i", &key);
4081 			}
4082 
4083 			M->size = M->nblks ? M->nblks * 512 : nbytes;
4084 
4085 			if (nbytes > M->size) {
4086 #ifdef DEBUG
4087 				doio_fprintf(stderr,
4088 					     "MEM_SHMEM: nblks(%d) too small:  nbytes=%d  Msize=%d, skipping this req.\n",
4089 					     M->nblks, nbytes, M->size);
4090 #endif
4091 				return SKIP_REQ;
4092 			}
4093 
4094 			shmid = shmget(key, M->size, IPC_CREAT | 0666);
4095 			if (shmid == -1) {
4096 				doio_fprintf(stderr,
4097 					     "shmget(0x%x, %d, CREAT) failed: %s (%d)\n",
4098 					     key, M->size, SYSERR, errno);
4099 				return (-1);
4100 			}
4101 			M->fd = shmid;
4102 			M->space = shmat(shmid, NULL, SHM_RND);
4103 			if (M->space == (void *)-1) {
4104 				doio_fprintf(stderr,
4105 					     "shmat(0x%x, NULL, SHM_RND) failed: %s (%d)\n",
4106 					     shmid, SYSERR, errno);
4107 				return (-1);
4108 			}
4109 #ifdef sgi
4110 			if (M->flags & MEMF_MPIN) {
4111 				if (mpin(M->space, M->size) == -1) {
4112 					doio_fprintf(stderr,
4113 						     "mpin(0x%lx, %d) failed:  %s (%d)\n",
4114 						     M->space, M->size, SYSERR,
4115 						     errno);
4116 				}
4117 			}
4118 #endif
4119 		}
4120 		break;
4121 
4122 	default:
4123 		doio_fprintf(stderr,
4124 			     "alloc_mem: HELP! Unknown memory space type %d index %d\n",
4125 			     Memalloc[me].memtype, mturn);
4126 		break;
4127 	}
4128 
4129 	Memptr = M->space;
4130 	Memsize = M->size;
4131 
4132 	mturn++;
4133 	return 0;
4134 }
4135 #else /* CRAY */
alloc_mem(int nbytes)4136 int alloc_mem(int nbytes)
4137 {
4138 	char *cp;
4139 	int ip;
4140 	static char *malloc_space;
4141 
4142 	/*
4143 	 * The "unicos" version of this did some stuff with sbrk;
4144 	 * this caused problems with async I/O on irix, and now appears
4145 	 * to be causing problems with FSA I/O on unicos/mk.
4146 	 */
4147 #ifdef NOTDEF
4148 	if (nbytes > Memsize) {
4149 		if ((cp = (char *)sbrk(nbytes - Memsize)) == (char *)-1) {
4150 			doio_fprintf(stderr, "sbrk(%d) failed:  %s (%d)\n",
4151 				     nbytes - Memsize, SYSERR, errno);
4152 			return -1;
4153 		}
4154 
4155 		if (Memsize == 0)
4156 			Memptr = cp;
4157 		Memsize += nbytes - Memsize;
4158 	}
4159 #else
4160 
4161 	/* nbytes = -1 means "free all allocated memory" */
4162 	if (nbytes == -1) {
4163 		free(malloc_space);
4164 		Memptr = NULL;
4165 		Memsize = 0;
4166 		return 0;
4167 	}
4168 
4169 	if (nbytes > Memsize) {
4170 		if (Memsize != 0)
4171 			free(malloc_space);
4172 
4173 		if ((cp = malloc_space = malloc(nbytes)) == NULL) {
4174 			doio_fprintf(stderr, "malloc(%d) failed:  %s (%d)\n",
4175 				     nbytes, SYSERR, errno);
4176 			return -1;
4177 		}
4178 #ifdef _CRAYT3E
4179 		/* T3E requires memory to be aligned on 0x40 word boundaries */
4180 		ip = (int)cp;
4181 		if (ip & 0x3F != 0) {
4182 			doio_fprintf(stderr,
4183 				     "malloc(%d) = 0x%x(0x%x) not aligned by 0x%x\n",
4184 				     nbytes, cp, ip, ip & 0x3f);
4185 
4186 			free(cp);
4187 			if ((cp = malloc_space = malloc(nbytes + 0x40)) == NULL) {
4188 				doio_fprintf(stderr,
4189 					     "malloc(%d) failed:  %s (%d)\n",
4190 					     nbytes, SYSERR, errno);
4191 				return -1;
4192 			}
4193 			ip = (int)cp;
4194 			cp += (0x40 - (ip & 0x3F));
4195 		}
4196 #endif /* _CRAYT3E */
4197 		Memptr = cp;
4198 		Memsize = nbytes;
4199 	}
4200 #endif /* NOTDEF */
4201 	return 0;
4202 }
4203 #endif /* CRAY */
4204 
4205 /*
4206  * Simple function for allocating sds space.  Uses Sdssize and Sdsptr to
4207  * keep track of location and size of currently allocated chunk.
4208  */
4209 
4210 #ifdef _CRAY1
4211 
alloc_sds(int nbytes)4212 int alloc_sds(int nbytes)
4213 {
4214 	int nblks;
4215 
4216 	if (nbytes > Sdssize) {
4217 		if ((nblks = ssbreak(btoc(nbytes - Sdssize))) == -1) {
4218 			doio_fprintf(stderr, "ssbreak(%d) failed:  %s (%d)\n",
4219 				     btoc(nbytes - Sdssize), SYSERR, errno);
4220 			return -1;
4221 		}
4222 
4223 		Sdssize = ctob(nblks);
4224 		Sdsptr = 0;
4225 	}
4226 
4227 	return 0;
4228 }
4229 
4230 #else
4231 
4232 #ifdef CRAY
4233 
alloc_sds(int nbytes)4234 int alloc_sds(int nbytes)
4235 {
4236 	doio_fprintf(stderr,
4237 		     "Internal Error - alloc_sds() called on a CRAY2 system\n");
4238 	alloc_mem(-1);
4239 	exit(E_INTERNAL);
4240 }
4241 
4242 #endif
4243 
4244 #endif /* _CRAY1 */
4245 
4246 /*
4247  * Function to maintain a file descriptor cache, so that doio does not have
4248  * to do so many open() and close() calls.  Descriptors are stored in the
4249  * cache by file name, and open flags.  Each entry also has a _rtc value
4250  * associated with it which is used in aging.  If doio cannot open a file
4251  * because it already has too many open (ie. system limit hit) it will close
4252  * the one in the cache that has the oldest _rtc value.
4253  *
4254  * If alloc_fd() is called with a file of NULL, it will close all descriptors
4255  * in the cache, and free the memory in the cache.
4256  */
4257 
alloc_fd(char * file,int oflags)4258 int alloc_fd(char *file, int oflags)
4259 {
4260 	struct fd_cache *fdc;
4261 	struct fd_cache *alloc_fdcache(char *file, int oflags);
4262 
4263 	fdc = alloc_fdcache(file, oflags);
4264 	if (fdc != NULL)
4265 		return (fdc->c_fd);
4266 	else
4267 		return (-1);
4268 }
4269 
alloc_fdcache(char * file,int oflags)4270 struct fd_cache *alloc_fdcache(char *file, int oflags)
4271 {
4272 	int fd;
4273 	struct fd_cache *free_slot, *oldest_slot, *cp;
4274 	static int cache_size = 0;
4275 	static struct fd_cache *cache = NULL;
4276 #ifdef sgi
4277 	struct dioattr finfo;
4278 #endif
4279 
4280 	/*
4281 	 * If file is NULL, it means to free up the fd cache.
4282 	 */
4283 
4284 	if (file == NULL && cache != NULL) {
4285 		for (cp = cache; cp < &cache[cache_size]; cp++) {
4286 			if (cp->c_fd != -1) {
4287 				close(cp->c_fd);
4288 			}
4289 #ifndef CRAY
4290 			if (cp->c_memaddr != NULL) {
4291 				munmap(cp->c_memaddr, cp->c_memlen);
4292 			}
4293 #endif
4294 		}
4295 
4296 		free(cache);
4297 		cache = NULL;
4298 		cache_size = 0;
4299 		return 0;
4300 	}
4301 
4302 	free_slot = NULL;
4303 	oldest_slot = NULL;
4304 
4305 	/*
4306 	 * Look for a fd in the cache.  If one is found, return it directly.
4307 	 * Otherwise, when this loop exits, oldest_slot will point to the
4308 	 * oldest fd slot in the cache, and free_slot will point to an
4309 	 * unoccupied slot if there are any.
4310 	 */
4311 
4312 	for (cp = cache; cp != NULL && cp < &cache[cache_size]; cp++) {
4313 		if (cp->c_fd != -1 &&
4314 		    cp->c_oflags == oflags && strcmp(cp->c_file, file) == 0) {
4315 #ifdef CRAY
4316 			cp->c_rtc = _rtc();
4317 #else
4318 			cp->c_rtc = Reqno;
4319 #endif
4320 			return cp;
4321 		}
4322 
4323 		if (cp->c_fd == -1) {
4324 			if (free_slot == NULL) {
4325 				free_slot = cp;
4326 			}
4327 		} else {
4328 			if (oldest_slot == NULL ||
4329 			    cp->c_rtc < oldest_slot->c_rtc) {
4330 				oldest_slot = cp;
4331 			}
4332 		}
4333 	}
4334 
4335 	/*
4336 	 * No matching file/oflags pair was found in the cache.  Attempt to
4337 	 * open a new fd.
4338 	 */
4339 
4340 	if ((fd = open(file, oflags, 0666)) < 0) {
4341 		if (errno != EMFILE) {
4342 			doio_fprintf(stderr,
4343 				     "Could not open file %s with flags %#o (%s): %s (%d)\n",
4344 				     file, oflags, format_oflags(oflags),
4345 				     SYSERR, errno);
4346 			alloc_mem(-1);
4347 			exit(E_SETUP);
4348 		}
4349 
4350 		/*
4351 		 * If we get here, we have as many open fd's as we can have.
4352 		 * Close the oldest one in the cache (pointed to by
4353 		 * oldest_slot), and attempt to re-open.
4354 		 */
4355 
4356 		close(oldest_slot->c_fd);
4357 		oldest_slot->c_fd = -1;
4358 		free_slot = oldest_slot;
4359 
4360 		if ((fd = open(file, oflags, 0666)) < 0) {
4361 			doio_fprintf(stderr,
4362 				     "Could not open file %s with flags %#o (%s):  %s (%d)\n",
4363 				     file, oflags, format_oflags(oflags),
4364 				     SYSERR, errno);
4365 			alloc_mem(-1);
4366 			exit(E_SETUP);
4367 		}
4368 	}
4369 
4370 /*printf("alloc_fd: new file %s flags %#o fd %d\n", file, oflags, fd);*/
4371 
4372 	/*
4373 	 * If we get here, fd is our open descriptor.  If free_slot is NULL,
4374 	 * we need to grow the cache, otherwise free_slot is the slot that
4375 	 * should hold the fd info.
4376 	 */
4377 
4378 	if (free_slot == NULL) {
4379 		cache =
4380 		    (struct fd_cache *)realloc(cache,
4381 					       sizeof(struct fd_cache) *
4382 					       (FD_ALLOC_INCR + cache_size));
4383 		if (cache == NULL) {
4384 			doio_fprintf(stderr,
4385 				     "Could not malloc() space for fd chace");
4386 			alloc_mem(-1);
4387 			exit(E_SETUP);
4388 		}
4389 
4390 		cache_size += FD_ALLOC_INCR;
4391 
4392 		for (cp = &cache[cache_size - FD_ALLOC_INCR];
4393 		     cp < &cache[cache_size]; cp++) {
4394 			cp->c_fd = -1;
4395 		}
4396 
4397 		free_slot = &cache[cache_size - FD_ALLOC_INCR];
4398 	}
4399 
4400 	/*
4401 	 * finally, fill in the cache slot info
4402 	 */
4403 
4404 	free_slot->c_fd = fd;
4405 	free_slot->c_oflags = oflags;
4406 	strcpy(free_slot->c_file, file);
4407 #ifdef CRAY
4408 	free_slot->c_rtc = _rtc();
4409 #else
4410 	free_slot->c_rtc = Reqno;
4411 #endif
4412 
4413 #ifdef sgi
4414 	if (oflags & O_DIRECT) {
4415 		if (fcntl(fd, F_DIOINFO, &finfo) == -1) {
4416 			finfo.d_mem = 1;
4417 			finfo.d_miniosz = 1;
4418 			finfo.d_maxiosz = 1;
4419 		}
4420 	} else {
4421 		finfo.d_mem = 1;
4422 		finfo.d_miniosz = 1;
4423 		finfo.d_maxiosz = 1;
4424 	}
4425 
4426 	free_slot->c_memalign = finfo.d_mem;
4427 	free_slot->c_miniosz = finfo.d_miniosz;
4428 	free_slot->c_maxiosz = finfo.d_maxiosz;
4429 #endif /* sgi */
4430 #ifndef CRAY
4431 	free_slot->c_memaddr = NULL;
4432 	free_slot->c_memlen = 0;
4433 #endif
4434 
4435 	return free_slot;
4436 }
4437 
4438 /*
4439  *
4440  *			Signal Handling Section
4441  *
4442  *
4443  */
4444 
4445 #ifdef sgi
4446 /*
4447  * "caller-id" for signals
4448  */
/*
 * signal_info - print what is known about a delivered signal.
 *
 * With no siginfo only the signal number is printed.  Otherwise the
 * message depends on si_code: SI_USER adds the sender's pid and uid,
 * SI_QUEUE prints the signal number only.  For any other code, SIGSEGV
 * and SIGBUS get a report that includes si_addr, active_mmap_rw and
 * havesigint, and everything else is reported as an unknown code.
 * The third argument is not used.
 */
void signal_info(int sig, siginfo_t * info, void *v)
{
	int is_fault;

	if (info == NULL) {
		doio_fprintf(stderr, "signal_info: sig %d\n", sig);
		return;
	}

	switch (info->si_code) {
	case SI_USER:
		doio_fprintf(stderr,
			     "signal_info: si_signo %d si_errno %d si_code SI_USER pid %d uid %d\n",
			     info->si_signo, info->si_errno,
			     info->si_pid, info->si_uid);
		return;

	case SI_QUEUE:
		doio_fprintf(stderr,
			     "signal_info  si_signo %d si_code = SI_QUEUE\n",
			     info->si_signo);
		return;

	default:
		break;
	}

	/* none of the codes handled above */
	is_fault = (info->si_signo == SIGSEGV) || (info->si_signo == SIGBUS);

	if (is_fault) {
		doio_fprintf(stderr,
			     "signal_info  si_signo %d si_errno %d si_code = %d  si_addr=%p  active_mmap_rw=%d havesigint=%d\n",
			     info->si_signo, info->si_errno,
			     info->si_code, info->si_addr,
			     active_mmap_rw, havesigint);
	} else {
		doio_fprintf(stderr,
			     "signal_info: si_signo %d si_errno %d unknown code %d\n",
			     info->si_signo, info->si_errno,
			     info->si_code);
	}
}
4493 
/*
 * Handler for a "normal" shutdown request: exits with status 0.
 * None of the three handler arguments is examined.
 */
void cleanup_handler(int sig, siginfo_t * info, void *v)
{
	(void)sig;
	(void)info;
	(void)v;

	/* remember the request in case there's a followup signal */
	havesigint = 1;

	/* no signal_info() report here: be quiet on a "normal" kill */

	/* NOTE(review): alloc_mem(-1) presumably releases the i/o buffers - confirm */
	alloc_mem(-1);
	exit(0);
}
4501 
/*
 * Handler for signals that are treated as fatal: announces the signal,
 * prints the signal_info() report for it and exits with status 1.
 */
void die_handler(int sig, siginfo_t * info, void *v)
{
	doio_fprintf(stderr, "terminating on signal %d\n", sig);

	/* "caller-id" details for the signal */
	signal_info(sig, info, v);

	/* NOTE(review): alloc_mem(-1) presumably releases the i/o buffers - confirm */
	alloc_mem(-1);

	exit(1);
}
4509 
/*
 * SIGBUS handler (siginfo flavour).
 *
 * While we are doing a memcpy to/from an mmapped region we can get a
 * SIGBUS for a variety of reasons--and not all of them should be
 * considered failures.
 *
 * Under normal conditions if we get a SIGINT it means we've been told to
 * shutdown.  However, if we're currently doing the above-mentioned memcpy
 * then the kernel will follow that SIGINT with a SIGBUS.  We can guess
 * that we're in this situation by seeing that the si_errno field in the
 * siginfo structure has EINTR as an errno.  (We might make the guess
 * stronger by looking at the si_addr field to see that it's not faulting
 * off the end of the mmapped region, but it seems that in such a case
 * havesigint would not have been set so maybe that doesn't make the guess
 * stronger.)
 *
 *	sig	signal number
 *	info	siginfo for the signal; may be NULL, in which case the
 *		signal is treated as a failure
 *	v	third handler argument, passed through unchanged
 *
 * Does not return: both cleanup_handler() and die_handler() exit.
 */
void sigbus_handler(int sig, siginfo_t * info, void *v)
{
	/*
	 * signal_info() allows for a NULL siginfo, so do the same here
	 * rather than dereferencing it blindly.  die_handler() copes with
	 * a NULL info through signal_info().
	 */
	if (active_mmap_rw && havesigint &&
	    info != NULL && info->si_errno == EINTR) {
		cleanup_handler(sig, info, v);
	} else {
		die_handler(sig, info, v);
	}
}
4534 #else
4535 
cleanup_handler(int sig)4536 void cleanup_handler(int sig)
4537 {
4538 	havesigint = 1;		/* in case there's a followup signal */
4539 	alloc_mem(-1);
4540 	exit(0);
4541 }
4542 
/*
 * Handler for signals that are treated as fatal: announces the signal
 * on stderr and exits with status 1.
 */
void die_handler(int sig)
{
	doio_fprintf(stderr, "terminating on signal %d\n", sig);

	/* NOTE(review): alloc_mem(-1) presumably releases the i/o buffers - confirm */
	alloc_mem(-1);

	exit(1);
}
4549 
4550 #ifndef CRAY
sigbus_handler(int sig)4551 void sigbus_handler(int sig)
4552 {
4553 	/* See sigbus_handler() in the 'ifdef sgi' case for details.  Here,
4554 	   we don't have the siginfo stuff so the guess is weaker but we'll
4555 	   do it anyway.
4556 	 */
4557 
4558 	if (active_mmap_rw && havesigint)
4559 		cleanup_handler(sig);
4560 	else
4561 		die_handler(sig);
4562 }
4563 #endif /* !CRAY */
4564 #endif /* sgi */
4565 
/*
 * Signal handler that does nothing at all with the signal it is given.
 */
void noop_handler(int sig)
{
	(void)sig;
}
4570 
4571 /*
4572  * SIGINT handler for the parent (original doio) process.  It simply sends
4573  * a SIGINT to all of the doio children.  Since they're all in the same
4574  * pgrp, this can be done with a single kill().
4575  */
4576 
sigint_handler(int sig)4577 void sigint_handler(int sig)
4578 {
4579 	int i;
4580 
4581 	for (i = 0; i < Nchildren; i++) {
4582 		if (Children[i] != -1) {
4583 			kill(Children[i], SIGINT);
4584 		}
4585 	}
4586 }
4587 
4588 /*
4589  * Signal handler used to inform a process when async io completes.  Referenced
4590  * in do_read() and do_write().  Note that the signal handler is not
4591  * re-registered.
4592  */
4593 
aio_handler(int sig)4594 void aio_handler(int sig)
4595 {
4596 	unsigned int i;
4597 	struct aio_info *aiop;
4598 
4599 	for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
4600 		aiop = &Aio_Info[i];
4601 
4602 		if (aiop->strategy == A_SIGNAL && aiop->sig == sig) {
4603 			aiop->signalled++;
4604 
4605 			if (aio_done(aiop)) {
4606 				aiop->done++;
4607 			}
4608 		}
4609 	}
4610 }
4611 
4612 /*
4613  * dump info on all open aio slots
4614  */
dump_aio(void)4615 void dump_aio(void)
4616 {
4617 	unsigned int i, count;
4618 
4619 	count = 0;
4620 	for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
4621 		if (Aio_Info[i].busy) {
4622 			count++;
4623 			fprintf(stderr,
4624 				"Aio_Info[%03d] id=%d fd=%d signal=%d signaled=%d\n",
4625 				i, Aio_Info[i].id,
4626 				Aio_Info[i].fd,
4627 				Aio_Info[i].sig, Aio_Info[i].signalled);
4628 			fprintf(stderr, "\tstrategy=%s\n",
4629 				format_strat(Aio_Info[i].strategy));
4630 		}
4631 	}
4632 	fprintf(stderr, "%d active async i/os\n", count);
4633 }
4634 
4635 #ifdef sgi
4636 /*
4637  * Signal handler called as a callback, not as a signal.
4638  * 'val' is the value from sigev_value and is assumed to be the
4639  * Aio_Info[] index.
4640  */
cb_handler(sigval_t val)4641 void cb_handler(sigval_t val)
4642 {
4643 	struct aio_info *aiop;
4644 
4645 /*printf("cb_handler requesting slot %d\n", val.sival_int);*/
4646 	aiop = aio_slot(val.sival_int);
4647 /*printf("cb_handler, aiop=%p\n", aiop);*/
4648 
4649 /*printf("%d in cb_handler\n", getpid() );*/
4650 	if (aiop->strategy == A_CALLBACK) {
4651 		aiop->signalled++;
4652 
4653 		if (aio_done(aiop)) {
4654 			aiop->done++;
4655 		}
4656 	}
4657 }
4658 #endif
4659 
aio_slot(int aio_id)4660 struct aio_info *aio_slot(int aio_id)
4661 {
4662 	unsigned int i;
4663 	static int id = 1;
4664 	struct aio_info *aiop;
4665 
4666 	aiop = NULL;
4667 
4668 	for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
4669 		if (aio_id == -1) {
4670 			if (!Aio_Info[i].busy) {
4671 				aiop = &Aio_Info[i];
4672 				aiop->busy = 1;
4673 				aiop->id = id++;
4674 				break;
4675 			}
4676 		} else {
4677 			if (Aio_Info[i].busy && Aio_Info[i].id == aio_id) {
4678 				aiop = &Aio_Info[i];
4679 				break;
4680 			}
4681 		}
4682 	}
4683 
4684 	if (aiop == NULL) {
4685 		doio_fprintf(stderr, "aio_slot(%d) not found.  Request %d\n",
4686 			     aio_id, Reqno);
4687 		dump_aio();
4688 		alloc_mem(-1);
4689 		exit(E_INTERNAL);
4690 	}
4691 
4692 	return aiop;
4693 }
4694 
aio_register(int fd,int strategy,int sig)4695 int aio_register(int fd, int strategy, int sig)
4696 {
4697 	struct aio_info *aiop;
4698 	struct sigaction sa;
4699 
4700 	aiop = aio_slot(-1);
4701 
4702 	aiop->fd = fd;
4703 	aiop->strategy = strategy;
4704 	aiop->done = 0;
4705 #ifdef CRAY
4706 	memset((char *)&aiop->iosw, 0x00, sizeof(aiop->iosw));
4707 #endif
4708 
4709 	if (strategy == A_SIGNAL) {
4710 		aiop->sig = sig;
4711 		aiop->signalled = 0;
4712 
4713 		sa.sa_handler = aio_handler;
4714 		sa.sa_flags = 0;
4715 		sigemptyset(&sa.sa_mask);
4716 
4717 		sigaction(sig, &sa, &aiop->osa);
4718 	} else {
4719 		aiop->sig = -1;
4720 		aiop->signalled = 0;
4721 	}
4722 
4723 	return aiop->id;
4724 }
4725 
aio_unregister(int aio_id)4726 int aio_unregister(int aio_id)
4727 {
4728 	struct aio_info *aiop;
4729 
4730 	aiop = aio_slot(aio_id);
4731 
4732 	if (aiop->strategy == A_SIGNAL) {
4733 		sigaction(aiop->sig, &aiop->osa, NULL);
4734 	}
4735 
4736 	aiop->busy = 0;
4737 	return 0;
4738 }
4739 
4740 #ifndef __linux__
aio_wait(int aio_id)4741 int aio_wait(int aio_id)
4742 {
4743 #ifdef RECALL_SIZEOF
4744 	long mask[RECALL_SIZEOF];
4745 #endif
4746 	sigset_t signalset;
4747 	struct aio_info *aiop;
4748 #ifdef CRAY
4749 	struct iosw *ioswlist[1];
4750 #endif
4751 #ifdef sgi
4752 	const aiocb_t *aioary[1];
4753 #endif
4754 	int r, cnt;
4755 
4756 	aiop = aio_slot(aio_id);
4757 /*printf("%d aiop B =%p\n", getpid(), aiop);*/
4758 
4759 	switch (aiop->strategy) {
4760 	case A_POLL:
4761 		while (!aio_done(aiop)) ;
4762 		break;
4763 
4764 	case A_SIGNAL:
4765 		sigemptyset(&signalset);
4766 		sighold(aiop->sig);
4767 
4768 		while (!aiop->signalled || !aiop->done) {
4769 			sigsuspend(&signalset);
4770 			sighold(aiop->sig);
4771 		}
4772 		break;
4773 
4774 #ifdef CRAY
4775 	case A_RECALL:
4776 		ioswlist[0] = &aiop->iosw;
4777 		if (recall(aiop->fd, 1, ioswlist) < 0) {
4778 			doio_fprintf(stderr, "recall() failed:  %s (%d)\n",
4779 				     SYSERR, errno);
4780 			exit(E_SETUP);
4781 		}
4782 		break;
4783 
4784 #ifdef RECALL_SIZEOF
4785 
4786 	case A_RECALLA:
4787 		RECALL_INIT(mask);
4788 		RECALL_SET(mask, aiop->fd);
4789 		if (recalla(mask) < 0) {
4790 			doio_fprintf(stderr, "recalla() failed:  %s (%d)\n",
4791 				     SYSERR, errno);
4792 			exit(E_SETUP);
4793 		}
4794 
4795 		RECALL_CLR(mask, aiop->fd);
4796 		break;
4797 #endif
4798 
4799 	case A_RECALLS:
4800 		ioswlist[0] = &aiop->iosw;
4801 		if (recalls(1, ioswlist) < 0) {
4802 			doio_fprintf(stderr, "recalls failed:  %s (%d)\n",
4803 				     SYSERR, errno);
4804 			exit(E_SETUP);
4805 		}
4806 		break;
4807 #endif /* CRAY */
4808 
4809 #ifdef sgi
4810 	case A_CALLBACK:
4811 		aioary[0] = &aiop->aiocb;
4812 		cnt = 0;
4813 		do {
4814 			r = aio_suspend(aioary, 1, NULL);
4815 			if (r == -1) {
4816 				doio_fprintf(stderr,
4817 					     "aio_suspend failed: %s (%d)\n",
4818 					     SYSERR, errno);
4819 				exit(E_SETUP);
4820 			}
4821 			cnt++;
4822 		} while (aiop->done == 0);
4823 
4824 #if 0
4825 		/*
4826 		 * after having this set for a while, I've decided that
4827 		 * it's too noisy
4828 		 */
4829 		if (cnt > 1)
4830 			doio_fprintf(stderr,
4831 				     "aio_wait: callback wait took %d tries\n",
4832 				     cnt);
4833 #endif
4834 
4835 		/*
4836 		 * Note: cb_handler already calls aio_done
4837 		 */
4838 		break;
4839 
4840 	case A_SUSPEND:
4841 		aioary[0] = &aiop->aiocb;
4842 		r = aio_suspend(aioary, 1, NULL);
4843 		if (r == -1) {
4844 			doio_fprintf(stderr, "aio_suspend failed: %s (%d)\n",
4845 				     SYSERR, errno);
4846 			exit(E_SETUP);
4847 		}
4848 
4849 		aio_done(aiop);
4850 		break;
4851 #endif
4852 	}
4853 
4854 /*printf("aio_wait: errno %d return %d\n", aiop->aio_errno, aiop->aio_ret);*/
4855 
4856 	return 0;
4857 }
4858 #endif /* !linux */
4859 
/*
 * Format specified time into HH:MM:SS format.  t is the time to format
 * in seconds (as returned from time(2)).
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  If t cannot be converted to local time (localtime() returns
 * NULL) or cannot be formatted, the placeholder "--:--:--" is returned,
 * so the result is always a valid 8-character string.
 */

char *hms(time_t t)
{
	static char ascii_time[9];
	struct tm *ltime;

	ltime = localtime(&t);

	/* localtime() fails for out-of-range values; never pass NULL on */
	if (ltime == NULL ||
	    strftime(ascii_time, sizeof(ascii_time), "%H:%M:%S", ltime) == 0) {
		strcpy(ascii_time, "--:--:--");
	}

	return ascii_time;
}
4875 
/*
 * Simple routine to check if an async io request has completed.
 *
 *	CRAY	returns the sw_flag member of the request's iosw
 *	sgi	stores aio_error() in ainfo->aio_errno; once that is no
 *		longer EINPROGRESS also stores aio_return() in
 *		ainfo->aio_ret.  Returns non-zero when the request is no
 *		longer in progress.  A -1 from either call is reported
 *		and ends the process with E_SETUP.
 *	other	returns -1 (invalid) without looking at ainfo
 */

int aio_done(struct aio_info *ainfo)
{
#if defined(CRAY)
	return ainfo->iosw.sw_flag;
#elif defined(sgi)
	ainfo->aio_errno = aio_error(&ainfo->aiocb);
	if (ainfo->aio_errno == -1) {
		doio_fprintf(stderr, "aio_done: aio_error failed: %s (%d)\n",
			     SYSERR, errno);
		exit(E_SETUP);
	}

	/* still running: nothing to collect yet */
	if (ainfo->aio_errno == EINPROGRESS)
		return 0;

	ainfo->aio_ret = aio_return(&ainfo->aiocb);
	if (ainfo->aio_ret == -1) {
		doio_fprintf(stderr,
			     "aio_done: aio_return failed: %s (%d)\n",
			     SYSERR, errno);
		exit(E_SETUP);
	}

	return 1;
#else
	return -1;		/* invalid */
#endif
}
4907 
4908 /*
4909  * Routine to handle upanic() - it first attempts to set the panic flag.  If
4910  * the flag cannot be set, an error message is issued.  A call to upanic
4911  * with PA_PANIC is then done unconditionally, in case the panic flag was set
4912  * from outside the program (as with the panic(8) program).
4913  *
4914  * Note - we only execute the upanic code if -U was used, and the passed in
4915  * mask is set in the Upanic_Conditions bitmask.
4916  */
4917 
doio_upanic(int mask)4918 void doio_upanic(int mask)
4919 {
4920 	if (U_opt == 0 || (mask & Upanic_Conditions) == 0) {
4921 		return;
4922 	}
4923 #ifdef CRAY
4924 	if (upanic(PA_SET) < 0) {
4925 		doio_fprintf(stderr,
4926 			     "WARNING - Could not set the panic flag - upanic(PA_SET) failed:  %s (%d)\n",
4927 			     SYSERR, errno);
4928 	}
4929 
4930 	upanic(PA_PANIC);
4931 #endif
4932 #ifdef sgi
4933 	syssgi(1005);		/* syssgi test panic - DEBUG kernels only */
4934 #endif
4935 	doio_fprintf(stderr, "WARNING - upanic() failed\n");
4936 }
4937 
4938 /*
4939  * Parse cmdline options/arguments and set appropriate global variables.
4940  * If the cmdline is valid, return 0 to caller.  Otherwise exit with a status
4941  * of 1.
4942  */
4943 
parse_cmdline(int argc,char ** argv,char * opts)4944 int parse_cmdline(int argc, char **argv, char *opts)
4945 {
4946 	int c;
4947 	char cc, *cp = NULL, *tok = NULL;
4948 	extern int opterr;
4949 	extern int optind;
4950 	extern char *optarg;
4951 	struct smap *s;
4952 	char *memargs[NMEMALLOC];
4953 	int nmemargs, ma;
4954 
4955 	if (*argv[0] == '-') {
4956 		argv[0]++;
4957 		Execd = 1;
4958 	}
4959 
4960 	if ((Prog = strrchr(argv[0], '/')) == NULL) {
4961 		Prog = argv[0];
4962 	} else {
4963 		Prog++;
4964 	}
4965 
4966 	opterr = 0;
4967 	while ((c = getopt(argc, argv, opts)) != EOF) {
4968 		switch ((char)c) {
4969 		case 'a':
4970 			a_opt++;
4971 			break;
4972 
4973 		case 'C':
4974 			C_opt++;
4975 			for (s = checkmap; s->string != NULL; s++)
4976 				if (!strcmp(s->string, optarg))
4977 					break;
4978 			if (s->string == NULL && tok != NULL) {
4979 				fprintf(stderr,
4980 					"%s%s:  Illegal -C arg (%s).  Must be one of: ",
4981 					Prog, TagName, tok);
4982 
4983 				for (s = checkmap; s->string != NULL; s++)
4984 					fprintf(stderr, "%s ", s->string);
4985 				fprintf(stderr, "\n");
4986 				exit(1);
4987 			}
4988 
4989 			switch (s->value) {
4990 			case C_DEFAULT:
4991 				Data_Fill = doio_pat_fill;
4992 				Data_Check = doio_pat_check;
4993 				break;
4994 			default:
4995 				fprintf(stderr,
4996 					"%s%s:  Unrecognised -C arg '%s' %d",
4997 					Prog, TagName, s->string, s->value);
4998 				exit(1);
4999 			}
5000 			break;
5001 
5002 		case 'd':	/* delay between i/o ops */
5003 			parse_delay(optarg);
5004 			break;
5005 
5006 		case 'e':
5007 			if (Npes > 1 && Nprocs > 1) {
5008 				fprintf(stderr,
5009 					"%s%s:  Warning - Program is a multi-pe application - exec option is ignored.\n",
5010 					Prog, TagName);
5011 			}
5012 			e_opt++;
5013 			break;
5014 
5015 		case 'h':
5016 			help(stdout);
5017 			exit(0);
5018 			break;
5019 
5020 		case 'k':
5021 			k_opt++;
5022 			break;
5023 
5024 		case 'm':
5025 			Message_Interval = strtol(optarg, &cp, 10);
5026 			if (*cp != '\0' || Message_Interval < 0) {
5027 				fprintf(stderr,
5028 					"%s%s:  Illegal -m arg (%s):  Must be an integer >= 0\n",
5029 					Prog, TagName, optarg);
5030 				exit(1);
5031 			}
5032 			m_opt++;
5033 			break;
5034 
5035 		case 'M':	/* memory allocation types */
5036 #ifndef CRAY
5037 			nmemargs = string_to_tokens(optarg, memargs, 32, ",");
5038 			for (ma = 0; ma < nmemargs; ma++) {
5039 				parse_memalloc(memargs[ma]);
5040 			}
5041 			/*dump_memalloc(); */
5042 #else
5043 			fprintf(stderr,
5044 				"%s%s: Error: -M isn't supported on this platform\n",
5045 				Prog, TagName);
5046 			exit(1);
5047 #endif
5048 			M_opt++;
5049 			break;
5050 
5051 		case 'N':
5052 			sprintf(TagName, "(%.39s)", optarg);
5053 			break;
5054 
5055 		case 'n':
5056 			Nprocs = strtol(optarg, &cp, 10);
5057 			if (*cp != '\0' || Nprocs < 1) {
5058 				fprintf(stderr,
5059 					"%s%s:  Illegal -n arg (%s):  Must be integer > 0\n",
5060 					Prog, TagName, optarg);
5061 				exit(E_USAGE);
5062 			}
5063 
5064 			if (Npes > 1 && Nprocs > 1) {
5065 				fprintf(stderr,
5066 					"%s%s:  Program has been built as a multi-pe app.  -n1 is the only nprocs value allowed\n",
5067 					Prog, TagName);
5068 				exit(E_SETUP);
5069 			}
5070 			n_opt++;
5071 			break;
5072 
5073 		case 'r':
5074 			Release_Interval = strtol(optarg, &cp, 10);
5075 			if (*cp != '\0' || Release_Interval < 0) {
5076 				fprintf(stderr,
5077 					"%s%s:  Illegal -r arg (%s):  Must be integer >= 0\n",
5078 					Prog, TagName, optarg);
5079 				exit(E_USAGE);
5080 			}
5081 
5082 			r_opt++;
5083 			break;
5084 
5085 		case 'w':
5086 			Write_Log = optarg;
5087 			w_opt++;
5088 			break;
5089 
5090 		case 'v':
5091 			v_opt++;
5092 			break;
5093 
5094 		case 'V':
5095 			if (strcasecmp(optarg, "sync") == 0) {
5096 				Validation_Flags = O_SYNC;
5097 			} else if (strcasecmp(optarg, "buffered") == 0) {
5098 				Validation_Flags = 0;
5099 #ifdef CRAY
5100 			} else if (strcasecmp(optarg, "parallel") == 0) {
5101 				Validation_Flags = O_PARALLEL;
5102 			} else if (strcasecmp(optarg, "ldraw") == 0) {
5103 				Validation_Flags = O_LDRAW;
5104 			} else if (strcasecmp(optarg, "raw") == 0) {
5105 				Validation_Flags = O_RAW;
5106 #endif
5107 #ifdef sgi
5108 			} else if (strcasecmp(optarg, "direct") == 0) {
5109 				Validation_Flags = O_DIRECT;
5110 #endif
5111 			} else {
5112 				if (sscanf
5113 				    (optarg, "%i%c", &Validation_Flags,
5114 				     &cc) != 1) {
5115 					fprintf(stderr,
5116 						"%s:  Invalid -V argument (%s) - must be a decimal, hex, or octal\n",
5117 						Prog, optarg);
5118 					fprintf(stderr,
5119 						"    number, or one of the following strings:  'sync',\n");
5120 					fprintf(stderr,
5121 						"    'buffered', 'parallel', 'ldraw', or 'raw'\n");
5122 					exit(E_USAGE);
5123 				}
5124 			}
5125 			V_opt++;
5126 			break;
5127 		case 'U':
5128 			tok = strtok(optarg, ",");
5129 			while (tok != NULL) {
5130 				for (s = Upanic_Args; s->string != NULL; s++)
5131 					if (strcmp(s->string, tok) == 0)
5132 						break;
5133 
5134 				if (s->string == NULL) {
5135 					fprintf(stderr,
5136 						"%s%s:  Illegal -U arg (%s).  Must be one of: ",
5137 						Prog, TagName, tok);
5138 
5139 					for (s = Upanic_Args; s->string != NULL;
5140 					     s++)
5141 						fprintf(stderr, "%s ",
5142 							s->string);
5143 
5144 					fprintf(stderr, "\n");
5145 
5146 					exit(1);
5147 				}
5148 
5149 				Upanic_Conditions |= s->value;
5150 				tok = strtok(NULL, ",");
5151 			}
5152 
5153 			U_opt++;
5154 			break;
5155 
5156 		case '?':
5157 			usage(stderr);
5158 			exit(E_USAGE);
5159 			break;
5160 		}
5161 	}
5162 
5163 	/*
5164 	 * Supply defaults
5165 	 */
5166 
5167 	if (!C_opt) {
5168 		Data_Fill = doio_pat_fill;
5169 		Data_Check = doio_pat_check;
5170 	}
5171 
5172 	if (!U_opt)
5173 		Upanic_Conditions = 0;
5174 
5175 	if (!n_opt)
5176 		Nprocs = 1;
5177 
5178 	if (!r_opt)
5179 		Release_Interval = DEF_RELEASE_INTERVAL;
5180 
5181 	if (!M_opt) {
5182 		Memalloc[Nmemalloc].memtype = MEM_DATA;
5183 		Memalloc[Nmemalloc].flags = 0;
5184 		Memalloc[Nmemalloc].name = NULL;
5185 		Memalloc[Nmemalloc].space = NULL;
5186 		Nmemalloc++;
5187 	}
5188 
5189 	/*
5190 	 * Initialize input stream
5191 	 */
5192 
5193 	if (argc == optind) {
5194 		Infile = NULL;
5195 	} else {
5196 		Infile = argv[optind++];
5197 	}
5198 
5199 	if (argc != optind) {
5200 		usage(stderr);
5201 		exit(E_USAGE);
5202 	}
5203 
5204 	return 0;
5205 }
5206 
5207 /*
5208  * Parse memory allocation types
5209  *
5210  * Types are:
5211  *  Data
5212  *  T3E-shmem:blksize[:nblks]
5213  *  SysV-shmem:shmid:blksize:nblks
5214  *	if shmid is "private", use IPC_PRIVATE
5215  *	and nblks is not required
5216  *
5217  *  mmap:flags:filename:blksize[:nblks]
5218  *   flags are one of:
5219  *	p - private (MAP_PRIVATE)
5220  *	a - private, MAP_AUTORESRV
5221  *	l - local (MAP_LOCAL)
5222  *	s - shared (nblks required)
5223  *
5224  *   plus any of:
5225  *	f - fixed address (MAP_FIXED)
5226  *	A - use an address without MAP_FIXED
 *	G - autogrow (map once at startup)
5228  *
5229  *  mmap:flags:devzero
 *	mmap /dev/zero  (shared not allowed)
5231  *	maps the first 4096 bytes of /dev/zero
5232  *
5233  * - put a directory at the beginning of the shared
5234  *   regions saying what pid has what region.
5235  *	DIRMAGIC
5236  *	BLKSIZE
5237  *	NBLKS
5238  *	nblks worth of directories - 1 int pids
5239  */
5240 #ifndef CRAY
parse_memalloc(char * arg)5241 void parse_memalloc(char *arg)
5242 {
5243 	char *allocargs[NMEMALLOC];
5244 	int nalloc;
5245 	struct memalloc *M;
5246 
5247 	if (Nmemalloc >= NMEMALLOC) {
5248 		doio_fprintf(stderr, "Error - too many memory types (%d).\n",
5249 			     Nmemalloc);
5250 		return;
5251 	}
5252 
5253 	M = &Memalloc[Nmemalloc];
5254 
5255 	nalloc = string_to_tokens(arg, allocargs, 32, ":");
5256 	if (!strcmp(allocargs[0], "data")) {
5257 		M->memtype = MEM_DATA;
5258 		M->flags = 0;
5259 		M->name = NULL;
5260 		M->space = NULL;
5261 		Nmemalloc++;
5262 		if (nalloc >= 2) {
5263 			if (strchr(allocargs[1], 'p'))
5264 				M->flags |= MEMF_MPIN;
5265 		}
5266 	} else if (!strcmp(allocargs[0], "mmap")) {
5267 		/* mmap:flags:filename[:size] */
5268 		M->memtype = MEM_MMAP;
5269 		M->flags = 0;
5270 		M->space = NULL;
5271 		if (nalloc >= 1) {
5272 			if (strchr(allocargs[1], 'p'))
5273 				M->flags |= MEMF_PRIVATE;
5274 			if (strchr(allocargs[1], 'a'))
5275 				M->flags |= MEMF_AUTORESRV;
5276 			if (strchr(allocargs[1], 'l'))
5277 				M->flags |= MEMF_LOCAL;
5278 			if (strchr(allocargs[1], 's'))
5279 				M->flags |= MEMF_SHARED;
5280 
5281 			if (strchr(allocargs[1], 'f'))
5282 				M->flags |= MEMF_FIXADDR;
5283 			if (strchr(allocargs[1], 'A'))
5284 				M->flags |= MEMF_ADDR;
5285 			if (strchr(allocargs[1], 'G'))
5286 				M->flags |= MEMF_AUTOGROW;
5287 
5288 			if (strchr(allocargs[1], 'U'))
5289 				M->flags |= MEMF_FILE;
5290 		} else {
5291 			M->flags |= MEMF_PRIVATE;
5292 		}
5293 
5294 		if (nalloc > 2) {
5295 			if (!strcmp(allocargs[2], "devzero")) {
5296 				M->name = "/dev/zero";
5297 				if (M->flags &
5298 				    ((MEMF_PRIVATE | MEMF_LOCAL) == 0))
5299 					M->flags |= MEMF_PRIVATE;
5300 			} else {
5301 				M->name = allocargs[2];
5302 			}
5303 		} else {
5304 			M->name = "/dev/zero";
5305 			if (M->flags & ((MEMF_PRIVATE | MEMF_LOCAL) == 0))
5306 				M->flags |= MEMF_PRIVATE;
5307 		}
5308 		Nmemalloc++;
5309 
5310 	} else if (!strcmp(allocargs[0], "shmem")) {
5311 		/* shmem:shmid:size */
5312 		M->memtype = MEM_SHMEM;
5313 		M->flags = 0;
5314 		M->space = NULL;
5315 		if (nalloc >= 2) {
5316 			M->name = allocargs[1];
5317 		} else {
5318 			M->name = NULL;
5319 		}
5320 		if (nalloc >= 3) {
5321 			sscanf(allocargs[2], "%i", &M->nblks);
5322 		} else {
5323 			M->nblks = 0;
5324 		}
5325 		if (nalloc >= 4) {
5326 			if (strchr(allocargs[3], 'p'))
5327 				M->flags |= MEMF_MPIN;
5328 		}
5329 
5330 		Nmemalloc++;
5331 	} else {
5332 		doio_fprintf(stderr, "Error - unknown memory type '%s'.\n",
5333 			     allocargs[0]);
5334 		exit(1);
5335 	}
5336 }
5337 
dump_memalloc(void)5338 void dump_memalloc(void)
5339 {
5340 	int ma;
5341 	char *mt;
5342 
5343 	if (Nmemalloc == 0) {
5344 		printf("No memory allocation strategies devined\n");
5345 		return;
5346 	}
5347 
5348 	for (ma = 0; ma < Nmemalloc; ma++) {
5349 		switch (Memalloc[ma].memtype) {
5350 		case MEM_DATA:
5351 			mt = "data";
5352 			break;
5353 		case MEM_SHMEM:
5354 			mt = "shmem";
5355 			break;
5356 		case MEM_MMAP:
5357 			mt = "mmap";
5358 			break;
5359 		default:
5360 			mt = "unknown";
5361 			break;
5362 		}
5363 		printf("mstrat[%d] = %d %s\n", ma, Memalloc[ma].memtype, mt);
5364 		printf("\tflags=%#o name='%s' nblks=%d\n",
5365 		       Memalloc[ma].flags,
5366 		       Memalloc[ma].name, Memalloc[ma].nblks);
5367 	}
5368 }
5369 
5370 #endif /* !CRAY */
5371 
5372 /*
5373  * -d <op>:<time> - doio inter-operation delay
5374  *	currently this permits ONE type of delay between operations.
5375  */
5376 
parse_delay(char * arg)5377 void parse_delay(char *arg)
5378 {
5379 	char *delayargs[NMEMALLOC];
5380 	int ndelay;
5381 	struct smap *s;
5382 
5383 	ndelay = string_to_tokens(arg, delayargs, 32, ":");
5384 	if (ndelay < 2) {
5385 		doio_fprintf(stderr,
5386 			     "Illegal delay arg (%s). Must be operation:time\n",
5387 			     arg);
5388 		exit(1);
5389 	}
5390 	for (s = delaymap; s->string != NULL; s++)
5391 		if (!strcmp(s->string, delayargs[0]))
5392 			break;
5393 	if (s->string == NULL) {
5394 		fprintf(stderr,
5395 			"Illegal Delay arg (%s).  Must be one of: ", arg);
5396 
5397 		for (s = delaymap; s->string != NULL; s++)
5398 			fprintf(stderr, "%s ", s->string);
5399 		fprintf(stderr, "\n");
5400 		exit(1);
5401 	}
5402 
5403 	delayop = s->value;
5404 
5405 	sscanf(delayargs[1], "%i", &delaytime);
5406 
5407 	if (ndelay > 2) {
5408 		fprintf(stderr, "Warning: extra delay arguments ignored.\n");
5409 	}
5410 }
5411 
5412 /*
5413  * Usage clause - obvious
5414  */
5415 
usage(FILE * stream)5416 int usage(FILE * stream)
5417 {
5418 	/*
5419 	 * Only do this if we are on vpe 0, to avoid seeing it from every
5420 	 * process in the application.
5421 	 */
5422 
5423 	if (Npes > 1 && Vpe != 0) {
5424 		return 0;
5425 	}
5426 
5427 	fprintf(stream,
5428 		"usage%s:  %s [-aekv] [-m message_interval] [-n nprocs] [-r release_interval] [-w write_log] [-V validation_ftype] [-U upanic_cond] [infile]\n",
5429 		TagName, Prog);
5430 	return 0;
5431 }
5432 
help(FILE * stream)5433 void help(FILE * stream)
5434 {
5435 	/*
5436 	 * Only the app running on vpe 0 gets to issue help - this prevents
5437 	 * everybody in the application from doing this.
5438 	 */
5439 
5440 	if (Npes > 1 && Vpe != 0) {
5441 		return;
5442 	}
5443 
5444 	usage(stream);
5445 	fprintf(stream, "\n");
5446 	fprintf(stream,
5447 		"\t-a                   abort - kill all doio processes on data compare\n");
5448 	fprintf(stream,
5449 		"\t                     errors.  Normally only the erroring process exits\n");
5450 	fprintf(stream, "\t-C data-pattern-type \n");
5451 	fprintf(stream,
5452 		"\t                     Available data patterns are:\n");
5453 	fprintf(stream, "\t                     default - repeating pattern\n");
5454 	fprintf(stream, "\t-d Operation:Time    Inter-operation delay.\n");
5455 	fprintf(stream, "\t                     Operations are:\n");
5456 	fprintf(stream,
5457 		"\t                         select:time (1 second=1000000)\n");
5458 	fprintf(stream, "\t                         sleep:time (1 second=1)\n");
5459 #ifdef sgi
5460 	fprintf(stream,
5461 		"\t                         sginap:time (1 second=CLK_TCK=100)\n");
5462 #endif
5463 	fprintf(stream, "\t                         alarm:time (1 second=1)\n");
5464 	fprintf(stream,
5465 		"\t-e                   Re-exec children before entering the main\n");
5466 	fprintf(stream,
5467 		"\t                     loop.  This is useful for spreading\n");
5468 	fprintf(stream,
5469 		"\t                     procs around on multi-pe systems.\n");
5470 	fprintf(stream,
5471 		"\t-k                   Lock file regions during writes using fcntl()\n");
5472 	fprintf(stream,
5473 		"\t-v                   Verify writes - this is done by doing a buffered\n");
5474 	fprintf(stream,
5475 		"\t                     read() of the data if file io was done, or\n");
5476 	fprintf(stream,
5477 		"\t                     an ssread()of the data if sds io was done\n");
5478 #ifndef CRAY
5479 	fprintf(stream,
5480 		"\t-M                   Data buffer allocation method\n");
5481 	fprintf(stream, "\t                     alloc-type[,type]\n");
5482 #ifdef sgi
5483 	fprintf(stream, "\t			    data:flags\n");
5484 	fprintf(stream, "\t			        p - mpin buffer\n");
5485 	fprintf(stream, "\t			    shmem:shmid:size:flags\n");
5486 	fprintf(stream, "\t			        p - mpin buffer\n");
5487 #else
5488 	fprintf(stream, "\t			    data\n");
5489 	fprintf(stream, "\t			    shmem:shmid:size\n");
5490 #endif /* sgi */
5491 	fprintf(stream, "\t			    mmap:flags:filename\n");
5492 	fprintf(stream, "\t			        p - private\n");
5493 #ifdef sgi
5494 	fprintf(stream, "\t			        s - shared\n");
5495 	fprintf(stream, "\t			        l - local\n");
5496 	fprintf(stream, "\t			        a - autoresrv\n");
5497 	fprintf(stream, "\t			        G - autogrow\n");
5498 #else
5499 	fprintf(stream,
5500 		"\t			        s - shared (shared file must exist\n"),
5501 	    fprintf(stream,
5502 		    "\t			            and have needed length)\n");
5503 #endif
5504 	fprintf(stream,
5505 		"\t			        f - fixed address (not used)\n");
5506 	fprintf(stream,
5507 		"\t			        a - specify address (not used)\n");
5508 	fprintf(stream,
5509 		"\t			        U - Unlink file when done\n");
5510 	fprintf(stream,
5511 		"\t			        The default flag is private\n");
5512 	fprintf(stream, "\n");
5513 #endif /* !CRAY */
5514 	fprintf(stream,
5515 		"\t-m message_interval  Generate a message every 'message_interval'\n");
5516 	fprintf(stream,
5517 		"\t                     requests.  An interval of 0 suppresses\n");
5518 	fprintf(stream,
5519 		"\t                     messages.  The default is 0.\n");
5520 	fprintf(stream, "\t-N tagname           Tag name, for Monster.\n");
5521 	fprintf(stream, "\t-n nprocs            # of processes to start up\n");
5522 	fprintf(stream,
5523 		"\t-r release_interval  Release all memory and close\n");
5524 	fprintf(stream,
5525 		"\t                     files every release_interval operations.\n");
5526 	fprintf(stream,
5527 		"\t                     By default procs never release memory\n");
5528 	fprintf(stream,
5529 		"\t                     or close fds unless they have to.\n");
5530 	fprintf(stream,
5531 		"\t-V validation_ftype  The type of file descriptor to use for doing data\n");
5532 	fprintf(stream,
5533 		"\t                     validation.  validation_ftype may be an octal,\n");
5534 	fprintf(stream,
5535 		"\t                     hex, or decimal number representing the open()\n");
5536 	fprintf(stream,
5537 		"\t                     flags, or may be one of the following strings:\n");
5538 	fprintf(stream,
5539 		"\t                     'buffered' - validate using bufferd read\n");
5540 	fprintf(stream,
5541 		"\t                     'sync'     - validate using O_SYNC read\n");
5542 #ifdef sgi
5543 	fprintf(stream,
5544 		"\t                     'direct    - validate using O_DIRECT read'\n");
5545 #endif
5546 #ifdef CRAY
5547 	fprintf(stream,
5548 		"\t                     'ldraw'    - validate using O_LDRAW read\n");
5549 	fprintf(stream,
5550 		"\t                     'parallel' - validate using O_PARALLEL read\n");
5551 	fprintf(stream,
5552 		"\t                     'raw'      - validate using O_RAW read\n");
5553 #endif
5554 	fprintf(stream, "\t                     By default, 'parallel'\n");
5555 	fprintf(stream,
5556 		"\t                     is used if the write was done with O_PARALLEL\n");
5557 	fprintf(stream,
5558 		"\t                     or 'buffered' for all other writes.\n");
5559 	fprintf(stream,
5560 		"\t-w write_log         File to log file writes to.  The doio_check\n");
5561 	fprintf(stream,
5562 		"\t                     program can reconstruct datafiles using the\n");
5563 	fprintf(stream,
5564 		"\t                     write_log, and detect if a file is corrupt\n");
5565 	fprintf(stream,
5566 		"\t                     after all procs have exited.\n");
5567 	fprintf(stream,
5568 		"\t-U upanic_cond       Comma separated list of conditions that will\n");
5569 	fprintf(stream,
5570 		"\t                     cause a call to upanic(PA_PANIC).\n");
5571 	fprintf(stream,
5572 		"\t                     'corruption' -> upanic on bad data comparisons\n");
5573 	fprintf(stream,
5574 		"\t                     'iosw'     ---> upanic on unexpected async iosw\n");
5575 	fprintf(stream,
5576 		"\t                     'rval'     ---> upanic on unexpected syscall rvals\n");
5577 	fprintf(stream,
5578 		"\t                     'all'      ---> all of the above\n");
5579 	fprintf(stream, "\n");
5580 	fprintf(stream,
5581 		"\tinfile               Input stream - default is stdin - must be a list\n");
5582 	fprintf(stream,
5583 		"\t                     of io_req structures (see doio.h).  Currently\n");
5584 	fprintf(stream,
5585 		"\t                     only the iogen program generates the proper\n");
5586 	fprintf(stream, "\t                     format\n");
5587 }
5588