1 /*
2 * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
31 */
32 /*
33 * doio - a general purpose io initiator with system call and
34 * write logging. See doio.h for the structure which defines
35 * what doio requests should look like.
36 *
37 * Currently doio can handle read,write,reada,writea,ssread,
38 * sswrite, and many varieties of listio requests.
39 * For disk io, if the O_SSD flag is set doio will allocate
40 * the appropriate amount of ssd and do the transfer - thus, doio
41 * can handle all of the primitive types of file io.
42 *
43 * programming
44 * notes:
45 * -----------
46 * messages should generally be printed using doio_fprintf().
47 *
48 */
49
50 #include <stdio.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <stdlib.h>
54 #include <signal.h>
55 #include <string.h>
56 #include <ctype.h>
57 #include <unistd.h>
58 #include <time.h>
59 #include <stdarg.h>
60 #include <sys/stat.h>
61 #include <sys/param.h>
62 #include <sys/types.h>
63 #include <sys/sysmacros.h>
64 #ifdef CRAY
65 #include <sys/iosw.h>
66 #endif
67 #ifdef sgi
68 #include <aio.h> /* for aio_read,write */
69 #include <inttypes.h> /* for uint64_t type */
70 #include <siginfo.h> /* signal handlers & SA_SIGINFO */
71 #endif
72 #ifndef CRAY
73 #include <sys/uio.h> /* for struct iovec (readv) */
74 #include <sys/mman.h> /* for mmap(2) */
75 #include <sys/ipc.h> /* for i/o buffer in shared memory */
76 #include <sys/shm.h> /* for i/o buffer in shared memory */
77 #endif
78 #include <sys/wait.h>
79 #ifdef CRAY
80 #include <sys/listio.h>
81 #include <sys/panic.h>
82 #endif
83 #include <sys/time.h> /* for delays */
84
85 #include "doio.h"
86 #include "write_log.h"
87 #include "random_range.h"
88 #include "string_to_tokens.h"
89 #include "pattern.h"
90
/* Capacity of the Memalloc[] table defined below. */
#define NMEMALLOC	32

/* MEM_* memory type codes (the struct below has a matching memtype field). */
#define MEM_DATA	1	/* data space */
#define MEM_SHMEM	2	/* System V shared memory */
#define MEM_T3ESHMEM	3	/* T3E Shared Memory */
#define MEM_MMAP	4	/* mmap(2) */

/* MEMF_* modifier bits (octal); presumably OR-ed into memalloc.flags. */
#define MEMF_PRIVATE	0001
#define MEMF_AUTORESRV	0002
#define MEMF_LOCAL	0004
#define MEMF_SHARED	0010

#define MEMF_FIXADDR	0100
#define MEMF_ADDR	0200
#define MEMF_AUTOGROW	0400
#define MEMF_FILE	01000	/* regular file -- unlink on close */
#define MEMF_MPIN	010000	/* use mpin(2) to lock pages in memory */

/*
 * Description of one data-buffer allocation.
 */
struct memalloc {
	int memtype;
	int flags;
	int nblks;
	char *name;
	void *space;		/* memory address of allocated space */
	int fd;			/* FD open for mmaping */
	int size;
};

/* Table of allocation descriptions, NMEMALLOC entries long. */
struct memalloc Memalloc[NMEMALLOC];
117
118 /*
119 * Structure for maintaining open file test descriptors. Used by
120 * alloc_fd().
121 */
122
123 struct fd_cache {
124 char c_file[MAX_FNAME_LENGTH + 1];
125 int c_oflags;
126 int c_fd;
127 long c_rtc;
128 #ifdef sgi
129 int c_memalign; /* from F_DIOINFO */
130 int c_miniosz;
131 int c_maxiosz;
132 #endif
133 #ifndef CRAY
134 void *c_memaddr; /* mmapped address */
135 int c_memlen; /* length of above region */
136 #endif
137 };
138
/*
 * Name-to-value pair.  Tables of these translate command-line argument
 * words into their integer values.
 */
struct smap {
	char *string;		/* symbolic name */
	int value;		/* value the name stands for */
};
147
148 struct aio_info {
149 int busy;
150 int id;
151 int fd;
152 int strategy;
153 volatile int done;
154 #ifdef CRAY
155 struct iosw iosw;
156 #endif
157 #ifdef sgi
158 aiocb_t aiocb;
159 int aio_ret; /* from aio_return */
160 int aio_errno; /* from aio_error */
161 #endif
162 int sig;
163 int signalled;
164 struct sigaction osa;
165 };
166
/* ---------------------------------------------------------------------------
 *
 * Stub-based r/w system call scheme: a per-syscall "stub" builds the
 * arguments and issues the system call, while code shared by all system
 * calls invokes the stub and performs the return-value checking, async
 * I/O wait, iosw check, etc.
 *
 * Flags:
 *	WRITE, ASYNC, SSD/SDS,
 *	FILE_LOCK, WRITE_LOG, VERIFY_DATA,
 */

/* Outcome of one stub invocation. */
struct status {
	int rval;		/* syscall return */
	int err;		/* errno */
	int *aioid;		/* list of async I/O structures */
};

/* One entry describing a system call and the routines that go with it. */
struct syscall_info {
	char *sy_name;
	int sy_type;
	struct status *(*sy_syscall) ();
	int (*sy_buffer) ();
	char *(*sy_format) ();
	int sy_flags;
	int sy_bits;
};

/* SY_* bits (octal). */
#define SY_WRITE	00001
#define SY_ASYNC	00010
#define SY_IOSW		00020
#define SY_SDS		00100
199
#if !defined(O_SSD)
#define O_SSD 0			/* so code compiles on a CRAY2 */
#endif

/* 64-bit unsigned type name: uint64_t on sgi, unsigned long elsewhere. */
#if defined(sgi)
#define UINT64_T uint64_t
#else
#define UINT64_T unsigned long
#endif

#if !defined(O_PARALLEL)
#define O_PARALLEL 0		/* so O_PARALLEL may be used in expressions */
#endif

#define PPID_CHECK_INTERVAL 5	/* check ppid every <-- iterations */
#define MAX_AIO 256		/* maximum number of async I/O ops */

#if defined(_CRAYMPP)
#define MPP_BUMP 16		/* page un-alignment for MPP */
#else
#define MPP_BUMP 0
#endif

/* Text of the current errno value. */
#define SYSERR strerror(errno)

/*
 * Option string handed to getopt(): the supported cmdline arguments.
 */
#define OPTS "aC:d:ehm:n:kr:w:vU:V:M:N:"

#define DEF_RELEASE_INTERVAL 0
231
/*
 * One flag per command-line option; parse_cmdline() sets a flag to
 * record that the option was selected.
 */

int a_opt = 0;			/* -a: abort on data compare errors */
int e_opt = 0;			/* -e: exec() after fork()'ing */
int C_opt = 0;			/* -C: Data Check Type */
int d_opt = 0;			/* -d: delay between operations */
int k_opt = 0;			/* -k: lock file regions during writes */
int m_opt = 0;			/* -m: generate periodic messages */
int n_opt = 0;			/* -n: nprocs */
int r_opt = 0;			/* -r: resource release interval */
int w_opt = 0;			/* -w: file write log file */
int v_opt = 0;			/* -v: verify writes if set */
int U_opt = 0;			/* -U: upanic() on various conditions */
int V_opt = 0;			/* -V: over-ride default validation fd type */
int M_opt = 0;			/* -M: data buffer allocation types */
char TagName[40];		/* name of this doio (see Monster) */
251
/*
 * Miscellaneous program-wide state; parse_cmdline() initializes the
 * option-derived values.
 */

char *Prog = NULL;		/* set up in parse_cmdline() */
int Upanic_Conditions;		/* set by args to -U */
int Release_Interval;		/* arg to -r */
int Nprocs;			/* arg to -n */
char *Write_Log;		/* arg to -w */
char *Infile;			/* input file (defaults to stdin) */

/* Process bookkeeping */
int *Children;			/* pids of child procs */
int Nchildren = 0;
int Nsiblings = 0;		/* tfork'ed siblings */
int Execd = 0;
int Message_Interval = 0;
int Npes = 0;			/* non-zero if built as an mpp multi-pe app */
int Vpe = -1;			/* Virtual pe number if Npes >= 0 */

/* Request accounting */
int Reqno = 1;			/* request # - used in some error messages */
int Reqskipcnt = 0;		/* count of I/O requests that are skipped */

/* Data validation hooks */
int Validation_Flags;
char *(*Data_Check) ();		/* function to call for data checking */
int (*Data_Fill) ();		/* function to call for data filling */

int Nmemalloc = 0;		/* number of memory allocation strategies */
int delayop = 0;		/* delay between operations - type of delay */
int delaytime = 0;		/* delay between operations - how long */
277
278 struct wlog_file Wlog;
279
280 int active_mmap_rw = 0; /* Indicates that mmapped I/O is occurring. */
281 /* Used by sigbus_action() in the child doio. */
282 int havesigint = 0;
283
284 #define SKIP_REQ -2 /* skip I/O request */
285
286 /*
287 * Global file descriptors
288 */
289
290 int Wfd_Append; /* for appending to the write-log */
291 int Wfd_Random; /* for overlaying write-log entries */
292
293 #define FD_ALLOC_INCR 32 /* allocate this many fd_map structs */
294 /* at a time */
295
296 /*
297 * Globals for tracking Sds and Core usage
298 */
299
300 char *Memptr; /* ptr to core buffer space */
301 int Memsize; /* # bytes pointed to by Memptr */
302 /* maintained by alloc_mem() */
303
304 int Sdsptr; /* sds offset (always 0) */
305 int Sdssize; /* # bytes of allocated sds space */
306 /* Maintained by alloc_sds() */
307 char Host[16];
308 char Pattern[128];
309 int Pattern_Length;
310
/*
 * Forward declarations.  Prototypes that apply to a single platform are
 * wrapped in the corresponding CRAY / sgi conditionals.
 */

char *syserrno(int err);
void doio(void);
void doio_delay(void);
char *format_oflags(int oflags);
char *format_strat(int strategy);
char *format_rw(struct io_req *ioreq, int fd, void *buffer,
		int signo, char *pattern, void *iosw);
#ifdef CRAY
/* The comma between "int sds" and "char *pattern" was missing. */
char *format_sds(struct io_req *ioreq, void *buffer, int sds, char *pattern);
#endif /* CRAY */

int do_read(struct io_req *req);
int do_write(struct io_req *req);
int lock_file_region(char *fname, int fd, int type, int start, int nbytes);

#ifdef CRAY
char *format_listio(struct io_req *ioreq, int lcmd,
		    struct listreq *list, int nent, int fd, char *pattern);
#endif /* CRAY */

int do_listio(struct io_req *req);

#if defined(_CRAY1) || defined(CRAY)
int do_ssdio(struct io_req *req);
#endif /* defined(_CRAY1) || defined(CRAY) */

char *fmt_ioreq(struct io_req *ioreq, struct syscall_info *sy, int fd);

#ifdef CRAY
struct status *sy_listio(struct io_req *req, struct syscall_info *sysc,
			 int fd, char *addr);
int listio_mem(struct io_req *req, int offset, int fmstride,
	       int *min, int *max);
char *fmt_listio(struct io_req *req, struct syscall_info *sy,
		 int fd, char *addr);
#endif /* CRAY */

#ifdef sgi
struct status *sy_pread(struct io_req *req, struct syscall_info *sysc,
			int fd, char *addr);
struct status *sy_pwrite(struct io_req *req, struct syscall_info *sysc,
			 int fd, char *addr);
char *fmt_pread(struct io_req *req, struct syscall_info *sy,
		int fd, char *addr);
#endif /* sgi */

#ifndef CRAY
struct status *sy_readv(struct io_req *req, struct syscall_info *sysc,
			int fd, char *addr);
struct status *sy_writev(struct io_req *req, struct syscall_info *sysc,
			 int fd, char *addr);
struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc,
		      int fd, char *addr, int rw);
char *fmt_readv(struct io_req *req, struct syscall_info *sy,
		int fd, char *addr);
#endif /* !CRAY */
371
#ifdef sgi
/* sgi asynchronous read/write stubs and their request formatter. */
struct status *sy_aread(struct io_req *req, struct syscall_info *sysc,
			int fd, char *addr);
/* The terminating semicolon of this prototype was missing. */
struct status *sy_awrite(struct io_req *req, struct syscall_info *sysc,
			 int fd, char *addr);
struct status *sy_arw(struct io_req *req, struct syscall_info *sysc,
		      int fd, char *addr, int rw);
char *fmt_aread(struct io_req *req, struct syscall_info *sy,
		int fd, char *addr);
#endif /* sgi */
382
/* mmap-based read/write stubs (every platform except CRAY) */
#if !defined(CRAY)
struct status *sy_mmread(struct io_req *req,
			 struct syscall_info *sysc, int fd, char *addr);
struct status *sy_mmwrite(struct io_req *req,
			  struct syscall_info *sysc, int fd, char *addr);
struct status *sy_mmrw(struct io_req *req,
		       struct syscall_info *sysc, int fd, char *addr, int rw);
char *fmt_mmrw(struct io_req *req,
	       struct syscall_info *sy, int fd, char *addr);
#endif /* !CRAY */

/* Request handlers */
int do_rw(struct io_req *req);

#if defined(sgi)
int do_fcntl(struct io_req *req);
#endif /* sgi */

#if !defined(CRAY)
int do_sync(struct io_req *req);
#endif /* !CRAY */

/* Data pattern fill / check */
int doio_pat_fill(char *addr, int mem_needed,
		  char *Pattern, int Pattern_Length, int shift);
char *doio_pat_check(char *buf, int offset, int length,
		     char *pattern, int pattern_length, int patshift);
char *check_file(char *file, int offset, int length,
		 char *pattern, int pattern_length, int patshift, int fsa);

/* Message output */
int doio_fprintf(FILE * stream, char *format, ...);

/* Buffer and file descriptor allocation */
int alloc_mem(int nbytes);

#if defined(_CRAY1) || defined(CRAY)
int alloc_sds(int nbytes);
#endif /* defined(_CRAY1) || defined(CRAY) */

int alloc_fd(char *file, int oflags);
struct fd_cache *alloc_fdcache(char *file, int oflags);

/* Signal handlers: sgi uses the three-argument SA_SIGINFO form */
#if defined(sgi)
void signal_info(int sig, siginfo_t * info, void *v);
void cleanup_handler(int sig, siginfo_t * info, void *v);
void die_handler(int sig, siginfo_t * info, void *v);
void sigbus_handler(int sig, siginfo_t * info, void *v);
#else /* !sgi */
void cleanup_handler(int sig);
void die_handler(int sig);

#if !defined(CRAY)
void sigbus_handler(int sig);
#endif /* !CRAY */
#endif /* sgi */

void noop_handler(int sig);
void sigint_handler(int sig);
void aio_handler(int sig);
void dump_aio(void);

#if defined(sgi)
void cb_handler(sigval_t val);
#endif /* sgi */

/* Async I/O slot management */
struct aio_info *aio_slot(int aio_id);
int aio_register(int fd, int strategy, int sig);
int aio_unregister(int aio_id);

#if !defined(__linux__)
int aio_wait(int aio_id);
#endif /* !__linux__ */

char *hms(time_t t);
int aio_done(struct aio_info *ainfo);
void doio_upanic(int mask);

/* Command-line handling */
int parse_cmdline(int argc, char **argv, char *opts);

#if !defined(CRAY)
void parse_memalloc(char *arg);
void dump_memalloc(void);
#endif /* !CRAY */

void parse_delay(char *arg);
int usage(FILE * stream);
void help(FILE * stream);
463
464 /*
465 * Upanic conditions, and a map from symbolics to values
466 */
467
468 #define U_CORRUPTION 0001 /* upanic on data corruption */
469 #define U_IOSW 0002 /* upanic on bad iosw */
470 #define U_RVAL 0004 /* upanic on bad rval */
471
472 #define U_ALL (U_CORRUPTION | U_IOSW | U_RVAL)
473
474 struct smap Upanic_Args[] = {
475 {"corruption", U_CORRUPTION},
476 {"iosw", U_IOSW},
477 {"rval", U_RVAL},
478 {"all", U_ALL},
479 {NULL, 0}
480 };
481
482 struct aio_info Aio_Info[MAX_AIO];
483
484 /* -C data-fill/check type */
485 #define C_DEFAULT 1
486 struct smap checkmap[] = {
487 {"default", C_DEFAULT},
488 {NULL, 0},
489 };
490
491 /* -d option delay types */
492 #define DELAY_SELECT 1
493 #define DELAY_SLEEP 2
494 #define DELAY_SGINAP 3
495 #define DELAY_ALARM 4
496 #define DELAY_ITIMER 5 /* POSIX timer */
497
498 struct smap delaymap[] = {
499 {"select", DELAY_SELECT},
500 {"sleep", DELAY_SLEEP},
501 #ifdef sgi
502 {"sginap", DELAY_SGINAP},
503 #endif
504 {"alarm", DELAY_ALARM},
505 {NULL, 0},
506 };
507
508 /******
509 *
510 * strerror() does similar actions.
511
512 char *
513 syserrno(int err)
514 {
515 static char sys_errno[10];
516 sprintf(sys_errno, "%d", errno);
517 return(sys_errno);
518 }
519
520 ******/
521
main(int argc,char ** argv)522 int main(int argc, char **argv)
523 {
524 int i, pid, stat, ex_stat;
525 #ifdef CRAY
526 sigset_t omask;
527 #elif defined(linux)
528 sigset_t omask, block_mask;
529 #else
530 int omask;
531 #endif
532 struct sigaction sa;
533
534 umask(0); /* force new file modes to known values */
535 #if _CRAYMPP
536 Npes = sysconf(_SC_CRAY_NPES); /* must do this before parse_cmdline */
537 Vpe = sysconf(_SC_CRAY_VPE);
538 #endif
539
540 TagName[0] = '\0';
541 parse_cmdline(argc, argv, OPTS);
542
543 random_range_seed(getpid()); /* initialize random number generator */
544
545 /*
546 * If this is a re-exec of doio, jump directly into the doio function.
547 */
548
549 if (Execd) {
550 doio();
551 exit(E_SETUP);
552 }
553
554 /*
555 * Stop on all but a few signals...
556 */
557 sigemptyset(&sa.sa_mask);
558 sa.sa_handler = sigint_handler;
559 sa.sa_flags = SA_RESETHAND; /* sigint is ignored after the */
560 /* first time */
561 for (i = 1; i <= NSIG; i++) {
562 switch (i) {
563 #ifdef SIGRECOVERY
564 case SIGRECOVERY:
565 break;
566 #endif
567 #ifdef SIGCKPT
568 case SIGCKPT:
569 #endif
570 #ifdef SIGRESTART
571 case SIGRESTART:
572 #endif
573 case SIGTSTP:
574 case SIGSTOP:
575 case SIGCONT:
576 case SIGCLD:
577 case SIGBUS:
578 case SIGSEGV:
579 case SIGQUIT:
580 break;
581 default:
582 sigaction(i, &sa, NULL);
583 }
584 }
585
586 /*
587 * If we're logging write operations, make a dummy call to wlog_open
588 * to initialize the write history file. This call must be done in
589 * the parent, to ensure that the history file exists and/or has
590 * been truncated before any children attempt to open it, as the doio
591 * children are not allowed to truncate the file.
592 */
593
594 if (w_opt) {
595 strcpy(Wlog.w_file, Write_Log);
596
597 if (wlog_open(&Wlog, 1, 0666) < 0) {
598 doio_fprintf(stderr,
599 "Could not create/truncate write log %s\n",
600 Write_Log);
601 exit(2);
602 }
603
604 wlog_close(&Wlog);
605 }
606
607 /*
608 * Malloc space for the children pid array. Initialize all entries
609 * to -1.
610 */
611
612 Children = malloc(sizeof(int) * Nprocs);
613 for (i = 0; i < Nprocs; i++) {
614 Children[i] = -1;
615 }
616
617 sigemptyset(&block_mask);
618 sigaddset(&block_mask, SIGCLD);
619 sigprocmask(SIG_BLOCK, &block_mask, &omask);
620
621 /*
622 * Fork Nprocs. This [parent] process is a watchdog, to notify the
623 * invoker of procs which exit abnormally, and to make sure that all
624 * child procs get cleaned up. If the -e option was used, we will also
625 * re-exec. This is mostly for unicos/mk on mpp's, to ensure that not
626 * all of the doio's don't end up in the same pe.
627 *
628 * Note - if Nprocs is 1, or this doio is a multi-pe app (Npes > 1),
629 * jump directly to doio(). multi-pe apps can't fork(), and there is
630 * no reason to fork() for 1 proc.
631 */
632
633 if (Nprocs == 1 || Npes > 1) {
634 doio();
635 exit(0);
636 } else {
637 for (i = 0; i < Nprocs; i++) {
638 if ((pid = fork()) == -1) {
639 doio_fprintf(stderr,
640 "(parent) Could not fork %d children: %s (%d)\n",
641 i + 1, SYSERR, errno);
642 exit(E_SETUP);
643 }
644
645 Children[Nchildren] = pid;
646 Nchildren++;
647
648 if (pid == 0) {
649 if (e_opt) {
650 char *exec_path;
651
652 exec_path = argv[0];
653 argv[0] = malloc(strlen(exec_path) + 2);
654 sprintf(argv[0], "-%s", exec_path);
655
656 execvp(exec_path, argv);
657 doio_fprintf(stderr,
658 "(parent) Could not execvp %s: %s (%d)\n",
659 exec_path, SYSERR, errno);
660 exit(E_SETUP);
661 } else {
662 doio();
663 exit(E_SETUP);
664 }
665 }
666 }
667
668 /*
669 * Parent spins on wait(), until all children exit.
670 */
671
672 ex_stat = E_NORMAL;
673
674 while (Nprocs) {
675 if ((pid = wait(&stat)) == -1) {
676 if (errno == EINTR)
677 continue;
678 }
679
680 for (i = 0; i < Nchildren; i++)
681 if (Children[i] == pid)
682 Children[i] = -1;
683
684 Nprocs--;
685
686 if (WIFEXITED(stat)) {
687 switch (WEXITSTATUS(stat)) {
688 case E_NORMAL:
689 /* noop */
690 break;
691
692 case E_INTERNAL:
693 doio_fprintf(stderr,
694 "(parent) pid %d exited because of an internal error\n",
695 pid);
696 ex_stat |= E_INTERNAL;
697 break;
698
699 case E_SETUP:
700 doio_fprintf(stderr,
701 "(parent) pid %d exited because of a setup error\n",
702 pid);
703 ex_stat |= E_SETUP;
704 break;
705
706 case E_COMPARE:
707 doio_fprintf(stderr,
708 "(parent) pid %d exited because of data compare errors\n",
709 pid);
710
711 ex_stat |= E_COMPARE;
712
713 if (a_opt)
714 kill(0, SIGINT);
715
716 break;
717
718 case E_USAGE:
719 doio_fprintf(stderr,
720 "(parent) pid %d exited because of a usage error\n",
721 pid);
722
723 ex_stat |= E_USAGE;
724 break;
725
726 default:
727 doio_fprintf(stderr,
728 "(parent) pid %d exited with unknown status %d\n",
729 pid, WEXITSTATUS(stat));
730 ex_stat |= E_INTERNAL;
731 break;
732 }
733 } else if (WIFSIGNALED(stat)
734 && WTERMSIG(stat) != SIGINT) {
735 doio_fprintf(stderr,
736 "(parent) pid %d terminated by signal %d\n",
737 pid, WTERMSIG(stat));
738
739 ex_stat |= E_SIGNAL;
740 }
741
742 fflush(NULL);
743 }
744 }
745
746 exit(ex_stat);
747
748 } /* main */
749
750 /*
751 * main doio function. Each doio child starts here, and never returns.
752 */
753
doio(void)754 void doio(void)
755 {
756 int rval, i, infd, nbytes;
757 char *cp;
758 struct io_req ioreq;
759 struct sigaction sa, def_action, ignore_action, exit_action;
760 #ifndef CRAY
761 struct sigaction sigbus_action;
762 #endif
763
764 Memsize = Sdssize = 0;
765
766 /*
767 * Initialize the Pattern - write-type syscalls will replace Pattern[1]
768 * with the pattern passed in the request. Make sure that
769 * strlen(Pattern) is not mod 16 so that out of order words will be
770 * detected.
771 */
772
773 gethostname(Host, sizeof(Host));
774 if ((cp = strchr(Host, '.')) != NULL)
775 *cp = '\0';
776
777 Pattern_Length = sprintf(Pattern, "-:%d:%s:%s*", getpid(), Host, Prog);
778
779 if (!(Pattern_Length % 16)) {
780 Pattern_Length = sprintf(Pattern, "-:%d:%s:%s**",
781 getpid(), Host, Prog);
782 }
783
784 /*
785 * Open a couple of descriptors for the write-log file. One descriptor
786 * is for appending, one for random access. Write logging is done for
787 * file corruption detection. The program doio_check is capable of
788 * doing corruption detection based on a doio write-log.
789 */
790
791 if (w_opt) {
792
793 strcpy(Wlog.w_file, Write_Log);
794
795 if (wlog_open(&Wlog, 0, 0666) == -1) {
796 doio_fprintf(stderr,
797 "Could not open write log file (%s): wlog_open() failed\n",
798 Write_Log);
799 exit(E_SETUP);
800 }
801 }
802
803 /*
804 * Open the input stream - either a file or stdin
805 */
806
807 if (Infile == NULL) {
808 infd = 0;
809 } else {
810 if ((infd = open(Infile, O_RDWR)) == -1) {
811 doio_fprintf(stderr,
812 "Could not open input file (%s): %s (%d)\n",
813 Infile, SYSERR, errno);
814 exit(E_SETUP);
815 }
816 }
817
818 /*
819 * Define a set of signals that should never be masked. Receipt of
820 * these signals generally indicates a programming error, and we want
821 * a corefile at the point of error. We put SIGQUIT in this list so
822 * that ^\ will force a user core dump.
823 *
824 * Note: the handler for these should be SIG_DFL, all of them
825 * produce a corefile as the default action.
826 */
827
828 ignore_action.sa_handler = SIG_IGN;
829 ignore_action.sa_flags = 0;
830 sigemptyset(&ignore_action.sa_mask);
831
832 def_action.sa_handler = SIG_DFL;
833 def_action.sa_flags = 0;
834 sigemptyset(&def_action.sa_mask);
835
836 #ifdef sgi
837 exit_action.sa_sigaction = cleanup_handler;
838 exit_action.sa_flags = SA_SIGINFO;
839 sigemptyset(&exit_action.sa_mask);
840
841 sa.sa_sigaction = die_handler;
842 sa.sa_flags = SA_SIGINFO;
843 sigemptyset(&sa.sa_mask);
844
845 sigbus_action.sa_sigaction = sigbus_handler;
846 sigbus_action.sa_flags = SA_SIGINFO;
847 sigemptyset(&sigbus_action.sa_mask);
848 #else
849 exit_action.sa_handler = cleanup_handler;
850 exit_action.sa_flags = 0;
851 sigemptyset(&exit_action.sa_mask);
852
853 sa.sa_handler = die_handler;
854 sa.sa_flags = 0;
855 sigemptyset(&sa.sa_mask);
856
857 #ifndef CRAY
858 sigbus_action.sa_handler = sigbus_handler;
859 sigbus_action.sa_flags = 0;
860 sigemptyset(&sigbus_action.sa_mask);
861 #endif
862 #endif
863
864 for (i = 1; i <= NSIG; i++) {
865 switch (i) {
866 /* Signals to terminate program on */
867 case SIGINT:
868 sigaction(i, &exit_action, NULL);
869 break;
870
871 #ifndef CRAY
872 /* This depends on active_mmap_rw */
873 case SIGBUS:
874 sigaction(i, &sigbus_action, NULL);
875 break;
876 #endif
877
878 /* Signals to Ignore... */
879 case SIGSTOP:
880 case SIGCONT:
881 #ifdef SIGRECOVERY
882 case SIGRECOVERY:
883 #endif
884 sigaction(i, &ignore_action, NULL);
885 break;
886
887 /* Signals to trap & report & die */
888 /*case SIGTRAP: */
889 /*case SIGABRT: */
890 #ifdef SIGERR /* cray only signals */
891 case SIGERR:
892 case SIGBUFIO:
893 case SIGINFO:
894 #endif
895 /*case SIGFPE: */
896 case SIGURG:
897 case SIGHUP:
898 case SIGTERM:
899 case SIGPIPE:
900 case SIGIO:
901 case SIGUSR1:
902 case SIGUSR2:
903 sigaction(i, &sa, NULL);
904 break;
905
906 /* Default Action for all other signals */
907 default:
908 sigaction(i, &def_action, NULL);
909 break;
910 }
911 }
912
913 /*
914 * Main loop - each doio proc does this until the read returns eof (0).
915 * Call the appropriate io function based on the request type.
916 */
917
918 while ((nbytes = read(infd, (char *)&ioreq, sizeof(ioreq)))) {
919
920 /*
921 * Periodically check our ppid. If it is 1, the child exits to
922 * help clean up in the case that the main doio process was
923 * killed.
924 */
925
926 if (Reqno && ((Reqno % PPID_CHECK_INTERVAL) == 0)) {
927 if (getppid() == 1) {
928 doio_fprintf(stderr,
929 "Parent doio process has exited\n");
930 alloc_mem(-1);
931 exit(E_SETUP);
932 }
933 }
934
935 if (nbytes == -1) {
936 doio_fprintf(stderr,
937 "read of %d bytes from input failed: %s (%d)\n",
938 sizeof(ioreq), SYSERR, errno);
939 alloc_mem(-1);
940 exit(E_SETUP);
941 }
942
943 if (nbytes != sizeof(ioreq)) {
944 doio_fprintf(stderr,
945 "read wrong # bytes from input stream, expected %d, got %d\n",
946 sizeof(ioreq), nbytes);
947 alloc_mem(-1);
948 exit(E_SETUP);
949 }
950
951 if (ioreq.r_magic != DOIO_MAGIC) {
952 doio_fprintf(stderr,
953 "got a bad magic # from input stream. Expected 0%o, got 0%o\n",
954 DOIO_MAGIC, ioreq.r_magic);
955 alloc_mem(-1);
956 exit(E_SETUP);
957 }
958
959 /*
960 * If we're on a Release_Interval multiple, relase all ssd and
961 * core space, and close all fd's in Fd_Map[].
962 */
963
964 if (Reqno && Release_Interval && !(Reqno % Release_Interval)) {
965 if (Memsize) {
966 #ifdef NOTDEF
967 sbrk(-1 * Memsize);
968 #else
969 alloc_mem(-1);
970 #endif
971 }
972 #ifdef _CRAY1
973 if (Sdssize) {
974 ssbreak(-1 * btoc(Sdssize));
975 Sdsptr = 0;
976 Sdssize = 0;
977 }
978 #endif /* _CRAY1 */
979
980 alloc_fd(NULL, 0);
981 }
982
983 switch (ioreq.r_type) {
984 case READ:
985 case READA:
986 rval = do_read(&ioreq);
987 break;
988
989 case WRITE:
990 case WRITEA:
991 rval = do_write(&ioreq);
992 break;
993
994 case READV:
995 case AREAD:
996 case PREAD:
997 case LREAD:
998 case LREADA:
999 case LSREAD:
1000 case LSREADA:
1001 case WRITEV:
1002 case AWRITE:
1003 case PWRITE:
1004 case MMAPR:
1005 case MMAPW:
1006 case LWRITE:
1007 case LWRITEA:
1008 case LSWRITE:
1009 case LSWRITEA:
1010 case LEREAD:
1011 case LEREADA:
1012 case LEWRITE:
1013 case LEWRITEA:
1014 rval = do_rw(&ioreq);
1015 break;
1016
1017 #ifdef CRAY
1018 case SSREAD:
1019 case SSWRITE:
1020 rval = do_ssdio(&ioreq);
1021 break;
1022
1023 case LISTIO:
1024 rval = do_listio(&ioreq);
1025 break;
1026 #endif
1027
1028 #ifdef sgi
1029 case RESVSP:
1030 case UNRESVSP:
1031 #ifdef F_FSYNC
1032 case DFFSYNC:
1033 #endif
1034 rval = do_fcntl(&ioreq);
1035 break;
1036 #endif /* sgi */
1037
1038 #ifndef CRAY
1039 case FSYNC2:
1040 case FDATASYNC:
1041 rval = do_sync(&ioreq);
1042 break;
1043 #endif
1044 default:
1045 doio_fprintf(stderr,
1046 "Don't know how to handle io request type %d\n",
1047 ioreq.r_type);
1048 alloc_mem(-1);
1049 exit(E_SETUP);
1050 }
1051
1052 if (rval == SKIP_REQ) {
1053 Reqskipcnt++;
1054 } else if (rval != 0) {
1055 alloc_mem(-1);
1056 doio_fprintf(stderr,
1057 "doio(): operation %d returned != 0\n",
1058 ioreq.r_type);
1059 exit(E_SETUP);
1060 }
1061
1062 if (Message_Interval && Reqno % Message_Interval == 0) {
1063 doio_fprintf(stderr,
1064 "Info: %d requests done (%d skipped) by this process\n",
1065 Reqno, Reqskipcnt);
1066 }
1067
1068 Reqno++;
1069
1070 if (delayop != 0)
1071 doio_delay();
1072 }
1073
1074 /*
1075 * Child exits normally
1076 */
1077 alloc_mem(-1);
1078 exit(E_NORMAL);
1079
1080 } /* doio */
1081
doio_delay(void)1082 void doio_delay(void)
1083 {
1084 struct timeval tv_delay;
1085 struct sigaction sa_al, sa_old;
1086 sigset_t al_mask;
1087
1088 switch (delayop) {
1089 case DELAY_SELECT:
1090 tv_delay.tv_sec = delaytime / 1000000;
1091 tv_delay.tv_usec = delaytime % 1000000;
1092 /*doio_fprintf(stdout, "delay_select: %d %d\n",
1093 tv_delay.tv_sec, tv_delay.tv_usec); */
1094 select(0, NULL, NULL, NULL, &tv_delay);
1095 break;
1096
1097 case DELAY_SLEEP:
1098 sleep(delaytime);
1099 break;
1100
1101 #ifdef sgi
1102 case DELAY_SGINAP:
1103 sginap(delaytime);
1104 break;
1105 #endif
1106
1107 case DELAY_ALARM:
1108 sa_al.sa_flags = 0;
1109 sa_al.sa_handler = noop_handler;
1110 sigemptyset(&sa_al.sa_mask);
1111 sigaction(SIGALRM, &sa_al, &sa_old);
1112 sigemptyset(&al_mask);
1113 alarm(delaytime);
1114 sigsuspend(&al_mask);
1115 sigaction(SIGALRM, &sa_old, 0);
1116 break;
1117 }
1118 }
1119
1120 /*
1121 * Format IO requests, returning a pointer to the formatted text.
1122 *
1123 * format_strat - formats the async i/o completion strategy
1124 * format_rw - formats a read[a]/write[a] request
1125 * format_sds - formats a ssread/sswrite request
1126 * format_listio- formats a listio request
1127 *
1128 * ioreq is the doio io request structure.
1129 */
1130
1131 struct smap sysnames[] = {
1132 {"READ", READ},
1133 {"WRITE", WRITE},
1134 {"READA", READA},
1135 {"WRITEA", WRITEA},
1136 {"SSREAD", SSREAD},
1137 {"SSWRITE", SSWRITE},
1138 {"LISTIO", LISTIO},
1139 {"LREAD", LREAD},
1140 {"LREADA", LREADA},
1141 {"LWRITE", LWRITE},
1142 {"LWRITEA", LWRITEA},
1143 {"LSREAD", LSREAD},
1144 {"LSREADA", LSREADA},
1145 {"LSWRITE", LSWRITE},
1146 {"LSWRITEA", LSWRITEA},
1147
1148 /* Irix System Calls */
1149 {"PREAD", PREAD},
1150 {"PWRITE", PWRITE},
1151 {"AREAD", AREAD},
1152 {"AWRITE", AWRITE},
1153 {"LLREAD", LLREAD},
1154 {"LLAREAD", LLAREAD},
1155 {"LLWRITE", LLWRITE},
1156 {"LLAWRITE", LLAWRITE},
1157 {"RESVSP", RESVSP},
1158 {"UNRESVSP", UNRESVSP},
1159 {"DFFSYNC", DFFSYNC},
1160
1161 /* Irix and Linux System Calls */
1162 {"READV", READV},
1163 {"WRITEV", WRITEV},
1164 {"MMAPR", MMAPR},
1165 {"MMAPW", MMAPW},
1166 {"FSYNC2", FSYNC2},
1167 {"FDATASYNC", FDATASYNC},
1168
1169 {"unknown", -1},
1170 };
1171
1172 struct smap aionames[] = {
1173 {"poll", A_POLL},
1174 {"signal", A_SIGNAL},
1175 {"recall", A_RECALL},
1176 {"recalla", A_RECALLA},
1177 {"recalls", A_RECALLS},
1178 {"suspend", A_SUSPEND},
1179 {"callback", A_CALLBACK},
1180 {"synch", 0},
1181 {"unknown", -1},
1182 };
1183
/*
 * format_oflags - render open(2) flags as a string of symbolic names.
 *
 * oflags:  flag word as passed to open(2).
 *
 * Each recognized flag contributes its name followed by a comma, starting
 * with the access mode (e.g. "O_RDWR,O_EXCL,O_SYNC,").  An access mode
 * that is none of O_RDONLY/O_WRONLY/O_RDWR is shown as "O_weird,".
 *
 * Returns a newly allocated string that the caller owns and must free(),
 * or NULL if the allocation fails.
 */
char *format_oflags(int oflags)
{
	char flags[255];
	char *copy;
	size_t len;

	flags[0] = '\0';

	/* The low two bits carry the access mode. */
	switch (oflags & 03) {
	case O_RDONLY:
		strcat(flags, "O_RDONLY,");
		break;
	case O_WRONLY:
		strcat(flags, "O_WRONLY,");
		break;
	case O_RDWR:
		strcat(flags, "O_RDWR,");
		break;
	default:
		/*
		 * Comma-terminated like every other name so that following
		 * flags do not fuse onto it ("O_weirdO_EXCL,").
		 */
		strcat(flags, "O_weird,");
		break;
	}

	if (oflags & O_EXCL)
		strcat(flags, "O_EXCL,");

	if (oflags & O_SYNC)
		strcat(flags, "O_SYNC,");
#ifdef CRAY
	if (oflags & O_RAW)
		strcat(flags, "O_RAW,");
	if (oflags & O_WELLFORMED)
		strcat(flags, "O_WELLFORMED,");
#ifdef O_SSD
	if (oflags & O_SSD)
		strcat(flags, "O_SSD,");
#endif
	if (oflags & O_LDRAW)
		strcat(flags, "O_LDRAW,");
	if (oflags & O_PARALLEL)
		strcat(flags, "O_PARALLEL,");
	if (oflags & O_BIG)
		strcat(flags, "O_BIG,");
	if (oflags & O_PLACE)
		strcat(flags, "O_PLACE,");
	if (oflags & O_ASYNC)
		strcat(flags, "O_ASYNC,");
#endif

#ifdef sgi
	if (oflags & O_DIRECT)
		strcat(flags, "O_DIRECT,");
	if (oflags & O_DSYNC)
		strcat(flags, "O_DSYNC,");
	if (oflags & O_RSYNC)
		strcat(flags, "O_RSYNC,");
#endif

	/* Heap copy of the result (what strdup() does, in ISO C calls). */
	len = strlen(flags) + 1;
	copy = malloc(len);
	if (copy != NULL)
		memcpy(copy, flags, len);

	return copy;
}
1241
format_strat(int strategy)1242 char *format_strat(int strategy)
1243 {
1244 char msg[64];
1245 char *aio_strat;
1246
1247 switch (strategy) {
1248 case A_POLL:
1249 aio_strat = "POLL";
1250 break;
1251 case A_SIGNAL:
1252 aio_strat = "SIGNAL";
1253 break;
1254 case A_RECALL:
1255 aio_strat = "RECALL";
1256 break;
1257 case A_RECALLA:
1258 aio_strat = "RECALLA";
1259 break;
1260 case A_RECALLS:
1261 aio_strat = "RECALLS";
1262 break;
1263 case A_SUSPEND:
1264 aio_strat = "SUSPEND";
1265 break;
1266 case A_CALLBACK:
1267 aio_strat = "CALLBACK";
1268 break;
1269 case 0:
1270 aio_strat = "<zero>";
1271 break;
1272 default:
1273 sprintf(msg, "<error:%#o>", strategy);
1274 aio_strat = strdup(msg);
1275 break;
1276 }
1277
1278 return (aio_strat);
1279 }
1280
format_rw(struct io_req * ioreq,int fd,void * buffer,int signo,char * pattern,void * iosw)1281 char *format_rw(struct io_req *ioreq, int fd, void *buffer, int signo,
1282 char *pattern, void *iosw)
1283 {
1284 static char *errbuf = NULL;
1285 char *aio_strat, *cp;
1286 struct read_req *readp = &ioreq->r_data.read;
1287 struct write_req *writep = &ioreq->r_data.write;
1288 struct read_req *readap = &ioreq->r_data.read;
1289 struct write_req *writeap = &ioreq->r_data.write;
1290
1291 if (errbuf == NULL)
1292 errbuf = malloc(32768);
1293
1294 cp = errbuf;
1295 cp += sprintf(cp, "Request number %d\n", Reqno);
1296
1297 switch (ioreq->r_type) {
1298 case READ:
1299 cp += sprintf(cp, "syscall: read(%d, %#lo, %d)\n",
1300 fd, (unsigned long)buffer, readp->r_nbytes);
1301 cp +=
1302 sprintf(cp,
1303 " fd %d is file %s - open flags are %#o\n",
1304 fd, readp->r_file, readp->r_oflags);
1305 cp +=
1306 sprintf(cp, " read done at file offset %d\n",
1307 readp->r_offset);
1308 break;
1309
1310 case WRITE:
1311 cp += sprintf(cp, "syscall: write(%d, %#lo, %d)\n",
1312 fd, (unsigned long)buffer, writep->r_nbytes);
1313 cp +=
1314 sprintf(cp,
1315 " fd %d is file %s - open flags are %#o\n",
1316 fd, writep->r_file, writep->r_oflags);
1317 cp +=
1318 sprintf(cp,
1319 " write done at file offset %d - pattern is %s\n",
1320 writep->r_offset, pattern);
1321 break;
1322
1323 case READA:
1324 aio_strat = format_strat(readap->r_aio_strat);
1325
1326 cp += sprintf(cp, "syscall: reada(%d, %#lo, %d, %#lo, %d)\n",
1327 fd, (unsigned long)buffer, readap->r_nbytes,
1328 (unsigned long)iosw, signo);
1329 cp +=
1330 sprintf(cp,
1331 " fd %d is file %s - open flags are %#o\n",
1332 fd, readap->r_file, readp->r_oflags);
1333 cp +=
1334 sprintf(cp, " reada done at file offset %d\n",
1335 readap->r_offset);
1336 cp +=
1337 sprintf(cp,
1338 " async io completion strategy is %s\n",
1339 aio_strat);
1340 break;
1341
1342 case WRITEA:
1343 aio_strat = format_strat(writeap->r_aio_strat);
1344
1345 cp += sprintf(cp, "syscall: writea(%d, %#lo, %d, %#lo, %d)\n",
1346 fd, (unsigned long)buffer, writeap->r_nbytes,
1347 (unsigned long)iosw, signo);
1348 cp +=
1349 sprintf(cp,
1350 " fd %d is file %s - open flags are %#o\n",
1351 fd, writeap->r_file, writeap->r_oflags);
1352 cp +=
1353 sprintf(cp,
1354 " writea done at file offset %d - pattern is %s\n",
1355 writeap->r_offset, pattern);
1356 cp +=
1357 sprintf(cp,
1358 " async io completion strategy is %s\n",
1359 aio_strat);
1360 break;
1361
1362 }
1363
1364 return errbuf;
1365 }
1366
#ifdef CRAY
/*
 * format_sds - describe an SSREAD or SSWRITE request for use in an error
 * message.
 *
 * ioreq   - the request; ioreq->r_type selects ssread vs. sswrite
 * buffer  - memory address used for the transfer
 * sds     - sds address used for the transfer
 * pattern - pattern text, printed for SSWRITE only
 *
 * The text is assembled in one lazily allocated static buffer, so the
 * returned string is overwritten by the next call and must not be freed.
 * If that buffer cannot be allocated a fixed diagnostic string is returned
 * rather than formatting through a NULL pointer.
 */
char *format_sds(struct io_req *ioreq, void *buffer, int sds, char *pattern)
{
	static char *errbuf = NULL;
	char *cp;

	struct ssread_req *ssreadp = &ioreq->r_data.ssread;
	struct sswrite_req *sswritep = &ioreq->r_data.sswrite;

	if (errbuf == NULL) {
		errbuf = malloc(32768);
		if (errbuf == NULL)
			return "format_sds(): malloc failed - request details unavailable";
	}

	cp = errbuf;
	cp += sprintf(cp, "Request number %d\n", Reqno);

	switch (ioreq->r_type) {
	case SSREAD:
		/* buffer is a pointer: print it as unsigned long, like format_rw() */
		cp += sprintf(cp, "syscall: ssread(%#lo, %#o, %d)\n",
			      (unsigned long)buffer, sds, ssreadp->r_nbytes);
		break;

	case SSWRITE:
		cp += sprintf(cp,
			      "syscall: sswrite(%#lo, %#o, %d) - pattern was %s\n",
			      (unsigned long)buffer, sds, sswritep->r_nbytes,
			      pattern);
		break;

	default:
		/* only the request number is reported for other types */
		break;
	}
	return errbuf;
}
#endif /* CRAY */
1399
1400 /*
1401 * Perform the various sorts of disk reads
1402 */
1403
do_read(struct io_req * req)1404 int do_read(struct io_req *req)
1405 {
1406 int fd, offset, nbytes, oflags, rval;
1407 char *addr, *file;
1408 #ifdef CRAY
1409 struct aio_info *aiop;
1410 int aio_id, aio_strat, signo;
1411 #endif
1412 #ifdef sgi
1413 struct fd_cache *fdc;
1414 #endif
1415
1416 /*
1417 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
1418 * r_nbytes are at the same offset in the read_req and reada_req
1419 * structures.
1420 */
1421
1422 file = req->r_data.read.r_file;
1423 oflags = req->r_data.read.r_oflags;
1424 offset = req->r_data.read.r_offset;
1425 nbytes = req->r_data.read.r_nbytes;
1426
1427 /*printf("read: %s, %#o, %d %d\n", file, oflags, offset, nbytes); */
1428
1429 /*
1430 * Grab an open file descriptor
1431 * Note: must be done before memory allocation so that the direct i/o
1432 * information is available in mem. allocate
1433 */
1434
1435 if ((fd = alloc_fd(file, oflags)) == -1)
1436 return -1;
1437
1438 /*
1439 * Allocate core or sds - based on the O_SSD flag
1440 */
1441
1442 #ifndef wtob
1443 #define wtob(x) (x * sizeof(UINT64_T))
1444 #endif
1445
1446 #ifdef CRAY
1447 if (oflags & O_SSD) {
1448 if (alloc_sds(nbytes) == -1)
1449 return -1;
1450
1451 addr = (char *)Sdsptr;
1452 } else {
1453 if ((rval =
1454 alloc_mem(nbytes + wtob(1) * 2 +
1455 MPP_BUMP * sizeof(UINT64_T))) < 0) {
1456 return rval;
1457 }
1458
1459 addr = Memptr;
1460
1461 /*
1462 * if io is not raw, bump the offset by a random amount
1463 * to generate non-word-aligned io.
1464 */
1465 if (!(req->r_data.read.r_uflags & F_WORD_ALIGNED)) {
1466 addr += random_range(0, wtob(1) - 1, 1, NULL);
1467 }
1468 }
1469 #else
1470 #ifdef sgi
1471 /* get memory alignment for using DIRECT I/O */
1472 fdc = alloc_fdcache(file, oflags);
1473
1474 if ((rval = alloc_mem(nbytes + wtob(1) * 2 + fdc->c_memalign)) < 0) {
1475 return rval;
1476 }
1477
1478 addr = Memptr;
1479
1480 if ((req->r_data.read.r_uflags & F_WORD_ALIGNED)) {
1481 /*
1482 * Force memory alignment for Direct I/O
1483 */
1484 if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
1485 addr +=
1486 fdc->c_memalign - ((long)addr % fdc->c_memalign);
1487 }
1488 } else {
1489 addr += random_range(0, wtob(1) - 1, 1, NULL);
1490 }
1491 #else
1492 /* what is !CRAY && !sgi ? */
1493 if ((rval = alloc_mem(nbytes + wtob(1) * 2)) < 0) {
1494 return rval;
1495 }
1496
1497 addr = Memptr;
1498 #endif /* !CRAY && sgi */
1499 #endif /* CRAY */
1500
1501 switch (req->r_type) {
1502 case READ:
1503 /* move to the desired file position. */
1504 if (lseek(fd, offset, SEEK_SET) == -1) {
1505 doio_fprintf(stderr,
1506 "lseek(%d, %d, SEEK_SET) failed: %s (%d)\n",
1507 fd, offset, SYSERR, errno);
1508 return -1;
1509 }
1510
1511 if ((rval = read(fd, addr, nbytes)) == -1) {
1512 doio_fprintf(stderr,
1513 "read() request failed: %s (%d)\n%s\n",
1514 SYSERR, errno,
1515 format_rw(req, fd, addr, -1, NULL, NULL));
1516 doio_upanic(U_RVAL);
1517 return -1;
1518 } else if (rval != nbytes) {
1519 doio_fprintf(stderr,
1520 "read() request returned wrong # of bytes - expected %d, got %d\n%s\n",
1521 nbytes, rval,
1522 format_rw(req, fd, addr, -1, NULL, NULL));
1523 doio_upanic(U_RVAL);
1524 return -1;
1525 }
1526 break;
1527
1528 #ifdef CRAY
1529 case READA:
1530 /*
1531 * Async read
1532 */
1533
1534 /* move to the desired file position. */
1535 if (lseek(fd, offset, SEEK_SET) == -1) {
1536 doio_fprintf(stderr,
1537 "lseek(%d, %d, SEEK_SET) failed: %s (%d)\n",
1538 fd, offset, SYSERR, errno);
1539 return -1;
1540 }
1541
1542 aio_strat = req->r_data.read.r_aio_strat;
1543 signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
1544
1545 aio_id = aio_register(fd, aio_strat, signo);
1546 aiop = aio_slot(aio_id);
1547
1548 if (reada(fd, addr, nbytes, &aiop->iosw, signo) == -1) {
1549 doio_fprintf(stderr, "reada() failed: %s (%d)\n%s\n",
1550 SYSERR, errno,
1551 format_rw(req, fd, addr, signo, NULL,
1552 &aiop->iosw));
1553 aio_unregister(aio_id);
1554 doio_upanic(U_RVAL);
1555 rval = -1;
1556 } else {
1557 /*
1558 * Wait for io to complete
1559 */
1560
1561 aio_wait(aio_id);
1562
1563 /*
1564 * make sure the io completed without error
1565 */
1566
1567 if (aiop->iosw.sw_count != nbytes) {
1568 doio_fprintf(stderr,
1569 "Bad iosw from reada()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
1570 1, 0, nbytes,
1571 aiop->iosw.sw_flag,
1572 aiop->iosw.sw_error,
1573 aiop->iosw.sw_count,
1574 format_rw(req, fd, addr, signo,
1575 NULL, &aiop->iosw));
1576 aio_unregister(aio_id);
1577 doio_upanic(U_IOSW);
1578 rval = -1;
1579 } else {
1580 aio_unregister(aio_id);
1581 rval = 0;
1582 }
1583 }
1584
1585 if (rval == -1)
1586 return rval;
1587 break;
1588 #endif /* CRAY */
1589 }
1590
1591 return 0; /* if we get here, everything went ok */
1592 }
1593
1594 /*
1595 * Perform the verious types of disk writes.
1596 */
1597
do_write(struct io_req * req)1598 int do_write(struct io_req *req)
1599 {
1600 static int pid = -1;
1601 int fd, nbytes, oflags, signo;
1602 int logged_write, rval, got_lock;
1603 off_t offset, woffset;
1604 char *addr, pattern, *file, *msg;
1605 struct wlog_rec wrec;
1606 #ifdef CRAY
1607 int aio_strat, aio_id;
1608 struct aio_info *aiop;
1609 #endif
1610 #ifdef sgi
1611 struct fd_cache *fdc;
1612 #endif
1613
1614 woffset = 0;
1615
1616 /*
1617 * Misc variable setup
1618 */
1619
1620 signo = 0;
1621 nbytes = req->r_data.write.r_nbytes;
1622 offset = req->r_data.write.r_offset;
1623 pattern = req->r_data.write.r_pattern;
1624 file = req->r_data.write.r_file;
1625 oflags = req->r_data.write.r_oflags;
1626
1627 /*printf("pwrite: %s, %#o, %d %d\n", file, oflags, offset, nbytes); */
1628
1629 /*
1630 * Allocate core memory and possibly sds space. Initialize the data
1631 * to be written.
1632 */
1633
1634 Pattern[0] = pattern;
1635
1636 /*
1637 * Get a descriptor to do the io on
1638 */
1639
1640 if ((fd = alloc_fd(file, oflags)) == -1)
1641 return -1;
1642
1643 /*printf("write: %d, %s, %#o, %d %d\n",
1644 fd, file, oflags, offset, nbytes); */
1645
1646 /*
1647 * Allocate SDS space for backdoor write if desired
1648 */
1649
1650 #ifdef CRAY
1651 if (oflags & O_SSD) {
1652 #ifndef _CRAYMPP
1653 if ((rval = alloc_mem(nbytes + wtob(1))) < 0) {
1654 return rval;
1655 }
1656
1657 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
1658 /*pattern_fill(Memptr, nbytes, Pattern, Pattern_Length, 0); */
1659
1660 if (alloc_sds(nbytes) == -1)
1661 return -1;
1662
1663 if (sswrite((long)Memptr, Sdsptr, btoc(nbytes)) == -1) {
1664 doio_fprintf(stderr,
1665 "sswrite(%d, %d, %d) failed: %s (%d)\n",
1666 (long)Memptr, Sdsptr, btoc(nbytes), SYSERR,
1667 errno);
1668 fflush(stderr);
1669 return -1;
1670 }
1671
1672 addr = (char *)Sdsptr;
1673 #else
1674 doio_fprintf(stderr,
1675 "Invalid O_SSD flag was generated for MPP system\n");
1676 fflush(stderr);
1677 return -1;
1678 #endif /* !CRAYMPP */
1679 } else {
1680 if ((rval = alloc_mem(nbytes + wtob(1)) < 0)) {
1681 return rval;
1682 }
1683
1684 addr = Memptr;
1685
1686 /*
1687 * if io is not raw, bump the offset by a random amount
1688 * to generate non-word-aligned io.
1689 */
1690
1691 if (!(req->r_data.write.r_uflags & F_WORD_ALIGNED)) {
1692 addr += random_range(0, wtob(1) - 1, 1, NULL);
1693 }
1694
1695 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
1696 if (addr != Memptr)
1697 memmove(addr, Memptr, nbytes);
1698 }
1699 #else /* CRAY */
1700 #ifdef sgi
1701 /* get memory alignment for using DIRECT I/O */
1702 fdc = alloc_fdcache(file, oflags);
1703
1704 if ((rval = alloc_mem(nbytes + wtob(1) * 2 + fdc->c_memalign)) < 0) {
1705 return rval;
1706 }
1707
1708 addr = Memptr;
1709
1710 if ((req->r_data.write.r_uflags & F_WORD_ALIGNED)) {
1711 /*
1712 * Force memory alignment for Direct I/O
1713 */
1714 if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
1715 addr +=
1716 fdc->c_memalign - ((long)addr % fdc->c_memalign);
1717 }
1718 } else {
1719 addr += random_range(0, wtob(1) - 1, 1, NULL);
1720 }
1721
1722 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
1723 if (addr != Memptr)
1724 memmove(addr, Memptr, nbytes);
1725
1726 #else /* sgi */
1727 if ((rval = alloc_mem(nbytes + wtob(1) * 2)) < 0) {
1728 return rval;
1729 }
1730
1731 addr = Memptr;
1732
1733 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
1734 if (addr != Memptr)
1735 memmove(addr, Memptr, nbytes);
1736 #endif /* sgi */
1737 #endif /* CRAY */
1738
1739 rval = -1;
1740 got_lock = 0;
1741 logged_write = 0;
1742
1743 if (k_opt) {
1744 if (lock_file_region(file, fd, F_WRLCK, offset, nbytes) < 0) {
1745 alloc_mem(-1);
1746 exit(E_INTERNAL);
1747 }
1748
1749 got_lock = 1;
1750 }
1751
1752 /*
1753 * Write a preliminary write-log entry. This is done so that
1754 * doio_check can do corruption detection across an interrupt/crash.
1755 * Note that w_done is set to 0. If doio_check sees this, it
1756 * re-creates the file extents as if the write completed, but does not
1757 * do any checking - see comments in doio_check for more details.
1758 */
1759
1760 if (w_opt) {
1761 if (pid == -1) {
1762 pid = getpid();
1763 }
1764 wrec.w_async = (req->r_type == WRITEA) ? 1 : 0;
1765 wrec.w_oflags = oflags;
1766 wrec.w_pid = pid;
1767 wrec.w_offset = offset;
1768 wrec.w_nbytes = nbytes;
1769
1770 wrec.w_pathlen = strlen(file);
1771 memcpy(wrec.w_path, file, wrec.w_pathlen);
1772 wrec.w_hostlen = strlen(Host);
1773 memcpy(wrec.w_host, Host, wrec.w_hostlen);
1774 wrec.w_patternlen = Pattern_Length;
1775 memcpy(wrec.w_pattern, Pattern, wrec.w_patternlen);
1776
1777 wrec.w_done = 0;
1778
1779 if ((woffset = wlog_record_write(&Wlog, &wrec, -1)) == -1) {
1780 doio_fprintf(stderr,
1781 "Could not append to write-log: %s (%d)\n",
1782 SYSERR, errno);
1783 } else {
1784 logged_write = 1;
1785 }
1786 }
1787
1788 switch (req->r_type) {
1789 case WRITE:
1790 /*
1791 * sync write
1792 */
1793
1794 if (lseek(fd, offset, SEEK_SET) == -1) {
1795 doio_fprintf(stderr,
1796 "lseek(%d, %d, SEEK_SET) failed: %s (%d)\n",
1797 fd, offset, SYSERR, errno);
1798 return -1;
1799 }
1800
1801 rval = write(fd, addr, nbytes);
1802
1803 if (rval == -1) {
1804 doio_fprintf(stderr,
1805 "write() failed: %s (%d)\n%s\n",
1806 SYSERR, errno,
1807 format_rw(req, fd, addr, -1, Pattern,
1808 NULL));
1809 #ifdef sgi
1810 doio_fprintf(stderr,
1811 "write() failed: %s\n\twrite(%d, %#o, %d)\n\toffset %d, nbytes%%miniou(%d)=%d, oflags=%#o memalign=%d, addr%%memalign=%d\n",
1812 strerror(errno),
1813 fd, addr, nbytes,
1814 offset,
1815 fdc->c_miniosz, nbytes % fdc->c_miniosz,
1816 oflags, fdc->c_memalign,
1817 (long)addr % fdc->c_memalign);
1818 #else
1819 doio_fprintf(stderr,
1820 "write() failed: %s\n\twrite(%d, %#o, %d)\n\toffset %d, nbytes%%1B=%d, oflags=%#o\n",
1821 strerror(errno),
1822 fd, addr, nbytes,
1823 offset, nbytes % 4096, oflags);
1824 #endif
1825 doio_upanic(U_RVAL);
1826 } else if (rval != nbytes) {
1827 doio_fprintf(stderr,
1828 "write() returned wrong # bytes - expected %d, got %d\n%s\n",
1829 nbytes, rval,
1830 format_rw(req, fd, addr, -1, Pattern,
1831 NULL));
1832 doio_upanic(U_RVAL);
1833 rval = -1;
1834 }
1835
1836 break;
1837
1838 #ifdef CRAY
1839 case WRITEA:
1840 /*
1841 * async write
1842 */
1843 if (lseek(fd, offset, SEEK_SET) == -1) {
1844 doio_fprintf(stderr,
1845 "lseek(%d, %d, SEEK_SET) failed: %s (%d)\n",
1846 fd, offset, SYSERR, errno);
1847 return -1;
1848 }
1849
1850 aio_strat = req->r_data.write.r_aio_strat;
1851 signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
1852
1853 aio_id = aio_register(fd, aio_strat, signo);
1854 aiop = aio_slot(aio_id);
1855
1856 /*
1857 * init iosw and do the async write
1858 */
1859
1860 if (writea(fd, addr, nbytes, &aiop->iosw, signo) == -1) {
1861 doio_fprintf(stderr,
1862 "writea() failed: %s (%d)\n%s\n",
1863 SYSERR, errno,
1864 format_rw(req, fd, addr, -1, Pattern,
1865 NULL));
1866 doio_upanic(U_RVAL);
1867 aio_unregister(aio_id);
1868 rval = -1;
1869 } else {
1870
1871 /*
1872 * Wait for io to complete
1873 */
1874
1875 aio_wait(aio_id);
1876
1877 /*
1878 * check that iosw is ok
1879 */
1880
1881 if (aiop->iosw.sw_count != nbytes) {
1882 doio_fprintf(stderr,
1883 "Bad iosw from writea()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
1884 1, 0, nbytes,
1885 aiop->iosw.sw_flag,
1886 aiop->iosw.sw_error,
1887 aiop->iosw.sw_count,
1888 format_rw(req, fd, addr, -1,
1889 Pattern, &aiop->iosw));
1890 aio_unregister(aio_id);
1891 doio_upanic(U_IOSW);
1892 rval = -1;
1893 } else {
1894 aio_unregister(aio_id);
1895 rval = 0;
1896 }
1897 }
1898 break;
1899
1900 #endif /* CRAY */
1901 }
1902
1903 /*
1904 * Verify that the data was written correctly - check_file() returns
1905 * a non-null pointer which contains an error message if there are
1906 * problems.
1907 */
1908
1909 if (v_opt) {
1910 msg = check_file(file, offset, nbytes, Pattern, Pattern_Length,
1911 0, oflags & O_PARALLEL);
1912 if (msg != NULL) {
1913 doio_fprintf(stderr, "%s%s\n", msg,
1914 #ifdef CRAY
1915 format_rw(req, fd, addr, -1, Pattern,
1916 &aiop->iosw)
1917 #else
1918 format_rw(req, fd, addr, -1, Pattern, NULL)
1919 #endif
1920 );
1921 doio_upanic(U_CORRUPTION);
1922 exit(E_COMPARE);
1923
1924 }
1925 }
1926
1927 /*
1928 * General cleanup ...
1929 *
1930 * Write extent information to the write-log, so that doio_check can do
1931 * corruption detection. Note that w_done is set to 1, indicating that
1932 * the write has been verified as complete. We don't need to write the
1933 * filename on the second logging.
1934 */
1935
1936 if (w_opt && logged_write) {
1937 wrec.w_done = 1;
1938 wlog_record_write(&Wlog, &wrec, woffset);
1939 }
1940
1941 /*
1942 * Unlock file region if necessary
1943 */
1944
1945 if (got_lock) {
1946 if (lock_file_region(file, fd, F_UNLCK, offset, nbytes) < 0) {
1947 alloc_mem(-1);
1948 exit(E_INTERNAL);
1949 }
1950 }
1951
1952 return ((rval == -1) ? -1 : 0);
1953 }
1954
1955 /*
1956 * Simple routine to lock/unlock a file using fcntl()
1957 */
1958
lock_file_region(char * fname,int fd,int type,int start,int nbytes)1959 int lock_file_region(char *fname, int fd, int type, int start, int nbytes)
1960 {
1961 struct flock flk;
1962
1963 flk.l_type = type;
1964 flk.l_whence = 0;
1965 flk.l_start = start;
1966 flk.l_len = nbytes;
1967
1968 if (fcntl(fd, F_SETLKW, &flk) < 0) {
1969 doio_fprintf(stderr,
1970 "fcntl(%d, %d, %#o) failed for file %s, lock type %d, offset %d, length %d: %s (%d), open flags: %#o\n",
1971 fd, F_SETLKW, &flk, fname, type,
1972 start, nbytes, SYSERR, errno,
1973 fcntl(fd, F_GETFL, 0));
1974 return -1;
1975 }
1976
1977 return 0;
1978 }
1979
1980 /*
1981 * Perform a listio request.
1982 */
1983
#ifdef CRAY
/*
 * format_listio - describe a listio() call and its request list for use
 * in an error message.
 *
 * ioreq   - the originating request; its r_aio_strat is reported
 * lcmd    - listio command (LC_START and LC_WAIT are named, others "???")
 * list    - array of listreq structures passed to listio()
 * nent    - number of entries in list
 * fd      - not used by this routine
 * pattern - not used by this routine
 *
 * Every field of each of the nent list entries is printed, including the
 * status word each li_status points at.
 *
 * The text is assembled in one lazily allocated static buffer, so the
 * returned string is overwritten by the next call and must not be freed.
 * If that buffer cannot be allocated a fixed diagnostic string is returned
 * rather than formatting through a NULL pointer.
 */
char *format_listio(struct io_req *ioreq, int lcmd, struct listreq *list,
		    int nent, int fd, char *pattern)
{
	static char *errbuf = NULL;
	struct listio_req *liop = &ioreq->r_data.listio;
	struct listreq *listreq;
	char *cp, *cmd, *opcode, *aio_strat;
	int i;

	switch (lcmd) {
	case LC_START:
		cmd = "LC_START";
		break;
	case LC_WAIT:
		cmd = "LC_WAIT";
		break;
	default:
		cmd = "???";
		break;
	}

	if (errbuf == NULL) {
		errbuf = malloc(32768);
		if (errbuf == NULL)
			return "format_listio(): malloc failed - request details unavailable";
	}

	cp = errbuf;
	cp += sprintf(cp, "Request number %d\n", Reqno);

	/* pointers are printed as unsigned long, as format_rw() does */
	cp += sprintf(cp, "syscall: listio(%s, %#lo, %d)\n\n", cmd,
		      (unsigned long)list, nent);

	aio_strat = format_strat(liop->r_aio_strat);

	for (i = 0; i < nent; i++) {
		cp += sprintf(cp, "struct lioreq for request element %d\n", i);
		cp += sprintf(cp, "----------------------------------------\n");

		listreq = list + i;

		switch (listreq->li_opcode) {
		case LO_READ:
			opcode = "LO_READ";
			break;
		case LO_WRITE:
			opcode = "LO_WRITE";
			break;
		default:
			opcode = "???";
			break;
		}

		cp += sprintf(cp, " li_opcode = %s\n", opcode);
		cp += sprintf(cp, " li_drvr = %#o\n", listreq->li_drvr);
		cp += sprintf(cp, " li_flags = %#o\n", listreq->li_flags);
		cp += sprintf(cp, " li_offset = %d\n", listreq->li_offset);
		cp += sprintf(cp, " li_fildes = %d\n", listreq->li_fildes);
		cp += sprintf(cp, " li_buf = %#lo\n",
			      (unsigned long)listreq->li_buf);
		cp += sprintf(cp, " li_nbyte = %d\n", listreq->li_nbyte);
		cp += sprintf(cp, " li_status = %#lo (%d, %d, %d)\n",
			      (unsigned long)listreq->li_status,
			      listreq->li_status->sw_flag,
			      listreq->li_status->sw_error,
			      listreq->li_status->sw_count);
		cp += sprintf(cp, " li_signo = %d\n", listreq->li_signo);
		cp += sprintf(cp, " li_nstride = %d\n", listreq->li_nstride);
		cp += sprintf(cp, " li_filstride = %d\n",
			      listreq->li_filstride);
		cp += sprintf(cp, " li_memstride = %d\n",
			      listreq->li_memstride);
		cp += sprintf(cp, " io completion strategy is %s\n",
			      aio_strat);
	}
	return errbuf;
}
#endif /* CRAY */
2077
/*
 * do_listio - carry out a single listio request (CRAY builds only; on any
 * other build the function just returns -1).
 *
 * Steps, in order:
 *   - reject requests whose file stride is smaller than the byte count
 *   - size and allocate the buffer from the memory stride, filling it with
 *     the pattern for LO_WRITE
 *   - get a descriptor and, for LO_WRITE with k_opt, write-lock the whole
 *     byte range the strides touch
 *   - register the async i/o, build the listreq and call listio() with
 *     the completion signal blocked
 *   - wait for completion and check the transferred byte count
 *   - for LO_WRITE with v_opt, verify every stride with check_file(); a
 *     mismatch is reported and the process exits with E_COMPARE
 *   - release the region lock
 *
 * Returns 0 on success, -1 on failure, or the negative value returned by
 * alloc_mem() when the buffer could not be set up.
 */
int do_listio(struct io_req *req)
{
#ifdef CRAY
	struct listio_req *lio;
	int fd, signo, i;
	int rval, got_lock;
	int aio_strat, aio_id;
	int min_byte, max_byte;
	int mem_needed;
	int foffset, fstride, mstride, nstrides;
	char *moffset;
	char *addr, *msg;
	sigset_t block_mask, omask;
	struct aio_info *aiop;
	struct listreq lio_req;

	lio = &req->r_data.listio;

	/*
	 * If bytes per stride is less than the stride size, drop the request
	 * since it will cause overlapping strides, and we cannot predict
	 * the order they will complete in.
	 */

	if (lio->r_filestride && abs(lio->r_filestride) < lio->r_nbytes) {
		doio_fprintf(stderr,
			     "do_listio():  Bogus listio request - abs(filestride) [%d] < nbytes [%d]\n",
			     abs(lio->r_filestride), lio->r_nbytes);
		return -1;
	}

	/*
	 * Allocate core memory.  Initialize the data to be written.  Make
	 * sure we get enough, based on the memstride.
	 */

	mem_needed =
	    stride_bounds(0, lio->r_memstride, lio->r_nstrides,
			  lio->r_nbytes, NULL, NULL);

	if ((rval = alloc_mem(mem_needed + wtob(1))) < 0) {
		return rval;
	}

	/*
	 * Set the memory address pointer.  If the io is not raw, adjust
	 * addr by a random amount, so that non-raw io is not necessarily
	 * word aligned.
	 */

	addr = Memptr;

	if (!(lio->r_uflags & F_WORD_ALIGNED)) {
		addr += random_range(0, wtob(1) - 1, 1, NULL);
	}

	if (lio->r_opcode == LO_WRITE) {
		Pattern[0] = lio->r_pattern;
		(*Data_Fill) (Memptr, mem_needed, Pattern, Pattern_Length, 0);
		if (addr != Memptr)
			memmove(addr, Memptr, mem_needed);
	}

	/*
	 * Get a descriptor to do the io on.  No need to do an lseek, as this
	 * is encoded in the listio request.
	 */

	if ((fd = alloc_fd(lio->r_file, lio->r_oflags)) == -1) {
		return -1;
	}

	rval = -1;
	got_lock = 0;

	/*
	 * If the opcode is LO_WRITE, lock all regions of the file that
	 * are touched by this listio request.  Currently, we use
	 * stride_bounds() to figure out the min and max bytes affected, and
	 * lock the entire region, regardless of the file stride.
	 */

	if (lio->r_opcode == LO_WRITE && k_opt) {
		stride_bounds(lio->r_offset,
			      lio->r_filestride, lio->r_nstrides,
			      lio->r_nbytes, &min_byte, &max_byte);

		if (lock_file_region(lio->r_file, fd, F_WRLCK,
				     min_byte, (max_byte - min_byte + 1)) < 0) {
			doio_fprintf(stderr,
				     "stride_bounds(%d, %d, %d, %d, ..., ...) set min_byte to %d, max_byte to %d\n",
				     lio->r_offset, lio->r_filestride,
				     lio->r_nstrides, lio->r_nbytes, min_byte,
				     max_byte);
			return -1;
		} else {
			got_lock = 1;
		}
	}

	/*
	 * async write
	 */

	aio_strat = lio->r_aio_strat;
	signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;

	aio_id = aio_register(fd, aio_strat, signo);
	aiop = aio_slot(aio_id);

	/*
	 * Form the listio request, and make the call.
	 */

	lio_req.li_opcode = lio->r_opcode;
	lio_req.li_drvr = 0;
	lio_req.li_flags = LF_LSEEK;
	lio_req.li_offset = lio->r_offset;
	lio_req.li_fildes = fd;

	/* with a negative memory stride the first stride is the highest one */
	if (lio->r_memstride >= 0 || lio->r_nstrides <= 1) {
		lio_req.li_buf = addr;
	} else {
		lio_req.li_buf = addr + mem_needed - lio->r_nbytes;
	}

	lio_req.li_nbyte = lio->r_nbytes;
	lio_req.li_status = &aiop->iosw;
	lio_req.li_signo = signo;
	lio_req.li_nstride = lio->r_nstrides;
	lio_req.li_filstride = lio->r_filestride;
	lio_req.li_memstride = lio->r_memstride;

	/*
	 * If signo != 0, block signo while we're in the system call, so that
	 * we don't get interrupted syscall failures.
	 */

	if (signo) {
		sigemptyset(&block_mask);
		sigaddset(&block_mask, signo);
		sigprocmask(SIG_BLOCK, &block_mask, &omask);
	}

	if (listio(lio->r_cmd, &lio_req, 1) < 0) {
		doio_fprintf(stderr,
			     "listio() failed: %s (%d)\n%s\n",
			     SYSERR, errno,
			     format_listio(req, lio->r_cmd, &lio_req, 1, fd,
					   Pattern));
		/*
		 * Put the signal mask back on this path too, otherwise
		 * signo stays blocked for the rest of the run.
		 */
		if (signo) {
			sigprocmask(SIG_SETMASK, &omask, NULL);
		}
		aio_unregister(aio_id);
		doio_upanic(U_RVAL);
		goto lio_done;
	}

	if (signo) {
		sigprocmask(SIG_SETMASK, &omask, NULL);
	}

	/*
	 * Wait for io to complete
	 */

	aio_wait(aio_id);

	/* a stride count of 0 is treated as a single stride */
	nstrides = lio->r_nstrides ? lio->r_nstrides : 1;
	if (aiop->iosw.sw_count != lio->r_nbytes * nstrides) {
		/* report the same expected count that was just compared */
		doio_fprintf(stderr,
			     "Bad iosw from listio()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
			     1, 0, lio->r_nbytes * nstrides,
			     aiop->iosw.sw_flag,
			     aiop->iosw.sw_error, aiop->iosw.sw_count,
			     format_listio(req, lio->r_cmd, &lio_req, 1, fd,
					   Pattern));
		aio_unregister(aio_id);
		doio_upanic(U_IOSW);
		goto lio_done;
	}

	aio_unregister(aio_id);

	/*
	 * Verify that the data was written correctly - check_file() returns
	 * a non-null pointer which contains an error message if there are
	 * problems.
	 *
	 * For listio, we basically have to make 1 call to check_file for each
	 * stride.
	 */

	if (v_opt && lio_req.li_opcode == LO_WRITE) {
		fstride = lio->r_filestride ? lio->r_filestride : lio->r_nbytes;
		mstride = lio->r_memstride ? lio->r_memstride : lio->r_nbytes;
		foffset = lio->r_offset;

		if (mstride > 0 || lio->r_nstrides <= 1) {
			moffset = addr;
		} else {
			moffset = addr + mem_needed - lio->r_nbytes;
		}

		for (i = 0; i < lio_req.li_nstride; i++) {
			msg = check_file(lio->r_file,
					 foffset, lio->r_nbytes,
					 Pattern, Pattern_Length,
					 moffset - addr,
					 lio->r_oflags & O_PARALLEL);

			if (msg != NULL) {
				doio_fprintf(stderr, "%s\n%s\n",
					     msg,
					     format_listio(req, lio->r_cmd,
							   &lio_req, 1, fd,
							   Pattern));
				doio_upanic(U_CORRUPTION);
				exit(E_COMPARE);
			}

			moffset += mstride;
			foffset += fstride;
		}

	}

	rval = 0;

lio_done:

	/*
	 * General cleanup ...
	 *
	 */

	/*
	 * Release file locks if necessary
	 */

	if (got_lock) {
		if (lock_file_region(lio->r_file, fd, F_UNLCK,
				     min_byte, (max_byte - min_byte + 1)) < 0) {
			return -1;
		}
	}

	return rval;
#else
	return -1;
#endif
}
2330
/*
 * perform ssread/sswrite operations
 *
 * do_ssdio - carry out an SSREAD or SSWRITE request.
 *
 * Two variants exist: the real implementation for _CRAY1 builds, and a
 * stub for other CRAY builds that reports an internal error and exits.
 * No definition is provided when neither macro is defined.
 */

#ifdef _CRAY1

/*
 * Allocates core and sds space for the request's byte count, then either
 * fills core with the pattern and ships it to the sds (SSWRITE) or reads
 * from the sds into core (any other request type).  With v_opt, an SSWRITE
 * is read back and compared against the pattern; a mismatch is reported
 * and the process exits with E_COMPARE.
 *
 * Returns 0 on success, -1 on failure, or the negative value returned by
 * alloc_mem().
 */
int do_ssdio(struct io_req *req)
{
	int nbytes, nb;

	nbytes = req->r_data.ssread.r_nbytes;

	/*
	 * Grab core and sds space
	 */

	if ((nb = alloc_mem(nbytes)) < 0)
		return nb;

	if (alloc_sds(nbytes) == -1)
		return -1;

	if (req->r_type == SSWRITE) {

		/*
		 * Init data and ship it to the ssd
		 */

		Pattern[0] = req->r_data.sswrite.r_pattern;
		/*pattern_fill(Memptr, nbytes, Pattern, Pattern_Length, 0); */
		(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);

		if (sswrite((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) {
			doio_fprintf(stderr, "sswrite() failed:  %s (%d)\n%s\n",
				     SYSERR, errno,
				     format_sds(req, Memptr, Sdsptr, Pattern));
			doio_upanic(U_RVAL);
			return -1;
		}
	} else {
		/*
		 * read from sds
		 */

		if (ssread((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) {
			doio_fprintf(stderr, "ssread() failed: %s (%d)\n%s\n",
				     SYSERR, errno,
				     format_sds(req, Memptr, Sdsptr, Pattern));

			doio_upanic(U_RVAL);
			return -1;
		}
	}

	/*
	 * Verify data if SSWRITE and v_opt
	 */

	if (v_opt && req->r_type == SSWRITE) {
		/*
		 * A failed read-back must not be mistaken for corrupt data,
		 * so report it as a read failure instead of comparing.
		 */
		if (ssread((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) {
			doio_fprintf(stderr, "ssread() failed: %s (%d)\n%s\n",
				     SYSERR, errno,
				     format_sds(req, Memptr, Sdsptr, Pattern));

			doio_upanic(U_RVAL);
			return -1;
		}

		if (pattern_check(Memptr, nbytes, Pattern, Pattern_Length, 0) ==
		    -1) {
			doio_fprintf(stderr,
				     "sds DATA COMPARE ERROR - ABORTING\n%s\n",
				     format_sds(req, Memptr, Sdsptr, Pattern));

			doio_upanic(U_CORRUPTION);
			exit(E_COMPARE);
		}
	}

	/* the function is declared int, so report success explicitly */
	return 0;
}

#else

#ifdef CRAY

/*
 * Stub for CRAY builds without _CRAY1: reports the internal error, calls
 * alloc_mem(-1) and exits with E_INTERNAL, so it never returns.
 */
int do_ssdio(struct io_req *req)
{
	doio_fprintf(stderr,
		     "Internal Error - do_ssdio() called on a non-cray1 system\n");
	alloc_mem(-1);
	exit(E_INTERNAL);
}

#endif /* CRAY */

#endif /* _CRAY1 */
2420
fmt_ioreq(struct io_req * ioreq,struct syscall_info * sy,int fd)2421 char *fmt_ioreq(struct io_req *ioreq, struct syscall_info *sy, int fd)
2422 {
2423 static char *errbuf = NULL;
2424 char *cp;
2425 struct rw_req *io;
2426 struct smap *aname;
2427 #ifdef CRAY
2428 struct stat sbuf;
2429 #endif
2430 #ifdef sgi
2431 struct dioattr finfo;
2432 #endif
2433
2434 if (errbuf == NULL)
2435 errbuf = malloc(32768);
2436
2437 io = &ioreq->r_data.io;
2438
2439 /*
2440 * Look up async I/O completion strategy
2441 */
2442 for (aname = aionames;
2443 aname->value != -1 && aname->value != io->r_aio_strat; aname++) ;
2444
2445 cp = errbuf;
2446 cp += sprintf(cp, "Request number %d\n", Reqno);
2447
2448 cp +=
2449 sprintf(cp, " fd %d is file %s - open flags are %#o %s\n",
2450 fd, io->r_file, io->r_oflags, format_oflags(io->r_oflags));
2451
2452 if (sy->sy_flags & SY_WRITE) {
2453 cp +=
2454 sprintf(cp,
2455 " write done at file offset %d - pattern is %c (%#o)\n",
2456 io->r_offset,
2457 (io->r_pattern == '\0') ? '?' : io->r_pattern,
2458 io->r_pattern);
2459 } else {
2460 cp += sprintf(cp, " read done at file offset %d\n",
2461 io->r_offset);
2462 }
2463
2464 if (sy->sy_flags & SY_ASYNC) {
2465 cp +=
2466 sprintf(cp,
2467 " async io completion strategy is %s\n",
2468 aname->string);
2469 }
2470
2471 cp +=
2472 sprintf(cp,
2473 " number of requests is %d, strides per request is %d\n",
2474 io->r_nent, io->r_nstrides);
2475
2476 cp += sprintf(cp, " i/o byte count = %d\n", io->r_nbytes);
2477
2478 cp += sprintf(cp, " memory alignment is %s\n",
2479 (io->
2480 r_uflags & F_WORD_ALIGNED) ? "aligned" : "unaligned");
2481
2482 #ifdef CRAY
2483 if (io->r_oflags & O_RAW) {
2484 cp +=
2485 sprintf(cp,
2486 " RAW I/O: offset %% 4096 = %d length %% 4096 = %d\n",
2487 io->r_offset % 4096, io->r_nbytes % 4096);
2488 fstat(fd, &sbuf);
2489 cp +=
2490 sprintf(cp,
2491 " optimal file xfer size: small: %d large: %d\n",
2492 sbuf.st_blksize, sbuf.st_oblksize);
2493 cp +=
2494 sprintf(cp, " cblks %d cbits %#o\n", sbuf.st_cblks,
2495 sbuf.st_cbits);
2496 }
2497 #endif
2498 #ifdef sgi
2499 if (io->r_oflags & O_DIRECT) {
2500
2501 if (fcntl(fd, F_DIOINFO, &finfo) == -1) {
2502 cp +=
2503 sprintf(cp,
2504 " Error %s (%d) getting direct I/O info\n",
2505 strerror(errno), errno);
2506 finfo.d_mem = 1;
2507 finfo.d_miniosz = 1;
2508 finfo.d_maxiosz = 1;
2509 }
2510
2511 cp +=
2512 sprintf(cp,
2513 " DIRECT I/O: offset %% %d = %d length %% %d = %d\n",
2514 finfo.d_miniosz, io->r_offset % finfo.d_miniosz,
2515 io->r_nbytes, io->r_nbytes % finfo.d_miniosz);
2516 cp +=
2517 sprintf(cp,
2518 " mem alignment 0x%x xfer size: small: %d large: %d\n",
2519 finfo.d_mem, finfo.d_miniosz, finfo.d_maxiosz);
2520 }
2521 #endif
2522
2523 return (errbuf);
2524 }
2525
2526 /*
2527 * Issue listio requests
2528 */
2529 #ifdef CRAY
sy_listio(struct io_req * req,struct syscall_info * sysc,int fd,char * addr)2530 struct status *sy_listio(struct io_req *req, struct syscall_info *sysc, int fd,
2531 char *addr)
2532 {
2533 int offset, nbytes, nstrides, nents, aio_strat;
2534 int aio_id, signo, o, i, lc;
2535 char *a;
2536 struct listreq *lio_req, *l;
2537 struct aio_info *aiop;
2538 struct status *status;
2539
2540 /*
2541 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
2542 * r_nbytes are at the same offset in the read_req and reada_req
2543 * structures.
2544 */
2545 offset = req->r_data.io.r_offset;
2546 nbytes = req->r_data.io.r_nbytes;
2547 nstrides = req->r_data.io.r_nstrides;
2548 nents = req->r_data.io.r_nent;
2549 aio_strat = req->r_data.io.r_aio_strat;
2550
2551 lc = (sysc->sy_flags & SY_ASYNC) ? LC_START : LC_WAIT;
2552
2553 status = malloc(sizeof(struct status));
2554 if (status == NULL) {
2555 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2556 __FILE__, __LINE__);
2557 return NULL;
2558 }
2559 status->aioid = malloc((nents + 1) * sizeof(int));
2560 if (status->aioid == NULL) {
2561 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2562 __FILE__, __LINE__);
2563 return NULL;
2564 }
2565
2566 signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
2567
2568 lio_req = malloc(nents * sizeof(struct listreq));
2569 if (lio_req == NULL) {
2570 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2571 __FILE__, __LINE__);
2572 return NULL;
2573 }
2574 for (l = lio_req, a = addr, o = offset, i = 0;
2575 i < nents; l++, a += nbytes, o += nbytes, i++) {
2576
2577 aio_id = aio_register(fd, aio_strat, signo);
2578 aiop = aio_slot(aio_id);
2579 status->aioid[i] = aio_id;
2580
2581 l->li_opcode = (sysc->sy_flags & SY_WRITE) ? LO_WRITE : LO_READ;
2582 l->li_offset = o;
2583 l->li_fildes = fd;
2584 l->li_buf = a;
2585 l->li_nbyte = nbytes;
2586 l->li_status = &aiop->iosw;
2587 l->li_signo = signo;
2588 l->li_nstride = nstrides;
2589 l->li_filstride = 0;
2590 l->li_memstride = 0;
2591 l->li_drvr = 0;
2592 l->li_flags = LF_LSEEK;
2593 }
2594
2595 status->aioid[nents] = -1; /* end sentinel */
2596
2597 if ((status->rval = listio(lc, lio_req, nents)) == -1) {
2598 status->err = errno;
2599 }
2600
2601 free(lio_req);
2602 return (status);
2603 }
2604
2605 /*
2606 * Calculate the size of a request in bytes and min/max boundaries
2607 *
2608 * This assumes filestride & memstride = 0.
2609 */
listio_mem(struct io_req * req,int offset,int fmstride,int * min,int * max)2610 int listio_mem(struct io_req *req, int offset, int fmstride, int *min, int *max)
2611 {
2612 int i, size;
2613
2614 size = stride_bounds(offset, fmstride,
2615 req->r_data.io.r_nstrides * req->r_data.io.r_nent,
2616 req->r_data.io.r_nbytes, min, max);
2617 return (size);
2618 }
2619
fmt_listio(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2620 char *fmt_listio(struct io_req *req, struct syscall_info *sy, int fd,
2621 char *addr)
2622 {
2623 static char *errbuf = NULL;
2624 char *cp;
2625 char *c, *opcode;
2626 int i;
2627
2628 if (errbuf == NULL) {
2629 errbuf = malloc(32768);
2630 if (errbuf == NULL) {
2631 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2632 __FILE__, __LINE__);
2633 return NULL;
2634 }
2635 }
2636
2637 c = (sy->sy_flags & SY_ASYNC) ? "lc_wait" : "lc_start";
2638
2639 cp = errbuf;
2640 cp += sprintf(cp, "syscall: listio(%s, (?), %d)\n",
2641 c, req->r_data.io.r_nent);
2642
2643 cp += sprintf(cp, " data buffer at %#o\n", addr);
2644
2645 return (errbuf);
2646 }
2647 #endif /* CRAY */
2648
2649 #ifdef sgi
sy_pread(struct io_req * req,struct syscall_info * sysc,int fd,char * addr)2650 struct status *sy_pread(struct io_req *req, struct syscall_info *sysc, int fd,
2651 char *addr)
2652 {
2653 int rc;
2654 struct status *status;
2655
2656 rc = pread(fd, addr, req->r_data.io.r_nbytes, req->r_data.io.r_offset);
2657
2658 status = malloc(sizeof(struct status));
2659 if (status == NULL) {
2660 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2661 __FILE__, __LINE__);
2662 return NULL;
2663 }
2664 status->aioid = NULL;
2665 status->rval = rc;
2666 status->err = errno;
2667
2668 return (status);
2669 }
2670
sy_pwrite(struct io_req * req,struct syscall_info * sysc,int fd,char * addr)2671 struct status *sy_pwrite(struct io_req *req, struct syscall_info *sysc, int fd,
2672 char *addr)
2673 {
2674 int rc;
2675 struct status *status;
2676
2677 rc = pwrite(fd, addr, req->r_data.io.r_nbytes, req->r_data.io.r_offset);
2678
2679 status = malloc(sizeof(struct status));
2680 if (status == NULL) {
2681 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2682 __FILE__, __LINE__);
2683 return NULL;
2684 }
2685 status->aioid = NULL;
2686 status->rval = rc;
2687 status->err = errno;
2688
2689 return (status);
2690 }
2691
fmt_pread(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2692 char *fmt_pread(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
2693 {
2694 static char *errbuf = NULL;
2695 char *cp;
2696
2697 if (errbuf == NULL) {
2698 errbuf = malloc(32768);
2699 if (errbuf == NULL) {
2700 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2701 __FILE__, __LINE__);
2702 return NULL;
2703 }
2704 }
2705
2706 cp = errbuf;
2707 cp += sprintf(cp, "syscall: %s(%d, 0x%lx, %d)\n",
2708 sy->sy_name, fd, addr, req->r_data.io.r_nbytes);
2709 return (errbuf);
2710 }
2711 #endif /* sgi */
2712
2713 #ifndef CRAY
/*
 * readv entry point for the syscall table: forwards to sy_rwv() with the
 * read/write selector set to 0 (read) and returns its result unchanged.
 */
struct status *sy_readv(struct io_req *req, struct syscall_info *sysc, int fd,
			char *addr)
{
	struct status *sy_rwv();
	struct status *result;

	result = sy_rwv(req, sysc, fd, addr, 0);
	return result;
}
2720
/*
 * writev entry point for the syscall table: forwards to sy_rwv() with the
 * read/write selector set to 1 (write) and returns its result unchanged.
 */
struct status *sy_writev(struct io_req *req, struct syscall_info *sysc, int fd,
			 char *addr)
{
	struct status *sy_rwv();
	struct status *result;

	result = sy_rwv(req, sysc, fd, addr, 1);
	return result;
}
2727
sy_rwv(struct io_req * req,struct syscall_info * sysc,int fd,char * addr,int rw)2728 struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc, int fd,
2729 char *addr, int rw)
2730 {
2731 int rc;
2732 struct status *status;
2733 struct iovec iov[2];
2734
2735 status = malloc(sizeof(struct status));
2736 if (status == NULL) {
2737 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2738 __FILE__, __LINE__);
2739 return NULL;
2740 }
2741 status->aioid = NULL;
2742
2743 /* move to the desired file position. */
2744 if ((rc = lseek(fd, req->r_data.io.r_offset, SEEK_SET)) == -1) {
2745 status->rval = rc;
2746 status->err = errno;
2747 return (status);
2748 }
2749
2750 iov[0].iov_base = addr;
2751 iov[0].iov_len = req->r_data.io.r_nbytes;
2752
2753 if (rw)
2754 rc = writev(fd, iov, 1);
2755 else
2756 rc = readv(fd, iov, 1);
2757 status->aioid = NULL;
2758 status->rval = rc;
2759 status->err = errno;
2760 return (status);
2761 }
2762
fmt_readv(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2763 char *fmt_readv(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
2764 {
2765 static char errbuf[32768];
2766 char *cp;
2767
2768 cp = errbuf;
2769 cp += sprintf(cp, "syscall: %s(%d, (iov on stack), 1)\n",
2770 sy->sy_name, fd);
2771 return (errbuf);
2772 }
2773 #endif /* !CRAY */
2774
2775 #ifdef sgi
/*
 * Async read entry point for the syscall table: forwards to sy_arw() with
 * the read/write selector set to 0 (read) and returns its result unchanged.
 */
struct status *sy_aread(struct io_req *req, struct syscall_info *sysc, int fd,
			char *addr)
{
	struct status *sy_arw();
	struct status *result;

	result = sy_arw(req, sysc, fd, addr, 0);
	return result;
}
2782
/*
 * Async write entry point for the syscall table: forwards to sy_arw() with
 * the read/write selector set to 1 (write) and returns its result unchanged.
 */
struct status *sy_awrite(struct io_req *req, struct syscall_info *sysc, int fd,
			 char *addr)
{
	struct status *sy_arw();
	struct status *result;

	result = sy_arw(req, sysc, fd, addr, 1);
	return result;
}
2789
2790 /*
2791 #define sy_aread(A, B, C, D) sy_arw(A, B, C, D, 0)
2792 #define sy_awrite(A, B, C, D) sy_arw(A, B, C, D, 1)
2793 */
2794
sy_arw(struct io_req * req,struct syscall_info * sysc,int fd,char * addr,int rw)2795 struct status *sy_arw(struct io_req *req, struct syscall_info *sysc, int fd,
2796 char *addr, int rw)
2797 {
2798 /* POSIX 1003.1b-1993 Async read */
2799 struct status *status;
2800 int rc;
2801 int aio_id, aio_strat, signo;
2802 struct aio_info *aiop;
2803
2804 status = malloc(sizeof(struct status));
2805 if (status == NULL) {
2806 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2807 __FILE__, __LINE__);
2808 return NULL;
2809 }
2810 aio_strat = req->r_data.io.r_aio_strat;
2811 signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
2812
2813 aio_id = aio_register(fd, aio_strat, signo);
2814 aiop = aio_slot(aio_id);
2815
2816 memset((void *)&aiop->aiocb, 0, sizeof(aiocb_t));
2817
2818 aiop->aiocb.aio_fildes = fd;
2819 aiop->aiocb.aio_nbytes = req->r_data.io.r_nbytes;
2820 aiop->aiocb.aio_offset = req->r_data.io.r_offset;
2821 aiop->aiocb.aio_buf = addr;
2822 aiop->aiocb.aio_reqprio = 0; /* must be 0 */
2823 aiop->aiocb.aio_lio_opcode = 0;
2824
2825 if (aio_strat == A_SIGNAL) { /* siginfo(2) stuff */
2826 aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
2827 aiop->aiocb.aio_sigevent.sigev_signo = signo;
2828 } else if (aio_strat == A_CALLBACK) {
2829 aiop->aiocb.aio_sigevent.sigev_signo = 0;
2830 aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_CALLBACK;
2831 aiop->aiocb.aio_sigevent.sigev_func = cb_handler;
2832 aiop->aiocb.aio_sigevent.sigev_value.sival_int = aio_id;
2833 } else {
2834 aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_NONE;
2835 aiop->aiocb.aio_sigevent.sigev_signo = 0;
2836 }
2837
2838 if (rw)
2839 rc = aio_write(&aiop->aiocb);
2840 else
2841 rc = aio_read(&aiop->aiocb);
2842
2843 status->aioid = malloc(2 * sizeof(int));
2844 if (status->aioid == NULL) {
2845 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2846 __FILE__, __LINE__);
2847 return NULL;
2848 }
2849 status->aioid[0] = aio_id;
2850 status->aioid[1] = -1;
2851 status->rval = rc;
2852 status->err = errno;
2853 return (status);
2854 }
2855
fmt_aread(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2856 char *fmt_aread(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
2857 {
2858 static char errbuf[32768];
2859 char *cp;
2860
2861 cp = errbuf;
2862 cp += sprintf(cp, "syscall: %s(&aiop->aiocb)\n", sy->sy_name);
2863 return (errbuf);
2864 }
2865 #endif /* sgi */
2866
2867 #ifndef CRAY
2868
/*
 * mmap-read entry point for the syscall table: forwards to sy_mmrw() with
 * the read/write selector set to 0 (read) and returns its result unchanged.
 */
struct status *sy_mmread(struct io_req *req, struct syscall_info *sysc, int fd,
			 char *addr)
{
	struct status *sy_mmrw();
	struct status *result;

	result = sy_mmrw(req, sysc, fd, addr, 0);
	return result;
}
2875
/*
 * mmap-write entry point for the syscall table: forwards to sy_mmrw() with
 * the read/write selector set to 1 (write) and returns its result unchanged.
 */
struct status *sy_mmwrite(struct io_req *req, struct syscall_info *sysc, int fd,
			  char *addr)
{
	struct status *sy_mmrw();
	struct status *result;

	result = sy_mmrw(req, sysc, fd, addr, 1);
	return result;
}
2882
sy_mmrw(struct io_req * req,struct syscall_info * sysc,int fd,char * addr,int rw)2883 struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc, int fd,
2884 char *addr, int rw)
2885 {
2886 /*
2887 * mmap read/write
2888 * This version is oriented towards mmaping the file to memory
2889 * ONCE and keeping it mapped.
2890 */
2891 struct status *status;
2892 void *mrc = NULL, *memaddr = NULL;
2893 struct fd_cache *fdc;
2894 struct stat sbuf;
2895 int rc;
2896
2897 status = malloc(sizeof(struct status));
2898 if (status == NULL) {
2899 doio_fprintf(stderr, "malloc failed, %s/%d\n",
2900 __FILE__, __LINE__);
2901 return NULL;
2902 }
2903 status->aioid = NULL;
2904 status->rval = -1;
2905
2906 fdc = alloc_fdcache(req->r_data.io.r_file, req->r_data.io.r_oflags);
2907
2908 if (v_opt || fdc->c_memaddr == NULL) {
2909 if (fstat(fd, &sbuf) < 0) {
2910 doio_fprintf(stderr, "fstat failed, errno=%d\n", errno);
2911 status->err = errno;
2912 return (status);
2913 }
2914
2915 fdc->c_memlen = (int)sbuf.st_size;
2916 mrc = mmap(NULL, (int)sbuf.st_size,
2917 rw ? PROT_WRITE | PROT_READ : PROT_READ,
2918 MAP_SHARED, fd, 0);
2919
2920 if (mrc == MAP_FAILED) {
2921 doio_fprintf(stderr, "mmap() failed - 0x%lx %d\n",
2922 mrc, errno);
2923 status->err = errno;
2924 return (status);
2925 }
2926
2927 fdc->c_memaddr = mrc;
2928 }
2929
2930 memaddr = (void *)((char *)fdc->c_memaddr + req->r_data.io.r_offset);
2931
2932 active_mmap_rw = 1;
2933 if (rw)
2934 memcpy(memaddr, addr, req->r_data.io.r_nbytes);
2935 else
2936 memcpy(addr, memaddr, req->r_data.io.r_nbytes);
2937 if (v_opt)
2938 msync(fdc->c_memaddr, (int)sbuf.st_size, MS_SYNC);
2939 active_mmap_rw = 0;
2940
2941 status->rval = req->r_data.io.r_nbytes;
2942 status->err = 0;
2943
2944 if (v_opt) {
2945 rc = munmap(mrc, (int)sbuf.st_size);
2946 }
2947
2948 return (status);
2949 }
2950
fmt_mmrw(struct io_req * req,struct syscall_info * sy,int fd,char * addr)2951 char *fmt_mmrw(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
2952 {
2953 static char errbuf[32768];
2954 char *cp;
2955 struct fd_cache *fdc;
2956 void *memaddr;
2957
2958 fdc = alloc_fdcache(req->r_data.io.r_file, req->r_data.io.r_oflags);
2959
2960 cp = errbuf;
2961 cp += sprintf(cp, "syscall: %s(NULL, %d, %s, MAP_SHARED, %d, 0)\n",
2962 sy->sy_name,
2963 fdc->c_memlen,
2964 (sy->sy_flags & SY_WRITE) ? "PROT_WRITE" : "PROT_READ",
2965 fd);
2966
2967 cp += sprintf(cp, "\tfile is mmaped to: 0x%lx\n",
2968 (unsigned long)fdc->c_memaddr);
2969
2970 memaddr = (void *)((char *)fdc->c_memaddr + req->r_data.io.r_offset);
2971
2972 cp += sprintf(cp, "\tfile-mem=0x%lx, length=%d, buffer=0x%lx\n",
2973 (unsigned long)memaddr, req->r_data.io.r_nbytes,
2974 (unsigned long)addr);
2975
2976 return (errbuf);
2977 }
2978 #endif /* !CRAY */
2979
2980 struct syscall_info syscalls[] = {
2981 #ifdef CRAY
2982 {"listio-read-sync", LREAD,
2983 sy_listio, NULL, fmt_listio,
2984 SY_IOSW},
2985 {"listio-read-strides-sync", LSREAD,
2986 sy_listio, listio_mem, fmt_listio,
2987 SY_IOSW},
2988 {"listio-read-reqs-sync", LEREAD,
2989 sy_listio, listio_mem, fmt_listio,
2990 SY_IOSW},
2991 {"listio-read-async", LREADA,
2992 sy_listio, NULL, fmt_listio,
2993 SY_IOSW | SY_ASYNC},
2994 {"listio-read-strides-async", LSREADA,
2995 sy_listio, listio_mem, fmt_listio,
2996 SY_IOSW | SY_ASYNC},
2997 {"listio-read-reqs-async", LEREADA,
2998 sy_listio, listio_mem, fmt_listio,
2999 SY_IOSW | SY_ASYNC},
3000 {"listio-write-sync", LWRITE,
3001 sy_listio, listio_mem, fmt_listio,
3002 SY_IOSW | SY_WRITE},
3003 {"listio-write-strides-sync", LSWRITE,
3004 sy_listio, listio_mem, fmt_listio,
3005 SY_IOSW | SY_WRITE},
3006 {"listio-write-reqs-sync", LEWRITE,
3007 sy_listio, listio_mem, fmt_listio,
3008 SY_IOSW | SY_WRITE},
3009 {"listio-write-async", LWRITEA,
3010 sy_listio, listio_mem, fmt_listio,
3011 SY_IOSW | SY_WRITE | SY_ASYNC},
3012 {"listio-write-strides-async", LSWRITEA,
3013 sy_listio, listio_mem, fmt_listio,
3014 SY_IOSW | SY_WRITE | SY_ASYNC},
3015 {"listio-write-reqs-async", LEWRITEA,
3016 sy_listio, listio_mem, fmt_listio,
3017 SY_IOSW | SY_WRITE | SY_ASYNC},
3018 #endif
3019
3020 #ifdef sgi
3021 {"aread", AREAD,
3022 sy_aread, NULL, fmt_aread,
3023 SY_IOSW | SY_ASYNC},
3024 {"awrite", AWRITE,
3025 sy_awrite, NULL, fmt_aread,
3026 SY_IOSW | SY_WRITE | SY_ASYNC},
3027 {"pread", PREAD,
3028 sy_pread, NULL, fmt_pread,
3029 0},
3030 {"pwrite", PWRITE,
3031 sy_pwrite, NULL, fmt_pread,
3032 SY_WRITE},
3033 #endif
3034
3035 #ifndef CRAY
3036 {"readv", READV,
3037 sy_readv, NULL, fmt_readv,
3038 0},
3039 {"writev", WRITEV,
3040 sy_writev, NULL, fmt_readv,
3041 SY_WRITE},
3042 {"mmap-read", MMAPR,
3043 sy_mmread, NULL, fmt_mmrw,
3044 0},
3045 {"mmap-write", MMAPW,
3046 sy_mmwrite, NULL, fmt_mmrw,
3047 SY_WRITE},
3048 #endif
3049
3050 {NULL, 0,
3051 0, 0, 0,
3052 0},
3053 };
3054
do_rw(struct io_req * req)3055 int do_rw(struct io_req *req)
3056 {
3057 static int pid = -1;
3058 int fd, offset, nbytes, nstrides, nents, oflags;
3059 int rval, mem_needed, i;
3060 int logged_write, got_lock, pattern;
3061 off_t woffset;
3062 int min_byte, max_byte;
3063 char *addr, *file, *msg;
3064 struct status *s;
3065 struct wlog_rec wrec;
3066 struct syscall_info *sy;
3067 #if defined(CRAY) || defined(sgi)
3068 struct aio_info *aiop;
3069 struct iosw *iosw;
3070 #endif
3071 #ifdef sgi
3072 struct fd_cache *fdc;
3073 #endif
3074
3075 woffset = 0;
3076
3077 /*
3078 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
3079 * r_nbytes are at the same offset in the read_req and reada_req
3080 * structures.
3081 */
3082 file = req->r_data.io.r_file;
3083 oflags = req->r_data.io.r_oflags;
3084 offset = req->r_data.io.r_offset;
3085 nbytes = req->r_data.io.r_nbytes;
3086 nstrides = req->r_data.io.r_nstrides;
3087 nents = req->r_data.io.r_nent;
3088 pattern = req->r_data.io.r_pattern;
3089
3090 if (nents >= MAX_AIO) {
3091 doio_fprintf(stderr,
3092 "do_rw: too many list requests, %d. Maximum is %d\n",
3093 nents, MAX_AIO);
3094 return (-1);
3095 }
3096
3097 /*
3098 * look up system call info
3099 */
3100 for (sy = syscalls; sy->sy_name != NULL && sy->sy_type != req->r_type;
3101 sy++) ;
3102
3103 if (sy->sy_name == NULL) {
3104 doio_fprintf(stderr, "do_rw: unknown r_type %d.\n",
3105 req->r_type);
3106 return (-1);
3107 }
3108
3109 /*
3110 * Get an open file descriptor
3111 * Note: must be done before memory allocation so that the direct i/o
3112 * information is available in mem. allocate
3113 */
3114
3115 if ((fd = alloc_fd(file, oflags)) == -1)
3116 return -1;
3117
3118 /*
3119 * Allocate core memory and possibly sds space. Initialize the
3120 * data to be written. Make sure we get enough, based on the
3121 * memstride.
3122 *
3123 * need:
3124 * 1 extra word for possible partial-word address "bump"
3125 * 1 extra word for dynamic pattern overrun
3126 * MPP_BUMP extra words for T3E non-hw-aligned memory address.
3127 */
3128
3129 if (sy->sy_buffer != NULL) {
3130 mem_needed = (*sy->sy_buffer) (req, 0, 0, NULL, NULL);
3131 } else {
3132 mem_needed = nbytes;
3133 }
3134
3135 #ifdef CRAY
3136 if ((rval =
3137 alloc_mem(mem_needed + wtob(1) * 2 +
3138 MPP_BUMP * sizeof(UINT64_T))) < 0) {
3139 return rval;
3140 }
3141 #else
3142 #ifdef sgi
3143 /* get memory alignment for using DIRECT I/O */
3144 fdc = alloc_fdcache(file, oflags);
3145
3146 if ((rval = alloc_mem(mem_needed + wtob(1) * 2 + fdc->c_memalign)) < 0) {
3147 return rval;
3148 }
3149 #else
3150 /* what is !CRAY && !sgi ? */
3151 if ((rval = alloc_mem(mem_needed + wtob(1) * 2)) < 0) {
3152 return rval;
3153 }
3154 #endif /* sgi */
3155 #endif /* CRAY */
3156
3157 Pattern[0] = pattern;
3158
3159 /*
3160 * Allocate SDS space for backdoor write if desired
3161 */
3162
3163 if (oflags & O_SSD) {
3164 #ifdef CRAY
3165 #ifndef _CRAYMPP
3166 if (alloc_sds(nbytes) == -1)
3167 return -1;
3168
3169 if (sy->sy_flags & SY_WRITE) {
3170 /*pattern_fill(Memptr, mem_needed, Pattern, Pattern_Length, 0); */
3171 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length,
3172 0);
3173
3174 if (sswrite((long)Memptr, Sdsptr, btoc(mem_needed)) ==
3175 -1) {
3176 doio_fprintf(stderr,
3177 "sswrite(%d, %d, %d) failed: %s (%d)\n",
3178 (long)Memptr, Sdsptr,
3179 btoc(mem_needed), SYSERR, errno);
3180 fflush(stderr);
3181 return -1;
3182 }
3183 }
3184
3185 addr = (char *)Sdsptr;
3186 #else
3187 doio_fprintf(stderr,
3188 "Invalid O_SSD flag was generated for MPP system\n");
3189 fflush(stderr);
3190 return -1;
3191 #endif /* _CRAYMPP */
3192 #else /* CRAY */
3193 doio_fprintf(stderr,
3194 "Invalid O_SSD flag was generated for non-Cray system\n");
3195 fflush(stderr);
3196 return -1;
3197 #endif /* CRAY */
3198 } else {
3199 addr = Memptr;
3200
3201 /*
3202 * if io is not raw, bump the offset by a random amount
3203 * to generate non-word-aligned io.
3204 *
3205 * On MPP systems, raw I/O must start on an 0x80 byte boundary.
3206 * For non-aligned I/O, bump the address from 1 to 8 words.
3207 */
3208
3209 if (!(req->r_data.io.r_uflags & F_WORD_ALIGNED)) {
3210 #ifdef _CRAYMPP
3211 addr +=
3212 random_range(0, MPP_BUMP, 1, NULL) * sizeof(int);
3213 #endif
3214 addr += random_range(0, wtob(1) - 1, 1, NULL);
3215 }
3216 #ifdef sgi
3217 /*
3218 * Force memory alignment for Direct I/O
3219 */
3220 if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
3221 addr +=
3222 fdc->c_memalign - ((long)addr % fdc->c_memalign);
3223 }
3224 #endif
3225
3226 /*
3227 * FILL must be done on a word-aligned buffer.
3228 * Call the fill function with Memptr which is aligned,
3229 * then memmove it to the right place.
3230 */
3231 if (sy->sy_flags & SY_WRITE) {
3232 (*Data_Fill) (Memptr, mem_needed, Pattern,
3233 Pattern_Length, 0);
3234 if (addr != Memptr)
3235 memmove(addr, Memptr, mem_needed);
3236 }
3237 }
3238
3239 rval = 0;
3240 got_lock = 0;
3241 logged_write = 0;
3242
3243 /*
3244 * Lock data if this is a write and locking option is set
3245 */
3246 if (sy->sy_flags & SY_WRITE && k_opt) {
3247 if (sy->sy_buffer != NULL) {
3248 (*sy->sy_buffer) (req, offset, 0, &min_byte, &max_byte);
3249 } else {
3250 min_byte = offset;
3251 max_byte = offset + (nbytes * nstrides * nents);
3252 }
3253
3254 if (lock_file_region(file, fd, F_WRLCK,
3255 min_byte, (max_byte - min_byte + 1)) < 0) {
3256 doio_fprintf(stderr,
3257 "file lock failed:\n%s\n",
3258 fmt_ioreq(req, sy, fd));
3259 doio_fprintf(stderr,
3260 " buffer(req, %d, 0, 0x%x, 0x%x)\n",
3261 offset, min_byte, max_byte);
3262 alloc_mem(-1);
3263 exit(E_INTERNAL);
3264 }
3265
3266 got_lock = 1;
3267 }
3268
3269 /*
3270 * Write a preliminary write-log entry. This is done so that
3271 * doio_check can do corruption detection across an interrupt/crash.
3272 * Note that w_done is set to 0. If doio_check sees this, it
3273 * re-creates the file extents as if the write completed, but does not
3274 * do any checking - see comments in doio_check for more details.
3275 */
3276
3277 if (sy->sy_flags & SY_WRITE && w_opt) {
3278 if (pid == -1) {
3279 pid = getpid();
3280 }
3281
3282 wrec.w_async = (sy->sy_flags & SY_ASYNC) ? 1 : 0;
3283 wrec.w_oflags = oflags;
3284 wrec.w_pid = pid;
3285 wrec.w_offset = offset;
3286 wrec.w_nbytes = nbytes; /* mem_needed -- total length */
3287
3288 wrec.w_pathlen = strlen(file);
3289 memcpy(wrec.w_path, file, wrec.w_pathlen);
3290 wrec.w_hostlen = strlen(Host);
3291 memcpy(wrec.w_host, Host, wrec.w_hostlen);
3292 wrec.w_patternlen = Pattern_Length;
3293 memcpy(wrec.w_pattern, Pattern, wrec.w_patternlen);
3294
3295 wrec.w_done = 0;
3296
3297 if ((woffset = wlog_record_write(&Wlog, &wrec, -1)) == -1) {
3298 doio_fprintf(stderr,
3299 "Could not append to write-log: %s (%d)\n",
3300 SYSERR, errno);
3301 } else {
3302 logged_write = 1;
3303 }
3304 }
3305
3306 s = (*sy->sy_syscall) (req, sy, fd, addr);
3307
3308 if (s->rval == -1) {
3309 doio_fprintf(stderr,
3310 "%s() request failed: %s (%d)\n%s\n%s\n",
3311 sy->sy_name, SYSERR, errno,
3312 fmt_ioreq(req, sy, fd),
3313 (*sy->sy_format) (req, sy, fd, addr));
3314
3315 doio_upanic(U_RVAL);
3316
3317 for (i = 0; i < nents; i++) {
3318 if (s->aioid == NULL)
3319 break;
3320 aio_unregister(s->aioid[i]);
3321 }
3322 rval = -1;
3323 } else {
3324 /*
3325 * If the syscall was async, wait for I/O to complete
3326 */
3327 #ifndef __linux__
3328 if (sy->sy_flags & SY_ASYNC) {
3329 for (i = 0; i < nents; i++) {
3330 aio_wait(s->aioid[i]);
3331 }
3332 }
3333 #endif
3334
3335 /*
3336 * Check the syscall how-much-data-written return. Look
3337 * for this in either the return value or the 'iosw'
3338 * structure.
3339 */
3340
3341 if (sy->sy_flags & SY_IOSW) {
3342 #ifdef CRAY
3343 for (i = 0; i < nents; i++) {
3344 if (s->aioid == NULL)
3345 break; /* >>> error condition? */
3346 aiop = aio_slot(s->aioid[i]);
3347 iosw = &aiop->iosw;
3348 if (iosw->sw_error != 0) {
3349 doio_fprintf(stderr,
3350 "%s() iosw error set: %s\n%s\n%s\n",
3351 sy->sy_name,
3352 strerror(iosw->sw_error),
3353 fmt_ioreq(req, sy, fd),
3354 (*sy->sy_format) (req, sy,
3355 fd,
3356 addr));
3357 doio_upanic(U_IOSW);
3358 rval = -1;
3359 } else if (iosw->sw_count != nbytes * nstrides) {
3360 doio_fprintf(stderr,
3361 "Bad iosw from %s() #%d\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n%s\n",
3362 sy->sy_name, i,
3363 1, 0, nbytes * nstrides,
3364 iosw->sw_flag,
3365 iosw->sw_error,
3366 iosw->sw_count,
3367 fmt_ioreq(req, sy, fd),
3368 (*sy->sy_format) (req, sy,
3369 fd,
3370 addr));
3371 doio_upanic(U_IOSW);
3372 rval = -1;
3373 }
3374
3375 aio_unregister(s->aioid[i]);
3376 }
3377 #endif /* CRAY */
3378 #ifdef sgi
3379 for (i = 0; s->aioid[i] != -1; i++) {
3380 if (s->aioid == NULL) {
3381 doio_fprintf(stderr,
3382 "aioid == NULL!\n");
3383 break;
3384 }
3385 aiop = aio_slot(s->aioid[i]);
3386
3387 /*
3388 * make sure the io completed without error
3389 */
3390 if (aiop->aio_errno != 0) {
3391 doio_fprintf(stderr,
3392 "%s() aio error set: %s (%d)\n%s\n%s\n",
3393 sy->sy_name,
3394 strerror(aiop->aio_errno),
3395 aiop->aio_errno,
3396 fmt_ioreq(req, sy, fd),
3397 (*sy->sy_format) (req, sy,
3398 fd,
3399 addr));
3400 doio_upanic(U_IOSW);
3401 rval = -1;
3402 } else if (aiop->aio_ret != nbytes) {
3403 doio_fprintf(stderr,
3404 "Bad aio return from %s() #%d\nExpected (%d,%d), got (%d,%d)\n%s\n%s\n",
3405 sy->sy_name, i,
3406 0, nbytes,
3407 aiop->aio_errno,
3408 aiop->aio_ret,
3409 fmt_ioreq(req, sy, fd),
3410 (*sy->sy_format) (req, sy,
3411 fd,
3412 addr));
3413 aio_unregister(s->aioid[i]);
3414 doio_upanic(U_IOSW);
3415 return -1;
3416 } else {
3417 aio_unregister(s->aioid[i]);
3418 rval = 0;
3419 }
3420 }
3421 #endif /* sgi */
3422 } else {
3423
3424 if (s->rval != mem_needed) {
3425 doio_fprintf(stderr,
3426 "%s() request returned wrong # of bytes - expected %d, got %d\n%s\n%s\n",
3427 sy->sy_name, nbytes, s->rval,
3428 fmt_ioreq(req, sy, fd),
3429 (*sy->sy_format) (req, sy, fd,
3430 addr));
3431 rval = -1;
3432 doio_upanic(U_RVAL);
3433 }
3434 }
3435 }
3436
3437 /*
3438 * Verify that the data was written correctly - check_file() returns
3439 * a non-null pointer which contains an error message if there are
3440 * problems.
3441 */
3442
3443 if (rval == 0 && sy->sy_flags & SY_WRITE && v_opt) {
3444 msg = check_file(file, offset, nbytes * nstrides * nents,
3445 Pattern, Pattern_Length, 0,
3446 oflags & O_PARALLEL);
3447 if (msg != NULL) {
3448 doio_fprintf(stderr, "%s\n%s\n%s\n",
3449 msg,
3450 fmt_ioreq(req, sy, fd),
3451 (*sy->sy_format) (req, sy, fd, addr));
3452 doio_upanic(U_CORRUPTION);
3453 exit(E_COMPARE);
3454 }
3455 }
3456
3457 /*
3458 * General cleanup ...
3459 *
3460 * Write extent information to the write-log, so that doio_check can do
3461 * corruption detection. Note that w_done is set to 1, indicating that
3462 * the write has been verified as complete. We don't need to write the
3463 * filename on the second logging.
3464 */
3465
3466 if (w_opt && logged_write) {
3467 wrec.w_done = 1;
3468 wlog_record_write(&Wlog, &wrec, woffset);
3469 }
3470
3471 /*
3472 * Unlock file region if necessary
3473 */
3474
3475 if (got_lock) {
3476 if (lock_file_region(file, fd, F_UNLCK,
3477 min_byte, (max_byte - min_byte + 1)) < 0) {
3478 alloc_mem(-1);
3479 exit(E_INTERNAL);
3480 }
3481 }
3482
3483 if (s->aioid != NULL)
3484 free(s->aioid);
3485 free(s);
3486 return (rval == -1) ? -1 : 0;
3487 }
3488
3489 /*
3490 * fcntl-based requests
3491 * - F_FRESVSP
3492 * - F_UNRESVSP
3493 * - F_FSYNC
3494 */
3495 #ifdef sgi
do_fcntl(struct io_req * req)3496 int do_fcntl(struct io_req *req)
3497 {
3498 int fd, oflags, offset, nbytes;
3499 int rval, op;
3500 int got_lock;
3501 int min_byte, max_byte;
3502 char *file, *msg;
3503 struct flock flk;
3504
3505 /*
3506 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
3507 * r_nbytes are at the same offset in the read_req and reada_req
3508 * structures.
3509 */
3510 file = req->r_data.io.r_file;
3511 oflags = req->r_data.io.r_oflags;
3512 offset = req->r_data.io.r_offset;
3513 nbytes = req->r_data.io.r_nbytes;
3514
3515 flk.l_type = 0;
3516 flk.l_whence = SEEK_SET;
3517 flk.l_start = offset;
3518 flk.l_len = nbytes;
3519
3520 /*
3521 * Get an open file descriptor
3522 */
3523
3524 if ((fd = alloc_fd(file, oflags)) == -1)
3525 return -1;
3526
3527 rval = 0;
3528 got_lock = 0;
3529
3530 /*
3531 * Lock data if this is locking option is set
3532 */
3533 if (k_opt) {
3534 min_byte = offset;
3535 max_byte = offset + nbytes;
3536
3537 if (lock_file_region(file, fd, F_WRLCK,
3538 min_byte, (nbytes + 1)) < 0) {
3539 doio_fprintf(stderr, "file lock failed:\n");
3540 doio_fprintf(stderr,
3541 " buffer(req, %d, 0, 0x%x, 0x%x)\n",
3542 offset, min_byte, max_byte);
3543 alloc_mem(-1);
3544 exit(E_INTERNAL);
3545 }
3546
3547 got_lock = 1;
3548 }
3549
3550 switch (req->r_type) {
3551 case RESVSP:
3552 op = F_RESVSP;
3553 msg = "f_resvsp";
3554 break;
3555 case UNRESVSP:
3556 op = F_UNRESVSP;
3557 msg = "f_unresvsp";
3558 break;
3559 #ifdef F_FSYNC
3560 case DFFSYNC:
3561 op = F_FSYNC;
3562 msg = "f_fsync";
3563 break;
3564 #endif
3565 }
3566
3567 rval = fcntl(fd, op, &flk);
3568
3569 if (rval == -1) {
3570 doio_fprintf(stderr,
3571 "fcntl %s request failed: %s (%d)\n\tfcntl(%d, %s %d, {%d %lld ==> %lld}\n",
3572 msg, SYSERR, errno,
3573 fd, msg, op, flk.l_whence,
3574 (long long)flk.l_start, (long long)flk.l_len);
3575
3576 doio_upanic(U_RVAL);
3577 rval = -1;
3578 }
3579
3580 /*
3581 * Unlock file region if necessary
3582 */
3583
3584 if (got_lock) {
3585 if (lock_file_region(file, fd, F_UNLCK,
3586 min_byte, (max_byte - min_byte + 1)) < 0) {
3587 alloc_mem(-1);
3588 exit(E_INTERNAL);
3589 }
3590 }
3591
3592 return (rval == -1) ? -1 : 0;
3593 }
3594 #endif /* sgi */
3595
3596 /*
3597 * fsync(2) and fdatasync(2)
3598 */
3599 #ifndef CRAY
do_sync(struct io_req * req)3600 int do_sync(struct io_req *req)
3601 {
3602 int fd, oflags;
3603 int rval;
3604 char *file;
3605
3606 /*
3607 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
3608 * r_nbytes are at the same offset in the read_req and reada_req
3609 * structures.
3610 */
3611 file = req->r_data.io.r_file;
3612 oflags = req->r_data.io.r_oflags;
3613
3614 /*
3615 * Get an open file descriptor
3616 */
3617
3618 if ((fd = alloc_fd(file, oflags)) == -1)
3619 return -1;
3620
3621 rval = 0;
3622 switch (req->r_type) {
3623 case FSYNC2:
3624 rval = fsync(fd);
3625 break;
3626 case FDATASYNC:
3627 rval = fdatasync(fd);
3628 break;
3629 default:
3630 rval = -1;
3631 }
3632 return (rval == -1) ? -1 : 0;
3633 }
3634 #endif /* !CRAY */
3635
/*
 * Fill mem_needed bytes at addr with the repeating pattern by calling
 * pattern_fill() and returning its result.
 *
 * The shift argument is accepted but not forwarded: pattern_fill() is always
 * called with a shift of 0.
 */
int
doio_pat_fill(char *addr, int mem_needed, char *Pattern, int Pattern_Length,
	      int shift)
{
	int filled;

	filled = pattern_fill(addr, mem_needed, Pattern, Pattern_Length, 0);
	return filled;
}
3642
/*
 * Check that buf holds the repeating pattern and describe the first
 * mismatch if it does not.
 *
 * buf            - data to check
 * offset         - file offset of buf[0]; used only in the message
 * length         - number of bytes in buf
 * pattern        - expected repeating pattern
 * pattern_length - number of bytes in pattern
 * patshift       - index into the pattern expected at buf[0]
 *
 * Returns NULL when pattern_check() reports no mismatch.  Otherwise returns
 * a static buffer (overwritten by each call) holding a header and, for the
 * first differing byte found, its file offset plus up to 32 expected and
 * actual bytes with unprintable characters shown as '.'.
 */
char *doio_pat_check(char *buf, int offset, int length, char *pattern,
		     int pattern_length, int patshift)
{
	static char errbuf[4096];
	int nb, i, pattern_index;
	char *cp, *bufend, *ep;
	char actual[33], expected[33];

	if (pattern_check(buf, length, pattern, pattern_length, patshift) == 0)
		return NULL;

	ep = errbuf;
	ep +=
	    sprintf(ep,
		    "Corrupt regions follow - unprintable chars are represented as '.'\n");
	ep +=
	    sprintf(ep,
		    "-----------------------------------------------------------------\n");

	pattern_index = patshift % pattern_length;
	cp = buf;
	bufend = buf + length;

	while (cp < bufend) {
		if (*cp == pattern[pattern_index]) {
			cp++;
			pattern_index++;

			if (pattern_index == pattern_length) {
				pattern_index = 0;
			}
			continue;
		}

		/* First mismatch: show at most sizeof(expected) - 1 bytes. */
		nb = bufend - cp;
		if ((unsigned int)nb > sizeof(expected) - 1) {
			nb = sizeof(expected) - 1;
		}

		ep +=
		    sprintf(ep,
			    "corrupt bytes starting at file offset %d\n",
			    offset + (int)(cp - buf));

		/*
		 * Fill in the expected and actual patterns
		 */
		memset(expected, 0x00, sizeof(expected));
		memset(actual, 0x00, sizeof(actual));

		for (i = 0; i < nb; i++) {
			/*
			 * isprint() is only defined for values representable
			 * as unsigned char (or EOF), so convert before the
			 * call; plain char may be negative.
			 */
			expected[i] =
			    pattern[(pattern_index + i) % pattern_length];
			if (!isprint((unsigned char)expected[i])) {
				expected[i] = '.';
			}

			actual[i] = cp[i];
			if (!isprint((unsigned char)actual[i])) {
				actual[i] = '.';
			}
		}

		ep +=
		    sprintf(ep,
			    " 1st %2d expected bytes: %s\n",
			    nb, expected);
		ep +=
		    sprintf(ep,
			    " 1st %2d actual bytes: %s\n",
			    nb, actual);
		fflush(stderr);
		return errbuf;
	}

	/* pattern_check() reported a problem but the scan found no mismatch. */
	return errbuf;
}
3720
3721 /*
3722 * Check the contents of a file beginning at offset, for length bytes. It
3723 * is assumed that there is a string of pattern bytes in this area of the
3724 * file. Use normal buffered reads to do the verification.
3725 *
3726 * If there is a data mismatch, write a detailed message into a static buffer
3727 * suitable for the caller to print. Otherwise print NULL.
3728 *
3729 * The fsa flag is set to non-zero if the buffer should be read back through
3730 * the FSA (unicos/mk). This implies the file will be opened
3731 * O_PARALLEL|O_RAW|O_WELLFORMED to do the validation. We must do this because
3732 * FSA will not allow the file to be opened for buffered io if it was
3733 * previously opened for O_PARALLEL io.
3734 */
3735
check_file(char * file,int offset,int length,char * pattern,int pattern_length,int patshift,int fsa)3736 char *check_file(char *file, int offset, int length, char *pattern,
3737 int pattern_length, int patshift, int fsa)
3738 {
3739 static char errbuf[4096];
3740 int fd, nb, flags;
3741 char *buf, *em, *ep;
3742 #ifdef sgi
3743 struct fd_cache *fdc;
3744 #endif
3745
3746 buf = Memptr;
3747
3748 if (V_opt) {
3749 flags = Validation_Flags | O_RDONLY;
3750 } else {
3751 flags = O_RDONLY;
3752 if (fsa) {
3753 #ifdef CRAY
3754 flags |= O_PARALLEL | O_RAW | O_WELLFORMED;
3755 #endif
3756 }
3757 }
3758
3759 if ((fd = alloc_fd(file, flags)) == -1) {
3760 sprintf(errbuf,
3761 "Could not open file %s with flags %#o (%s) for data comparison: %s (%d)\n",
3762 file, flags, format_oflags(flags), SYSERR, errno);
3763 return errbuf;
3764 }
3765
3766 if (lseek(fd, offset, SEEK_SET) == -1) {
3767 sprintf(errbuf,
3768 "Could not lseek to offset %d in %s for verification: %s (%d)\n",
3769 offset, file, SYSERR, errno);
3770 return errbuf;
3771 }
3772 #ifdef sgi
3773 /* Irix: Guarantee a properly aligned address on Direct I/O */
3774 fdc = alloc_fdcache(file, flags);
3775 if ((flags & O_DIRECT) && ((long)buf % fdc->c_memalign != 0)) {
3776 buf += fdc->c_memalign - ((long)buf % fdc->c_memalign);
3777 }
3778 #endif
3779
3780 if ((nb = read(fd, buf, length)) == -1) {
3781 #ifdef sgi
3782 sprintf(errbuf,
3783 "Could not read %d bytes from %s for verification: %s (%d)\n\tread(%d, 0x%lx, %d)\n\tbuf %% alignment(%d) = %ld\n",
3784 length, file, SYSERR, errno,
3785 fd, buf, length,
3786 fdc->c_memalign, (long)buf % fdc->c_memalign);
3787 #else
3788 sprintf(errbuf,
3789 "Could not read %d bytes from %s for verification: %s (%d)\n",
3790 length, file, SYSERR, errno);
3791
3792 #endif
3793 return errbuf;
3794 }
3795
3796 if (nb != length) {
3797 sprintf(errbuf,
3798 "Read wrong # bytes from %s. Expected %d, got %d\n",
3799 file, length, nb);
3800 return errbuf;
3801 }
3802
3803 if ((em =
3804 (*Data_Check) (buf, offset, length, pattern, pattern_length,
3805 patshift)) != NULL) {
3806 ep = errbuf;
3807 ep += sprintf(ep, "*** DATA COMPARISON ERROR ***\n");
3808 ep +=
3809 sprintf(ep, "check_file(%s, %d, %d, %s, %d, %d) failed\n\n",
3810 file, offset, length, pattern, pattern_length,
3811 patshift);
3812 ep +=
3813 sprintf(ep, "Comparison fd is %d, with open flags %#o\n",
3814 fd, flags);
3815 strcpy(ep, em);
3816 return (errbuf);
3817 }
3818 return NULL;
3819 }
3820
3821 /*
3822 * Function to single-thread stdio output.
3823 */
3824
doio_fprintf(FILE * stream,char * format,...)3825 int doio_fprintf(FILE * stream, char *format, ...)
3826 {
3827 static int pid = -1;
3828 char *date;
3829 int rval;
3830 struct flock flk;
3831 va_list arglist;
3832 struct timeval ts;
3833 gettimeofday(&ts, NULL);
3834 date = hms(ts.tv_sec);
3835
3836 if (pid == -1) {
3837 pid = getpid();
3838 }
3839
3840 flk.l_whence = flk.l_start = flk.l_len = 0;
3841 flk.l_type = F_WRLCK;
3842 fcntl(fileno(stream), F_SETLKW, &flk);
3843
3844 va_start(arglist, format);
3845 rval = fprintf(stream, "\n%s%s (%5d) %s\n", Prog, TagName, pid, date);
3846 rval += fprintf(stream, "---------------------\n");
3847 vfprintf(stream, format, arglist);
3848 va_end(arglist);
3849
3850 fflush(stream);
3851
3852 flk.l_type = F_UNLCK;
3853 fcntl(fileno(stream), F_SETLKW, &flk);
3854
3855 return rval;
3856 }
3857
3858 /*
3859 * Simple function for allocating core memory. Uses Memsize and Memptr to
3860 * keep track of the current amount allocated.
3861 */
3862 #ifndef CRAY
alloc_mem(int nbytes)3863 int alloc_mem(int nbytes)
3864 {
3865 char *cp;
3866 void *addr;
3867 int me = 0, flags, key, shmid;
3868 static int mturn = 0; /* which memory type to use */
3869 struct memalloc *M;
3870 char filename[255];
3871 #ifdef __linux__
3872 struct shmid_ds shm_ds;
3873 #endif
3874
3875 #ifdef __linux__
3876 memset(&shm_ds, 0x00, sizeof(struct shmid_ds));
3877 #endif
3878
3879 /* nbytes = -1 means "free all allocated memory" */
3880 if (nbytes == -1) {
3881
3882 for (me = 0; me < Nmemalloc; me++) {
3883 if (Memalloc[me].space == NULL)
3884 continue;
3885
3886 switch (Memalloc[me].memtype) {
3887 case MEM_DATA:
3888 #ifdef sgi
3889 if (Memalloc[me].flags & MEMF_MPIN)
3890 munpin(Memalloc[me].space,
3891 Memalloc[me].size);
3892 #endif
3893 free(Memalloc[me].space);
3894 Memalloc[me].space = NULL;
3895 Memptr = NULL;
3896 Memsize = 0;
3897 break;
3898 case MEM_SHMEM:
3899 #ifdef sgi
3900 if (Memalloc[me].flags & MEMF_MPIN)
3901 munpin(Memalloc[me].space,
3902 Memalloc[me].size);
3903 #endif
3904 shmdt(Memalloc[me].space);
3905 Memalloc[me].space = NULL;
3906 #ifdef sgi
3907 shmctl(Memalloc[me].fd, IPC_RMID);
3908 #else
3909 shmctl(Memalloc[me].fd, IPC_RMID, &shm_ds);
3910 #endif
3911 break;
3912 case MEM_MMAP:
3913 #ifdef sgi
3914 if (Memalloc[me].flags & MEMF_MPIN)
3915 munpin(Memalloc[me].space,
3916 Memalloc[me].size);
3917 #endif
3918 munmap(Memalloc[me].space, Memalloc[me].size);
3919 close(Memalloc[me].fd);
3920 if (Memalloc[me].flags & MEMF_FILE) {
3921 unlink(Memalloc[me].name);
3922 }
3923 Memalloc[me].space = NULL;
3924 break;
3925 default:
3926 doio_fprintf(stderr,
3927 "alloc_mem: HELP! Unknown memory space type %d index %d\n",
3928 Memalloc[me].memtype, me);
3929 break;
3930 }
3931 }
3932 return 0;
3933 }
3934
3935 /*
3936 * Select a memory area (currently round-robbin)
3937 */
3938
3939 if (mturn >= Nmemalloc)
3940 mturn = 0;
3941
3942 M = &Memalloc[mturn];
3943
3944 switch (M->memtype) {
3945 case MEM_DATA:
3946 if (nbytes > M->size) {
3947 if (M->space != NULL) {
3948 #ifdef sgi
3949 if (M->flags & MEMF_MPIN)
3950 munpin(M->space, M->size);
3951 #endif
3952 free(M->space);
3953 }
3954 M->space = NULL;
3955 M->size = 0;
3956 }
3957
3958 if (M->space == NULL) {
3959 if ((cp = malloc(nbytes)) == NULL) {
3960 doio_fprintf(stderr,
3961 "malloc(%d) failed: %s (%d)\n",
3962 nbytes, SYSERR, errno);
3963 return -1;
3964 }
3965 #ifdef sgi
3966 if (M->flags & MEMF_MPIN) {
3967 if (mpin(cp, nbytes) == -1) {
3968 doio_fprintf(stderr,
3969 "mpin(0x%lx, %d) failed: %s (%d)\n",
3970 cp, nbytes, SYSERR, errno);
3971 }
3972 }
3973 #endif
3974 M->space = (void *)cp;
3975 M->size = nbytes;
3976 }
3977 break;
3978
3979 case MEM_MMAP:
3980 if (nbytes > M->size) {
3981 if (M->space != NULL) {
3982 #ifdef sgi
3983 if (M->flags & MEMF_MPIN)
3984 munpin(M->space, M->size);
3985 #endif
3986 munmap(M->space, M->size);
3987 close(M->fd);
3988 if (M->flags & MEMF_FILE)
3989 unlink(M->name);
3990 }
3991 M->space = NULL;
3992 M->size = 0;
3993 }
3994
3995 if (M->space == NULL) {
3996 if (strchr(M->name, '%')) {
3997 sprintf(filename, M->name, getpid());
3998 M->name = strdup(filename);
3999 }
4000
4001 if ((M->fd =
4002 open(M->name, O_CREAT | O_RDWR, 0666)) == -1) {
4003 doio_fprintf(stderr,
4004 "alloc_mmap: error %d (%s) opening '%s'\n",
4005 errno, SYSERR, M->name);
4006 return (-1);
4007 }
4008
4009 addr = NULL;
4010 flags = 0;
4011 M->size = nbytes * 4;
4012
4013 /* bias addr if MEMF_ADDR | MEMF_FIXADDR */
4014 /* >>> how to pick a memory address? */
4015
4016 /* bias flags on MEMF_PRIVATE etc */
4017 if (M->flags & MEMF_PRIVATE)
4018 flags |= MAP_PRIVATE;
4019 #ifdef sgi
4020 if (M->flags & MEMF_LOCAL)
4021 flags |= MAP_LOCAL;
4022 if (M->flags & MEMF_AUTORESRV)
4023 flags |= MAP_AUTORESRV;
4024 if (M->flags & MEMF_AUTOGROW)
4025 flags |= MAP_AUTOGROW;
4026 #endif
4027 if (M->flags & MEMF_SHARED)
4028 flags |= MAP_SHARED;
4029
4030 /*printf("alloc_mem, about to mmap, fd=%d, name=(%s)\n", M->fd, M->name);*/
4031 if ((M->space = mmap(addr, M->size,
4032 PROT_READ | PROT_WRITE,
4033 flags, M->fd, 0))
4034 == MAP_FAILED) {
4035 doio_fprintf(stderr,
4036 "alloc_mem: mmap error. errno %d (%s)\n\tmmap(addr 0x%x, size %d, read|write 0x%x, mmap flags 0x%x [%#o], fd %d, 0)\n\tfile %s\n",
4037 errno, SYSERR, addr, M->size,
4038 PROT_READ | PROT_WRITE, flags,
4039 M->flags, M->fd, M->name);
4040 doio_fprintf(stderr, "\t%s%s%s%s%s",
4041 (flags & MAP_PRIVATE) ? "private "
4042 : "",
4043 #ifdef sgi
4044 (flags & MAP_LOCAL) ? "local " :
4045 "",
4046 (flags & MAP_AUTORESRV) ?
4047 "autoresrv " : "",
4048 (flags & MAP_AUTOGROW) ?
4049 "autogrow " : "",
4050 #endif
4051 (flags & MAP_SHARED) ? "shared" :
4052 "");
4053 return (-1);
4054 }
4055 }
4056 break;
4057
4058 case MEM_SHMEM:
4059 if (nbytes > M->size) {
4060 if (M->space != NULL) {
4061 #ifdef sgi
4062 if (M->flags & MEMF_MPIN)
4063 munpin(M->space, M->size);
4064 #endif
4065 shmdt(M->space);
4066 #ifdef sgi
4067 shmctl(M->fd, IPC_RMID);
4068 #else
4069 shmctl(M->fd, IPC_RMID, &shm_ds);
4070 #endif
4071 }
4072 M->space = NULL;
4073 M->size = 0;
4074 }
4075
4076 if (M->space == NULL) {
4077 if (!strcmp(M->name, "private")) {
4078 key = IPC_PRIVATE;
4079 } else {
4080 sscanf(M->name, "%i", &key);
4081 }
4082
4083 M->size = M->nblks ? M->nblks * 512 : nbytes;
4084
4085 if (nbytes > M->size) {
4086 #ifdef DEBUG
4087 doio_fprintf(stderr,
4088 "MEM_SHMEM: nblks(%d) too small: nbytes=%d Msize=%d, skipping this req.\n",
4089 M->nblks, nbytes, M->size);
4090 #endif
4091 return SKIP_REQ;
4092 }
4093
4094 shmid = shmget(key, M->size, IPC_CREAT | 0666);
4095 if (shmid == -1) {
4096 doio_fprintf(stderr,
4097 "shmget(0x%x, %d, CREAT) failed: %s (%d)\n",
4098 key, M->size, SYSERR, errno);
4099 return (-1);
4100 }
4101 M->fd = shmid;
4102 M->space = shmat(shmid, NULL, SHM_RND);
4103 if (M->space == (void *)-1) {
4104 doio_fprintf(stderr,
4105 "shmat(0x%x, NULL, SHM_RND) failed: %s (%d)\n",
4106 shmid, SYSERR, errno);
4107 return (-1);
4108 }
4109 #ifdef sgi
4110 if (M->flags & MEMF_MPIN) {
4111 if (mpin(M->space, M->size) == -1) {
4112 doio_fprintf(stderr,
4113 "mpin(0x%lx, %d) failed: %s (%d)\n",
4114 M->space, M->size, SYSERR,
4115 errno);
4116 }
4117 }
4118 #endif
4119 }
4120 break;
4121
4122 default:
4123 doio_fprintf(stderr,
4124 "alloc_mem: HELP! Unknown memory space type %d index %d\n",
4125 Memalloc[me].memtype, mturn);
4126 break;
4127 }
4128
4129 Memptr = M->space;
4130 Memsize = M->size;
4131
4132 mturn++;
4133 return 0;
4134 }
4135 #else /* CRAY */
alloc_mem(int nbytes)4136 int alloc_mem(int nbytes)
4137 {
4138 char *cp;
4139 int ip;
4140 static char *malloc_space;
4141
4142 /*
4143 * The "unicos" version of this did some stuff with sbrk;
4144 * this caused problems with async I/O on irix, and now appears
4145 * to be causing problems with FSA I/O on unicos/mk.
4146 */
4147 #ifdef NOTDEF
4148 if (nbytes > Memsize) {
4149 if ((cp = (char *)sbrk(nbytes - Memsize)) == (char *)-1) {
4150 doio_fprintf(stderr, "sbrk(%d) failed: %s (%d)\n",
4151 nbytes - Memsize, SYSERR, errno);
4152 return -1;
4153 }
4154
4155 if (Memsize == 0)
4156 Memptr = cp;
4157 Memsize += nbytes - Memsize;
4158 }
4159 #else
4160
4161 /* nbytes = -1 means "free all allocated memory" */
4162 if (nbytes == -1) {
4163 free(malloc_space);
4164 Memptr = NULL;
4165 Memsize = 0;
4166 return 0;
4167 }
4168
4169 if (nbytes > Memsize) {
4170 if (Memsize != 0)
4171 free(malloc_space);
4172
4173 if ((cp = malloc_space = malloc(nbytes)) == NULL) {
4174 doio_fprintf(stderr, "malloc(%d) failed: %s (%d)\n",
4175 nbytes, SYSERR, errno);
4176 return -1;
4177 }
4178 #ifdef _CRAYT3E
4179 /* T3E requires memory to be aligned on 0x40 word boundaries */
4180 ip = (int)cp;
4181 if (ip & 0x3F != 0) {
4182 doio_fprintf(stderr,
4183 "malloc(%d) = 0x%x(0x%x) not aligned by 0x%x\n",
4184 nbytes, cp, ip, ip & 0x3f);
4185
4186 free(cp);
4187 if ((cp = malloc_space = malloc(nbytes + 0x40)) == NULL) {
4188 doio_fprintf(stderr,
4189 "malloc(%d) failed: %s (%d)\n",
4190 nbytes, SYSERR, errno);
4191 return -1;
4192 }
4193 ip = (int)cp;
4194 cp += (0x40 - (ip & 0x3F));
4195 }
4196 #endif /* _CRAYT3E */
4197 Memptr = cp;
4198 Memsize = nbytes;
4199 }
4200 #endif /* NOTDEF */
4201 return 0;
4202 }
4203 #endif /* CRAY */
4204
4205 /*
4206 * Simple function for allocating sds space. Uses Sdssize and Sdsptr to
4207 * keep track of location and size of currently allocated chunk.
4208 */
4209
4210 #ifdef _CRAY1
4211
alloc_sds(int nbytes)4212 int alloc_sds(int nbytes)
4213 {
4214 int nblks;
4215
4216 if (nbytes > Sdssize) {
4217 if ((nblks = ssbreak(btoc(nbytes - Sdssize))) == -1) {
4218 doio_fprintf(stderr, "ssbreak(%d) failed: %s (%d)\n",
4219 btoc(nbytes - Sdssize), SYSERR, errno);
4220 return -1;
4221 }
4222
4223 Sdssize = ctob(nblks);
4224 Sdsptr = 0;
4225 }
4226
4227 return 0;
4228 }
4229
4230 #else
4231
4232 #ifdef CRAY
4233
alloc_sds(int nbytes)4234 int alloc_sds(int nbytes)
4235 {
4236 doio_fprintf(stderr,
4237 "Internal Error - alloc_sds() called on a CRAY2 system\n");
4238 alloc_mem(-1);
4239 exit(E_INTERNAL);
4240 }
4241
4242 #endif
4243
4244 #endif /* _CRAY1 */
4245
4246 /*
4247 * Function to maintain a file descriptor cache, so that doio does not have
4248 * to do so many open() and close() calls. Descriptors are stored in the
4249 * cache by file name, and open flags. Each entry also has a _rtc value
4250 * associated with it which is used in aging. If doio cannot open a file
4251 * because it already has too many open (ie. system limit hit) it will close
4252 * the one in the cache that has the oldest _rtc value.
4253 *
4254 * If alloc_fd() is called with a file of NULL, it will close all descriptors
4255 * in the cache, and free the memory in the cache.
4256 */
4257
alloc_fd(char * file,int oflags)4258 int alloc_fd(char *file, int oflags)
4259 {
4260 struct fd_cache *fdc;
4261 struct fd_cache *alloc_fdcache(char *file, int oflags);
4262
4263 fdc = alloc_fdcache(file, oflags);
4264 if (fdc != NULL)
4265 return (fdc->c_fd);
4266 else
4267 return (-1);
4268 }
4269
alloc_fdcache(char * file,int oflags)4270 struct fd_cache *alloc_fdcache(char *file, int oflags)
4271 {
4272 int fd;
4273 struct fd_cache *free_slot, *oldest_slot, *cp;
4274 static int cache_size = 0;
4275 static struct fd_cache *cache = NULL;
4276 #ifdef sgi
4277 struct dioattr finfo;
4278 #endif
4279
4280 /*
4281 * If file is NULL, it means to free up the fd cache.
4282 */
4283
4284 if (file == NULL && cache != NULL) {
4285 for (cp = cache; cp < &cache[cache_size]; cp++) {
4286 if (cp->c_fd != -1) {
4287 close(cp->c_fd);
4288 }
4289 #ifndef CRAY
4290 if (cp->c_memaddr != NULL) {
4291 munmap(cp->c_memaddr, cp->c_memlen);
4292 }
4293 #endif
4294 }
4295
4296 free(cache);
4297 cache = NULL;
4298 cache_size = 0;
4299 return 0;
4300 }
4301
4302 free_slot = NULL;
4303 oldest_slot = NULL;
4304
4305 /*
4306 * Look for a fd in the cache. If one is found, return it directly.
4307 * Otherwise, when this loop exits, oldest_slot will point to the
4308 * oldest fd slot in the cache, and free_slot will point to an
4309 * unoccupied slot if there are any.
4310 */
4311
4312 for (cp = cache; cp != NULL && cp < &cache[cache_size]; cp++) {
4313 if (cp->c_fd != -1 &&
4314 cp->c_oflags == oflags && strcmp(cp->c_file, file) == 0) {
4315 #ifdef CRAY
4316 cp->c_rtc = _rtc();
4317 #else
4318 cp->c_rtc = Reqno;
4319 #endif
4320 return cp;
4321 }
4322
4323 if (cp->c_fd == -1) {
4324 if (free_slot == NULL) {
4325 free_slot = cp;
4326 }
4327 } else {
4328 if (oldest_slot == NULL ||
4329 cp->c_rtc < oldest_slot->c_rtc) {
4330 oldest_slot = cp;
4331 }
4332 }
4333 }
4334
4335 /*
4336 * No matching file/oflags pair was found in the cache. Attempt to
4337 * open a new fd.
4338 */
4339
4340 if ((fd = open(file, oflags, 0666)) < 0) {
4341 if (errno != EMFILE) {
4342 doio_fprintf(stderr,
4343 "Could not open file %s with flags %#o (%s): %s (%d)\n",
4344 file, oflags, format_oflags(oflags),
4345 SYSERR, errno);
4346 alloc_mem(-1);
4347 exit(E_SETUP);
4348 }
4349
4350 /*
4351 * If we get here, we have as many open fd's as we can have.
4352 * Close the oldest one in the cache (pointed to by
4353 * oldest_slot), and attempt to re-open.
4354 */
4355
4356 close(oldest_slot->c_fd);
4357 oldest_slot->c_fd = -1;
4358 free_slot = oldest_slot;
4359
4360 if ((fd = open(file, oflags, 0666)) < 0) {
4361 doio_fprintf(stderr,
4362 "Could not open file %s with flags %#o (%s): %s (%d)\n",
4363 file, oflags, format_oflags(oflags),
4364 SYSERR, errno);
4365 alloc_mem(-1);
4366 exit(E_SETUP);
4367 }
4368 }
4369
4370 /*printf("alloc_fd: new file %s flags %#o fd %d\n", file, oflags, fd);*/
4371
4372 /*
4373 * If we get here, fd is our open descriptor. If free_slot is NULL,
4374 * we need to grow the cache, otherwise free_slot is the slot that
4375 * should hold the fd info.
4376 */
4377
4378 if (free_slot == NULL) {
4379 cache =
4380 (struct fd_cache *)realloc(cache,
4381 sizeof(struct fd_cache) *
4382 (FD_ALLOC_INCR + cache_size));
4383 if (cache == NULL) {
4384 doio_fprintf(stderr,
4385 "Could not malloc() space for fd chace");
4386 alloc_mem(-1);
4387 exit(E_SETUP);
4388 }
4389
4390 cache_size += FD_ALLOC_INCR;
4391
4392 for (cp = &cache[cache_size - FD_ALLOC_INCR];
4393 cp < &cache[cache_size]; cp++) {
4394 cp->c_fd = -1;
4395 }
4396
4397 free_slot = &cache[cache_size - FD_ALLOC_INCR];
4398 }
4399
4400 /*
4401 * finally, fill in the cache slot info
4402 */
4403
4404 free_slot->c_fd = fd;
4405 free_slot->c_oflags = oflags;
4406 strcpy(free_slot->c_file, file);
4407 #ifdef CRAY
4408 free_slot->c_rtc = _rtc();
4409 #else
4410 free_slot->c_rtc = Reqno;
4411 #endif
4412
4413 #ifdef sgi
4414 if (oflags & O_DIRECT) {
4415 if (fcntl(fd, F_DIOINFO, &finfo) == -1) {
4416 finfo.d_mem = 1;
4417 finfo.d_miniosz = 1;
4418 finfo.d_maxiosz = 1;
4419 }
4420 } else {
4421 finfo.d_mem = 1;
4422 finfo.d_miniosz = 1;
4423 finfo.d_maxiosz = 1;
4424 }
4425
4426 free_slot->c_memalign = finfo.d_mem;
4427 free_slot->c_miniosz = finfo.d_miniosz;
4428 free_slot->c_maxiosz = finfo.d_maxiosz;
4429 #endif /* sgi */
4430 #ifndef CRAY
4431 free_slot->c_memaddr = NULL;
4432 free_slot->c_memlen = 0;
4433 #endif
4434
4435 return free_slot;
4436 }
4437
4438 /*
4439 *
4440 * Signal Handling Section
4441 *
4442 *
4443 */
4444
4445 #ifdef sgi
4446 /*
4447 * "caller-id" for signals
4448 */
signal_info(int sig,siginfo_t * info,void * v)4449 void signal_info(int sig, siginfo_t * info, void *v)
4450 {
4451 int haveit = 0;
4452
4453 if (info != NULL) {
4454 switch (info->si_code) {
4455 case SI_USER:
4456 doio_fprintf(stderr,
4457 "signal_info: si_signo %d si_errno %d si_code SI_USER pid %d uid %d\n",
4458 info->si_signo, info->si_errno,
4459 info->si_pid, info->si_uid);
4460 haveit = 1;
4461 break;
4462
4463 case SI_QUEUE:
4464 doio_fprintf(stderr,
4465 "signal_info si_signo %d si_code = SI_QUEUE\n",
4466 info->si_signo);
4467 haveit = 1;
4468 break;
4469 }
4470
4471 if (!haveit) {
4472 if ((info->si_signo == SIGSEGV) ||
4473 (info->si_signo == SIGBUS)) {
4474 doio_fprintf(stderr,
4475 "signal_info si_signo %d si_errno %d si_code = %d si_addr=%p active_mmap_rw=%d havesigint=%d\n",
4476 info->si_signo, info->si_errno,
4477 info->si_code, info->si_addr,
4478 active_mmap_rw, havesigint);
4479 haveit = 1;
4480 }
4481 }
4482
4483 if (!haveit) {
4484 doio_fprintf(stderr,
4485 "signal_info: si_signo %d si_errno %d unknown code %d\n",
4486 info->si_signo, info->si_errno,
4487 info->si_code);
4488 }
4489 } else {
4490 doio_fprintf(stderr, "signal_info: sig %d\n", sig);
4491 }
4492 }
4493
/*
 * Quiet shutdown handler (sgi siginfo form): record that the shutdown
 * signal was seen, release all memory via alloc_mem(-1) and exit with
 * status 0.  Nothing is printed and none of the arguments are examined.
 */
void cleanup_handler(int sig, siginfo_t * info, void *v)
{
	/* remembered in case there's a followup signal */
	havesigint = 1;

	alloc_mem(-1);
	exit(0);
}
4501
/*
 * Fatal-signal handler (sgi siginfo form): announce the signal, print the
 * siginfo details through signal_info(), release all memory via
 * alloc_mem(-1) and exit with status 1.
 */
void die_handler(int sig, siginfo_t * info, void *v)
{
	doio_fprintf(stderr, "terminating on signal %d\n", sig);
	signal_info(sig, info, v);

	alloc_mem(-1);
	exit(1);
}
4509
/*
 * SIGBUS handler (sgi siginfo form).
 *
 * While we are doing a memcpy to/from an mmapped region we can get a
 * SIGBUS for a variety of reasons, and not all of them should be
 * considered failures.  Normally a SIGINT means we've been told to shut
 * down; if it arrives during such a memcpy the kernel follows it with a
 * SIGBUS.  We guess that this is what happened when an mmap read/write is
 * active, a SIGINT has already been seen, and si_errno in the siginfo
 * structure is EINTR.  (Looking at si_addr might seem to strengthen the
 * guess, but a fault off the end of the mapping would presumably not have
 * havesigint set anyway.)
 *
 * In that case the process shuts down quietly through cleanup_handler();
 * any other SIGBUS goes to die_handler().
 */
void sigbus_handler(int sig, siginfo_t * info, void *v)
{
	int interrupted_copy;

	interrupted_copy = active_mmap_rw && havesigint &&
	    (info->si_errno == EINTR);

	if (!interrupted_copy) {
		die_handler(sig, info, v);
		return;
	}

	cleanup_handler(sig, info, v);
}
4534 #else
4535
cleanup_handler(int sig)4536 void cleanup_handler(int sig)
4537 {
4538 havesigint = 1; /* in case there's a followup signal */
4539 alloc_mem(-1);
4540 exit(0);
4541 }
4542
/*
 * Fatal-signal handler: announce the signal number, release all memory via
 * alloc_mem(-1) and exit with status 1.
 */
void die_handler(int sig)
{
	doio_fprintf(stderr, "terminating on signal %d\n", sig);

	alloc_mem(-1);
	exit(1);
}
4549
4550 #ifndef CRAY
sigbus_handler(int sig)4551 void sigbus_handler(int sig)
4552 {
4553 /* See sigbus_handler() in the 'ifdef sgi' case for details. Here,
4554 we don't have the siginfo stuff so the guess is weaker but we'll
4555 do it anyway.
4556 */
4557
4558 if (active_mmap_rw && havesigint)
4559 cleanup_handler(sig);
4560 else
4561 die_handler(sig);
4562 }
4563 #endif /* !CRAY */
4564 #endif /* sgi */
4565
/*
 * Signal handler that deliberately does nothing; the signal number is
 * ignored.
 */
void noop_handler(int sig)
{
	(void)sig;
}
4570
4571 /*
4572 * SIGINT handler for the parent (original doio) process. It simply sends
4573 * a SIGINT to all of the doio children. Since they're all in the same
4574 * pgrp, this can be done with a single kill().
4575 */
4576
sigint_handler(int sig)4577 void sigint_handler(int sig)
4578 {
4579 int i;
4580
4581 for (i = 0; i < Nchildren; i++) {
4582 if (Children[i] != -1) {
4583 kill(Children[i], SIGINT);
4584 }
4585 }
4586 }
4587
4588 /*
4589 * Signal handler used to inform a process when async io completes. Referenced
4590 * in do_read() and do_write(). Note that the signal handler is not
4591 * re-registered.
4592 */
4593
aio_handler(int sig)4594 void aio_handler(int sig)
4595 {
4596 unsigned int i;
4597 struct aio_info *aiop;
4598
4599 for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
4600 aiop = &Aio_Info[i];
4601
4602 if (aiop->strategy == A_SIGNAL && aiop->sig == sig) {
4603 aiop->signalled++;
4604
4605 if (aio_done(aiop)) {
4606 aiop->done++;
4607 }
4608 }
4609 }
4610 }
4611
4612 /*
4613 * dump info on all open aio slots
4614 */
dump_aio(void)4615 void dump_aio(void)
4616 {
4617 unsigned int i, count;
4618
4619 count = 0;
4620 for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
4621 if (Aio_Info[i].busy) {
4622 count++;
4623 fprintf(stderr,
4624 "Aio_Info[%03d] id=%d fd=%d signal=%d signaled=%d\n",
4625 i, Aio_Info[i].id,
4626 Aio_Info[i].fd,
4627 Aio_Info[i].sig, Aio_Info[i].signalled);
4628 fprintf(stderr, "\tstrategy=%s\n",
4629 format_strat(Aio_Info[i].strategy));
4630 }
4631 }
4632 fprintf(stderr, "%d active async i/os\n", count);
4633 }
4634
4635 #ifdef sgi
4636 /*
4637 * Signal handler called as a callback, not as a signal.
4638 * 'val' is the value from sigev_value and is assumed to be the
4639 * Aio_Info[] index.
4640 */
cb_handler(sigval_t val)4641 void cb_handler(sigval_t val)
4642 {
4643 struct aio_info *aiop;
4644
4645 /*printf("cb_handler requesting slot %d\n", val.sival_int);*/
4646 aiop = aio_slot(val.sival_int);
4647 /*printf("cb_handler, aiop=%p\n", aiop);*/
4648
4649 /*printf("%d in cb_handler\n", getpid() );*/
4650 if (aiop->strategy == A_CALLBACK) {
4651 aiop->signalled++;
4652
4653 if (aio_done(aiop)) {
4654 aiop->done++;
4655 }
4656 }
4657 }
4658 #endif
4659
aio_slot(int aio_id)4660 struct aio_info *aio_slot(int aio_id)
4661 {
4662 unsigned int i;
4663 static int id = 1;
4664 struct aio_info *aiop;
4665
4666 aiop = NULL;
4667
4668 for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
4669 if (aio_id == -1) {
4670 if (!Aio_Info[i].busy) {
4671 aiop = &Aio_Info[i];
4672 aiop->busy = 1;
4673 aiop->id = id++;
4674 break;
4675 }
4676 } else {
4677 if (Aio_Info[i].busy && Aio_Info[i].id == aio_id) {
4678 aiop = &Aio_Info[i];
4679 break;
4680 }
4681 }
4682 }
4683
4684 if (aiop == NULL) {
4685 doio_fprintf(stderr, "aio_slot(%d) not found. Request %d\n",
4686 aio_id, Reqno);
4687 dump_aio();
4688 alloc_mem(-1);
4689 exit(E_INTERNAL);
4690 }
4691
4692 return aiop;
4693 }
4694
aio_register(int fd,int strategy,int sig)4695 int aio_register(int fd, int strategy, int sig)
4696 {
4697 struct aio_info *aiop;
4698 struct sigaction sa;
4699
4700 aiop = aio_slot(-1);
4701
4702 aiop->fd = fd;
4703 aiop->strategy = strategy;
4704 aiop->done = 0;
4705 #ifdef CRAY
4706 memset((char *)&aiop->iosw, 0x00, sizeof(aiop->iosw));
4707 #endif
4708
4709 if (strategy == A_SIGNAL) {
4710 aiop->sig = sig;
4711 aiop->signalled = 0;
4712
4713 sa.sa_handler = aio_handler;
4714 sa.sa_flags = 0;
4715 sigemptyset(&sa.sa_mask);
4716
4717 sigaction(sig, &sa, &aiop->osa);
4718 } else {
4719 aiop->sig = -1;
4720 aiop->signalled = 0;
4721 }
4722
4723 return aiop->id;
4724 }
4725
aio_unregister(int aio_id)4726 int aio_unregister(int aio_id)
4727 {
4728 struct aio_info *aiop;
4729
4730 aiop = aio_slot(aio_id);
4731
4732 if (aiop->strategy == A_SIGNAL) {
4733 sigaction(aiop->sig, &aiop->osa, NULL);
4734 }
4735
4736 aiop->busy = 0;
4737 return 0;
4738 }
4739
4740 #ifndef __linux__
aio_wait(int aio_id)4741 int aio_wait(int aio_id)
4742 {
4743 #ifdef RECALL_SIZEOF
4744 long mask[RECALL_SIZEOF];
4745 #endif
4746 sigset_t sigset;
4747 struct aio_info *aiop;
4748 #ifdef CRAY
4749 struct iosw *ioswlist[1];
4750 #endif
4751 #ifdef sgi
4752 const aiocb_t *aioary[1];
4753 #endif
4754 int r, cnt;
4755
4756 aiop = aio_slot(aio_id);
4757 /*printf("%d aiop B =%p\n", getpid(), aiop);*/
4758
4759 switch (aiop->strategy) {
4760 case A_POLL:
4761 while (!aio_done(aiop)) ;
4762 break;
4763
4764 case A_SIGNAL:
4765 sigemptyset(&sigset);
4766 sighold(aiop->sig);
4767
4768 while (!aiop->signalled || !aiop->done) {
4769 sigsuspend(&sigset);
4770 sighold(aiop->sig);
4771 }
4772 break;
4773
4774 #ifdef CRAY
4775 case A_RECALL:
4776 ioswlist[0] = &aiop->iosw;
4777 if (recall(aiop->fd, 1, ioswlist) < 0) {
4778 doio_fprintf(stderr, "recall() failed: %s (%d)\n",
4779 SYSERR, errno);
4780 exit(E_SETUP);
4781 }
4782 break;
4783
4784 #ifdef RECALL_SIZEOF
4785
4786 case A_RECALLA:
4787 RECALL_INIT(mask);
4788 RECALL_SET(mask, aiop->fd);
4789 if (recalla(mask) < 0) {
4790 doio_fprintf(stderr, "recalla() failed: %s (%d)\n",
4791 SYSERR, errno);
4792 exit(E_SETUP);
4793 }
4794
4795 RECALL_CLR(mask, aiop->fd);
4796 break;
4797 #endif
4798
4799 case A_RECALLS:
4800 ioswlist[0] = &aiop->iosw;
4801 if (recalls(1, ioswlist) < 0) {
4802 doio_fprintf(stderr, "recalls failed: %s (%d)\n",
4803 SYSERR, errno);
4804 exit(E_SETUP);
4805 }
4806 break;
4807 #endif /* CRAY */
4808
4809 #ifdef sgi
4810 case A_CALLBACK:
4811 aioary[0] = &aiop->aiocb;
4812 cnt = 0;
4813 do {
4814 r = aio_suspend(aioary, 1, NULL);
4815 if (r == -1) {
4816 doio_fprintf(stderr,
4817 "aio_suspend failed: %s (%d)\n",
4818 SYSERR, errno);
4819 exit(E_SETUP);
4820 }
4821 cnt++;
4822 } while (aiop->done == 0);
4823
4824 #if 0
4825 /*
4826 * after having this set for a while, I've decided that
4827 * it's too noisy
4828 */
4829 if (cnt > 1)
4830 doio_fprintf(stderr,
4831 "aio_wait: callback wait took %d tries\n",
4832 cnt);
4833 #endif
4834
4835 /*
4836 * Note: cb_handler already calls aio_done
4837 */
4838 break;
4839
4840 case A_SUSPEND:
4841 aioary[0] = &aiop->aiocb;
4842 r = aio_suspend(aioary, 1, NULL);
4843 if (r == -1) {
4844 doio_fprintf(stderr, "aio_suspend failed: %s (%d)\n",
4845 SYSERR, errno);
4846 exit(E_SETUP);
4847 }
4848
4849 aio_done(aiop);
4850 break;
4851 #endif
4852 }
4853
4854 /*printf("aio_wait: errno %d return %d\n", aiop->aio_errno, aiop->aio_ret);*/
4855
4856 return 0;
4857 }
4858 #endif /* !linux */
4859
/*
 * Format specified time into HH:MM:SS format.  t is the time to format
 * in seconds (as returned from time(2)), interpreted in local time.
 *
 * Returns a pointer to a static 9-byte buffer that is overwritten by the
 * next call.  If t cannot be converted by localtime(), or strftime()
 * produces nothing, the buffer holds the placeholder "--:--:--" instead,
 * so the result is always a valid 8-character string.
 */

char *hms(time_t t)
{
	static char ascii_time[9];
	struct tm *ltime;

	ltime = localtime(&t);

	/* localtime() returns NULL for times it cannot represent */
	if (ltime == NULL ||
	    strftime(ascii_time, sizeof(ascii_time), "%H:%M:%S",
		     ltime) == 0) {
		strcpy(ascii_time, "--:--:--");
	}

	return ascii_time;
}
4875
/*
 * Simple routine to check if an async io request has completed.
 *
 * CRAY: returns the request's iosw.sw_flag.
 * sgi:  records aio_error() in ainfo->aio_errno; once that is no longer
 *       EINPROGRESS, records aio_return() in ainfo->aio_ret.  Returns
 *       non-zero when the request is no longer in progress.  A -1 from
 *       either call prints a message and exits with E_SETUP.
 * Otherwise: returns -1 (invalid).
 */

int aio_done(struct aio_info *ainfo)
{
#if defined(CRAY)
	return ainfo->iosw.sw_flag;
#elif defined(sgi)
	int in_progress;

	ainfo->aio_errno = aio_error(&ainfo->aiocb);
	if (ainfo->aio_errno == -1) {
		doio_fprintf(stderr, "aio_done: aio_error failed: %s (%d)\n",
			     SYSERR, errno);
		exit(E_SETUP);
	}

	in_progress = (ainfo->aio_errno == EINPROGRESS);
	if (!in_progress) {
		ainfo->aio_ret = aio_return(&ainfo->aiocb);
		if (ainfo->aio_ret == -1) {
			doio_fprintf(stderr,
				     "aio_done: aio_return failed: %s (%d)\n",
				     SYSERR, errno);
			exit(E_SETUP);
		}
	}

	return !in_progress;
#else
	return -1;		/* invalid */
#endif
}
4907
4908 /*
4909 * Routine to handle upanic() - it first attempts to set the panic flag. If
4910 * the flag cannot be set, an error message is issued. A call to upanic
4911 * with PA_PANIC is then done unconditionally, in case the panic flag was set
4912 * from outside the program (as with the panic(8) program).
4913 *
4914 * Note - we only execute the upanic code if -U was used, and the passed in
4915 * mask is set in the Upanic_Conditions bitmask.
4916 */
4917
doio_upanic(int mask)4918 void doio_upanic(int mask)
4919 {
4920 if (U_opt == 0 || (mask & Upanic_Conditions) == 0) {
4921 return;
4922 }
4923 #ifdef CRAY
4924 if (upanic(PA_SET) < 0) {
4925 doio_fprintf(stderr,
4926 "WARNING - Could not set the panic flag - upanic(PA_SET) failed: %s (%d)\n",
4927 SYSERR, errno);
4928 }
4929
4930 upanic(PA_PANIC);
4931 #endif
4932 #ifdef sgi
4933 syssgi(1005); /* syssgi test panic - DEBUG kernels only */
4934 #endif
4935 doio_fprintf(stderr, "WARNING - upanic() failed\n");
4936 }
4937
4938 /*
4939 * Parse cmdline options/arguments and set appropriate global variables.
4940 * If the cmdline is valid, return 0 to caller. Otherwise exit with a status
4941 * of 1.
4942 */
4943
parse_cmdline(int argc,char ** argv,char * opts)4944 int parse_cmdline(int argc, char **argv, char *opts)
4945 {
4946 int c;
4947 char cc, *cp = NULL, *tok = NULL;
4948 extern int opterr;
4949 extern int optind;
4950 extern char *optarg;
4951 struct smap *s;
4952 char *memargs[NMEMALLOC];
4953 int nmemargs, ma;
4954
4955 if (*argv[0] == '-') {
4956 argv[0]++;
4957 Execd = 1;
4958 }
4959
4960 if ((Prog = strrchr(argv[0], '/')) == NULL) {
4961 Prog = argv[0];
4962 } else {
4963 Prog++;
4964 }
4965
4966 opterr = 0;
4967 while ((c = getopt(argc, argv, opts)) != EOF) {
4968 switch ((char)c) {
4969 case 'a':
4970 a_opt++;
4971 break;
4972
4973 case 'C':
4974 C_opt++;
4975 for (s = checkmap; s->string != NULL; s++)
4976 if (!strcmp(s->string, optarg))
4977 break;
4978 if (s->string == NULL && tok != NULL) {
4979 fprintf(stderr,
4980 "%s%s: Illegal -C arg (%s). Must be one of: ",
4981 Prog, TagName, tok);
4982
4983 for (s = checkmap; s->string != NULL; s++)
4984 fprintf(stderr, "%s ", s->string);
4985 fprintf(stderr, "\n");
4986 exit(1);
4987 }
4988
4989 switch (s->value) {
4990 case C_DEFAULT:
4991 Data_Fill = doio_pat_fill;
4992 Data_Check = doio_pat_check;
4993 break;
4994 default:
4995 fprintf(stderr,
4996 "%s%s: Unrecognised -C arg '%s' %d",
4997 Prog, TagName, s->string, s->value);
4998 exit(1);
4999 }
5000 break;
5001
5002 case 'd': /* delay between i/o ops */
5003 parse_delay(optarg);
5004 break;
5005
5006 case 'e':
5007 if (Npes > 1 && Nprocs > 1) {
5008 fprintf(stderr,
5009 "%s%s: Warning - Program is a multi-pe application - exec option is ignored.\n",
5010 Prog, TagName);
5011 }
5012 e_opt++;
5013 break;
5014
5015 case 'h':
5016 help(stdout);
5017 exit(0);
5018 break;
5019
5020 case 'k':
5021 k_opt++;
5022 break;
5023
5024 case 'm':
5025 Message_Interval = strtol(optarg, &cp, 10);
5026 if (*cp != '\0' || Message_Interval < 0) {
5027 fprintf(stderr,
5028 "%s%s: Illegal -m arg (%s): Must be an integer >= 0\n",
5029 Prog, TagName, optarg);
5030 exit(1);
5031 }
5032 m_opt++;
5033 break;
5034
5035 case 'M': /* memory allocation types */
5036 #ifndef CRAY
5037 nmemargs = string_to_tokens(optarg, memargs, 32, ",");
5038 for (ma = 0; ma < nmemargs; ma++) {
5039 parse_memalloc(memargs[ma]);
5040 }
5041 /*dump_memalloc(); */
5042 #else
5043 fprintf(stderr,
5044 "%s%s: Error: -M isn't supported on this platform\n",
5045 Prog, TagName);
5046 exit(1);
5047 #endif
5048 M_opt++;
5049 break;
5050
5051 case 'N':
5052 sprintf(TagName, "(%.39s)", optarg);
5053 break;
5054
5055 case 'n':
5056 Nprocs = strtol(optarg, &cp, 10);
5057 if (*cp != '\0' || Nprocs < 1) {
5058 fprintf(stderr,
5059 "%s%s: Illegal -n arg (%s): Must be integer > 0\n",
5060 Prog, TagName, optarg);
5061 exit(E_USAGE);
5062 }
5063
5064 if (Npes > 1 && Nprocs > 1) {
5065 fprintf(stderr,
5066 "%s%s: Program has been built as a multi-pe app. -n1 is the only nprocs value allowed\n",
5067 Prog, TagName);
5068 exit(E_SETUP);
5069 }
5070 n_opt++;
5071 break;
5072
5073 case 'r':
5074 Release_Interval = strtol(optarg, &cp, 10);
5075 if (*cp != '\0' || Release_Interval < 0) {
5076 fprintf(stderr,
5077 "%s%s: Illegal -r arg (%s): Must be integer >= 0\n",
5078 Prog, TagName, optarg);
5079 exit(E_USAGE);
5080 }
5081
5082 r_opt++;
5083 break;
5084
5085 case 'w':
5086 Write_Log = optarg;
5087 w_opt++;
5088 break;
5089
5090 case 'v':
5091 v_opt++;
5092 break;
5093
5094 case 'V':
5095 if (strcasecmp(optarg, "sync") == 0) {
5096 Validation_Flags = O_SYNC;
5097 } else if (strcasecmp(optarg, "buffered") == 0) {
5098 Validation_Flags = 0;
5099 #ifdef CRAY
5100 } else if (strcasecmp(optarg, "parallel") == 0) {
5101 Validation_Flags = O_PARALLEL;
5102 } else if (strcasecmp(optarg, "ldraw") == 0) {
5103 Validation_Flags = O_LDRAW;
5104 } else if (strcasecmp(optarg, "raw") == 0) {
5105 Validation_Flags = O_RAW;
5106 #endif
5107 #ifdef sgi
5108 } else if (strcasecmp(optarg, "direct") == 0) {
5109 Validation_Flags = O_DIRECT;
5110 #endif
5111 } else {
5112 if (sscanf
5113 (optarg, "%i%c", &Validation_Flags,
5114 &cc) != 1) {
5115 fprintf(stderr,
5116 "%s: Invalid -V argument (%s) - must be a decimal, hex, or octal\n",
5117 Prog, optarg);
5118 fprintf(stderr,
5119 " number, or one of the following strings: 'sync',\n");
5120 fprintf(stderr,
5121 " 'buffered', 'parallel', 'ldraw', or 'raw'\n");
5122 exit(E_USAGE);
5123 }
5124 }
5125 V_opt++;
5126 break;
5127 case 'U':
5128 tok = strtok(optarg, ",");
5129 while (tok != NULL) {
5130 for (s = Upanic_Args; s->string != NULL; s++)
5131 if (strcmp(s->string, tok) == 0)
5132 break;
5133
5134 if (s->string == NULL) {
5135 fprintf(stderr,
5136 "%s%s: Illegal -U arg (%s). Must be one of: ",
5137 Prog, TagName, tok);
5138
5139 for (s = Upanic_Args; s->string != NULL;
5140 s++)
5141 fprintf(stderr, "%s ",
5142 s->string);
5143
5144 fprintf(stderr, "\n");
5145
5146 exit(1);
5147 }
5148
5149 Upanic_Conditions |= s->value;
5150 tok = strtok(NULL, ",");
5151 }
5152
5153 U_opt++;
5154 break;
5155
5156 case '?':
5157 usage(stderr);
5158 exit(E_USAGE);
5159 break;
5160 }
5161 }
5162
5163 /*
5164 * Supply defaults
5165 */
5166
5167 if (!C_opt) {
5168 Data_Fill = doio_pat_fill;
5169 Data_Check = doio_pat_check;
5170 }
5171
5172 if (!U_opt)
5173 Upanic_Conditions = 0;
5174
5175 if (!n_opt)
5176 Nprocs = 1;
5177
5178 if (!r_opt)
5179 Release_Interval = DEF_RELEASE_INTERVAL;
5180
5181 if (!M_opt) {
5182 Memalloc[Nmemalloc].memtype = MEM_DATA;
5183 Memalloc[Nmemalloc].flags = 0;
5184 Memalloc[Nmemalloc].name = NULL;
5185 Memalloc[Nmemalloc].space = NULL;
5186 Nmemalloc++;
5187 }
5188
5189 /*
5190 * Initialize input stream
5191 */
5192
5193 if (argc == optind) {
5194 Infile = NULL;
5195 } else {
5196 Infile = argv[optind++];
5197 }
5198
5199 if (argc != optind) {
5200 usage(stderr);
5201 exit(E_USAGE);
5202 }
5203
5204 return 0;
5205 }
5206
5207 /*
5208 * Parse memory allocation types
5209 *
5210 * Types are:
5211 * Data
5212 * T3E-shmem:blksize[:nblks]
5213 * SysV-shmem:shmid:blksize:nblks
5214 * if shmid is "private", use IPC_PRIVATE
5215 * and nblks is not required
5216 *
5217 * mmap:flags:filename:blksize[:nblks]
5218 * flags are one of:
5219 * p - private (MAP_PRIVATE)
5220 * a - private, MAP_AUTORESRV
5221 * l - local (MAP_LOCAL)
5222 * s - shared (nblks required)
5223 *
5224 * plus any of:
5225 * f - fixed address (MAP_FIXED)
5226 * A - use an address without MAP_FIXED
 *				G - autogrow (map once at startup)
5228 *
5229 * mmap:flags:devzero
 *			mmap /dev/zero (shared not allowed)
5231 * maps the first 4096 bytes of /dev/zero
5232 *
5233 * - put a directory at the beginning of the shared
5234 * regions saying what pid has what region.
5235 * DIRMAGIC
5236 * BLKSIZE
5237 * NBLKS
5238 * nblks worth of directories - 1 int pids
5239 */
5240 #ifndef CRAY
parse_memalloc(char * arg)5241 void parse_memalloc(char *arg)
5242 {
5243 char *allocargs[NMEMALLOC];
5244 int nalloc;
5245 struct memalloc *M;
5246
5247 if (Nmemalloc >= NMEMALLOC) {
5248 doio_fprintf(stderr, "Error - too many memory types (%d).\n",
5249 Nmemalloc);
5250 return;
5251 }
5252
5253 M = &Memalloc[Nmemalloc];
5254
5255 nalloc = string_to_tokens(arg, allocargs, 32, ":");
5256 if (!strcmp(allocargs[0], "data")) {
5257 M->memtype = MEM_DATA;
5258 M->flags = 0;
5259 M->name = NULL;
5260 M->space = NULL;
5261 Nmemalloc++;
5262 if (nalloc >= 2) {
5263 if (strchr(allocargs[1], 'p'))
5264 M->flags |= MEMF_MPIN;
5265 }
5266 } else if (!strcmp(allocargs[0], "mmap")) {
5267 /* mmap:flags:filename[:size] */
5268 M->memtype = MEM_MMAP;
5269 M->flags = 0;
5270 M->space = NULL;
5271 if (nalloc >= 1) {
5272 if (strchr(allocargs[1], 'p'))
5273 M->flags |= MEMF_PRIVATE;
5274 if (strchr(allocargs[1], 'a'))
5275 M->flags |= MEMF_AUTORESRV;
5276 if (strchr(allocargs[1], 'l'))
5277 M->flags |= MEMF_LOCAL;
5278 if (strchr(allocargs[1], 's'))
5279 M->flags |= MEMF_SHARED;
5280
5281 if (strchr(allocargs[1], 'f'))
5282 M->flags |= MEMF_FIXADDR;
5283 if (strchr(allocargs[1], 'A'))
5284 M->flags |= MEMF_ADDR;
5285 if (strchr(allocargs[1], 'G'))
5286 M->flags |= MEMF_AUTOGROW;
5287
5288 if (strchr(allocargs[1], 'U'))
5289 M->flags |= MEMF_FILE;
5290 } else {
5291 M->flags |= MEMF_PRIVATE;
5292 }
5293
5294 if (nalloc > 2) {
5295 if (!strcmp(allocargs[2], "devzero")) {
5296 M->name = "/dev/zero";
5297 if (M->flags &
5298 ((MEMF_PRIVATE | MEMF_LOCAL) == 0))
5299 M->flags |= MEMF_PRIVATE;
5300 } else {
5301 M->name = allocargs[2];
5302 }
5303 } else {
5304 M->name = "/dev/zero";
5305 if (M->flags & ((MEMF_PRIVATE | MEMF_LOCAL) == 0))
5306 M->flags |= MEMF_PRIVATE;
5307 }
5308 Nmemalloc++;
5309
5310 } else if (!strcmp(allocargs[0], "shmem")) {
5311 /* shmem:shmid:size */
5312 M->memtype = MEM_SHMEM;
5313 M->flags = 0;
5314 M->space = NULL;
5315 if (nalloc >= 2) {
5316 M->name = allocargs[1];
5317 } else {
5318 M->name = NULL;
5319 }
5320 if (nalloc >= 3) {
5321 sscanf(allocargs[2], "%i", &M->nblks);
5322 } else {
5323 M->nblks = 0;
5324 }
5325 if (nalloc >= 4) {
5326 if (strchr(allocargs[3], 'p'))
5327 M->flags |= MEMF_MPIN;
5328 }
5329
5330 Nmemalloc++;
5331 } else {
5332 doio_fprintf(stderr, "Error - unknown memory type '%s'.\n",
5333 allocargs[0]);
5334 exit(1);
5335 }
5336 }
5337
dump_memalloc(void)5338 void dump_memalloc(void)
5339 {
5340 int ma;
5341 char *mt;
5342
5343 if (Nmemalloc == 0) {
5344 printf("No memory allocation strategies devined\n");
5345 return;
5346 }
5347
5348 for (ma = 0; ma < Nmemalloc; ma++) {
5349 switch (Memalloc[ma].memtype) {
5350 case MEM_DATA:
5351 mt = "data";
5352 break;
5353 case MEM_SHMEM:
5354 mt = "shmem";
5355 break;
5356 case MEM_MMAP:
5357 mt = "mmap";
5358 break;
5359 default:
5360 mt = "unknown";
5361 break;
5362 }
5363 printf("mstrat[%d] = %d %s\n", ma, Memalloc[ma].memtype, mt);
5364 printf("\tflags=%#o name='%s' nblks=%d\n",
5365 Memalloc[ma].flags,
5366 Memalloc[ma].name, Memalloc[ma].nblks);
5367 }
5368 }
5369
5370 #endif /* !CRAY */
5371
5372 /*
5373 * -d <op>:<time> - doio inter-operation delay
5374 * currently this permits ONE type of delay between operations.
5375 */
5376
parse_delay(char * arg)5377 void parse_delay(char *arg)
5378 {
5379 char *delayargs[NMEMALLOC];
5380 int ndelay;
5381 struct smap *s;
5382
5383 ndelay = string_to_tokens(arg, delayargs, 32, ":");
5384 if (ndelay < 2) {
5385 doio_fprintf(stderr,
5386 "Illegal delay arg (%s). Must be operation:time\n",
5387 arg);
5388 exit(1);
5389 }
5390 for (s = delaymap; s->string != NULL; s++)
5391 if (!strcmp(s->string, delayargs[0]))
5392 break;
5393 if (s->string == NULL) {
5394 fprintf(stderr,
5395 "Illegal Delay arg (%s). Must be one of: ", arg);
5396
5397 for (s = delaymap; s->string != NULL; s++)
5398 fprintf(stderr, "%s ", s->string);
5399 fprintf(stderr, "\n");
5400 exit(1);
5401 }
5402
5403 delayop = s->value;
5404
5405 sscanf(delayargs[1], "%i", &delaytime);
5406
5407 if (ndelay > 2) {
5408 fprintf(stderr, "Warning: extra delay arguments ignored.\n");
5409 }
5410 }
5411
5412 /*
5413 * Usage clause - obvious
5414 */
5415
usage(FILE * stream)5416 int usage(FILE * stream)
5417 {
5418 /*
5419 * Only do this if we are on vpe 0, to avoid seeing it from every
5420 * process in the application.
5421 */
5422
5423 if (Npes > 1 && Vpe != 0) {
5424 return 0;
5425 }
5426
5427 fprintf(stream,
5428 "usage%s: %s [-aekv] [-m message_interval] [-n nprocs] [-r release_interval] [-w write_log] [-V validation_ftype] [-U upanic_cond] [infile]\n",
5429 TagName, Prog);
5430 return 0;
5431 }
5432
help(FILE * stream)5433 void help(FILE * stream)
5434 {
5435 /*
5436 * Only the app running on vpe 0 gets to issue help - this prevents
5437 * everybody in the application from doing this.
5438 */
5439
5440 if (Npes > 1 && Vpe != 0) {
5441 return;
5442 }
5443
5444 usage(stream);
5445 fprintf(stream, "\n");
5446 fprintf(stream,
5447 "\t-a abort - kill all doio processes on data compare\n");
5448 fprintf(stream,
5449 "\t errors. Normally only the erroring process exits\n");
5450 fprintf(stream, "\t-C data-pattern-type \n");
5451 fprintf(stream,
5452 "\t Available data patterns are:\n");
5453 fprintf(stream, "\t default - repeating pattern\n");
5454 fprintf(stream, "\t-d Operation:Time Inter-operation delay.\n");
5455 fprintf(stream, "\t Operations are:\n");
5456 fprintf(stream,
5457 "\t select:time (1 second=1000000)\n");
5458 fprintf(stream, "\t sleep:time (1 second=1)\n");
5459 #ifdef sgi
5460 fprintf(stream,
5461 "\t sginap:time (1 second=CLK_TCK=100)\n");
5462 #endif
5463 fprintf(stream, "\t alarm:time (1 second=1)\n");
5464 fprintf(stream,
5465 "\t-e Re-exec children before entering the main\n");
5466 fprintf(stream,
5467 "\t loop. This is useful for spreading\n");
5468 fprintf(stream,
5469 "\t procs around on multi-pe systems.\n");
5470 fprintf(stream,
5471 "\t-k Lock file regions during writes using fcntl()\n");
5472 fprintf(stream,
5473 "\t-v Verify writes - this is done by doing a buffered\n");
5474 fprintf(stream,
5475 "\t read() of the data if file io was done, or\n");
5476 fprintf(stream,
5477 "\t an ssread()of the data if sds io was done\n");
5478 #ifndef CRAY
5479 fprintf(stream,
5480 "\t-M Data buffer allocation method\n");
5481 fprintf(stream, "\t alloc-type[,type]\n");
5482 #ifdef sgi
5483 fprintf(stream, "\t data:flags\n");
5484 fprintf(stream, "\t p - mpin buffer\n");
5485 fprintf(stream, "\t shmem:shmid:size:flags\n");
5486 fprintf(stream, "\t p - mpin buffer\n");
5487 #else
5488 fprintf(stream, "\t data\n");
5489 fprintf(stream, "\t shmem:shmid:size\n");
5490 #endif /* sgi */
5491 fprintf(stream, "\t mmap:flags:filename\n");
5492 fprintf(stream, "\t p - private\n");
5493 #ifdef sgi
5494 fprintf(stream, "\t s - shared\n");
5495 fprintf(stream, "\t l - local\n");
5496 fprintf(stream, "\t a - autoresrv\n");
5497 fprintf(stream, "\t G - autogrow\n");
5498 #else
5499 fprintf(stream,
5500 "\t s - shared (shared file must exist\n"),
5501 fprintf(stream,
5502 "\t and have needed length)\n");
5503 #endif
5504 fprintf(stream,
5505 "\t f - fixed address (not used)\n");
5506 fprintf(stream,
5507 "\t a - specify address (not used)\n");
5508 fprintf(stream,
5509 "\t U - Unlink file when done\n");
5510 fprintf(stream,
5511 "\t The default flag is private\n");
5512 fprintf(stream, "\n");
5513 #endif /* !CRAY */
5514 fprintf(stream,
5515 "\t-m message_interval Generate a message every 'message_interval'\n");
5516 fprintf(stream,
5517 "\t requests. An interval of 0 suppresses\n");
5518 fprintf(stream,
5519 "\t messages. The default is 0.\n");
5520 fprintf(stream, "\t-N tagname Tag name, for Monster.\n");
5521 fprintf(stream, "\t-n nprocs # of processes to start up\n");
5522 fprintf(stream,
5523 "\t-r release_interval Release all memory and close\n");
5524 fprintf(stream,
5525 "\t files every release_interval operations.\n");
5526 fprintf(stream,
5527 "\t By default procs never release memory\n");
5528 fprintf(stream,
5529 "\t or close fds unless they have to.\n");
5530 fprintf(stream,
5531 "\t-V validation_ftype The type of file descriptor to use for doing data\n");
5532 fprintf(stream,
5533 "\t validation. validation_ftype may be an octal,\n");
5534 fprintf(stream,
5535 "\t hex, or decimal number representing the open()\n");
5536 fprintf(stream,
5537 "\t flags, or may be one of the following strings:\n");
5538 fprintf(stream,
5539 "\t 'buffered' - validate using bufferd read\n");
5540 fprintf(stream,
5541 "\t 'sync' - validate using O_SYNC read\n");
5542 #ifdef sgi
5543 fprintf(stream,
5544 "\t 'direct - validate using O_DIRECT read'\n");
5545 #endif
5546 #ifdef CRAY
5547 fprintf(stream,
5548 "\t 'ldraw' - validate using O_LDRAW read\n");
5549 fprintf(stream,
5550 "\t 'parallel' - validate using O_PARALLEL read\n");
5551 fprintf(stream,
5552 "\t 'raw' - validate using O_RAW read\n");
5553 #endif
5554 fprintf(stream, "\t By default, 'parallel'\n");
5555 fprintf(stream,
5556 "\t is used if the write was done with O_PARALLEL\n");
5557 fprintf(stream,
5558 "\t or 'buffered' for all other writes.\n");
5559 fprintf(stream,
5560 "\t-w write_log File to log file writes to. The doio_check\n");
5561 fprintf(stream,
5562 "\t program can reconstruct datafiles using the\n");
5563 fprintf(stream,
5564 "\t write_log, and detect if a file is corrupt\n");
5565 fprintf(stream,
5566 "\t after all procs have exited.\n");
5567 fprintf(stream,
5568 "\t-U upanic_cond Comma separated list of conditions that will\n");
5569 fprintf(stream,
5570 "\t cause a call to upanic(PA_PANIC).\n");
5571 fprintf(stream,
5572 "\t 'corruption' -> upanic on bad data comparisons\n");
5573 fprintf(stream,
5574 "\t 'iosw' ---> upanic on unexpected async iosw\n");
5575 fprintf(stream,
5576 "\t 'rval' ---> upanic on unexpected syscall rvals\n");
5577 fprintf(stream,
5578 "\t 'all' ---> all of the above\n");
5579 fprintf(stream, "\n");
5580 fprintf(stream,
5581 "\tinfile Input stream - default is stdin - must be a list\n");
5582 fprintf(stream,
5583 "\t of io_req structures (see doio.h). Currently\n");
5584 fprintf(stream,
5585 "\t only the iogen program generates the proper\n");
5586 fprintf(stream, "\t format\n");
5587 }
5588