1 /*
2 * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
31 *
32 */
33 /* $Id: rand_lines.c,v 1.5 2002/09/16 15:02:57 nstraz Exp $ */
34 /**************************************************************
35 *
36 * OS Testing - Silicon Graphics, Inc.
37 *
38 * TOOL IDENTIFIER : rand_lines
39 *
40 * DESCRIPTION : prints lines from a file in random order
41 *
42 * SYNOPSIS:
 *	rand_lines [-hg][-S seed][-l numlines] [files...]
44 *
45 * AUTHOR : Richard Logan
46 *
47 * CO-PILOT(s) :
48 *
49 * DATE STARTED : 05/94
50 *
51 * INPUT SPECIFICATIONS
52 * This tool will print lines of a file in random order.
53 * The max line length is 4096.
54 * The options supported are:
 *	-h	This option prints a help message and then exits.
56 *
57 * -g This option specifies to count the number of lines
58 * in the file before randomizing. This option overrides
59 * -l option. Using this option, will give you the best
60 * randomization, but it requires processing
61 * the file an additional time.
62 *
 *	-l numlines : This option specifies that the file is randomized in
 *		chunks of numlines lines. The default size is 4096.
65 *
66 * -S seed : sets randomization seed to seed.
67 * The default is time(0). If seed is zero, time(0) is used.
68 *
 *	file	A readable, seekable filename. Although the synopsis shows
 *		"[files...]", main() currently requires exactly one file
 *		operand and exits with an error otherwise.
72 *
73 * DESIGN DESCRIPTION
 *	This tool uses a simple algorithm: the file is read line by line
 *	and the offset of each line is stored at a randomly chosen index
 *	of an array. The array is then walked sequentially; for every
 *	stored offset the corresponding line of the input file is reread
 *	and printed. The output is thus the input file's lines in random
 *	order.
79 *
80 * SPECIAL REQUIREMENTS
81 * None.
82 *
83 * UPDATE HISTORY
 *	This should contain the description, author, and date of any
 *	"interesting" modifications (i.e. info that should be helpful in
 *	maintaining/enhancing this tool).
 *	username	description
 *	----------------------------------------------------------------
 *	rrl		Creation of program
90 * rrl 06/02 Fixed bug and some cleanup. Changed default chunk
91 * and line size to 4096 characters.
92 *
93 * BUGS/LIMITATIONS
 *	This program cannot deal with a non-seekable file such as
 *	stdin or a pipe. Only one file can be randomized per
 *	invocation (main() rejects any other number of file
 *	operands). The max line length is 4096 characters.
98 *
99 **************************************************************/
100
101 #include <err.h>
102 #include <errno.h>
103 #include <stdio.h>
104 #include <stdlib.h>
105 #include <string.h>
106 #include <time.h>
107 #include <unistd.h>
108
109 #include "random_range.h"
110
/*
 * One slot of the shuffle table.
 *
 * rnd_insert() claims a free slot and stores the start position of a
 * line in it; the table is later walked in index order to emit the
 * lines in their shuffled order.
 */
struct offset_t {
	long used;	/* zero while the slot is free, nonzero once claimed */
	long offset;	/* start-of-line position supplied to rnd_insert() */
};
118
119 void usage(FILE * stream);
120 void help(void);
121 int rnd_file(FILE * infile, int numlines, long seed);
122 int get_numlines(FILE * infile);
123 int rnd_insert(struct offset_t offsets[], long offset, int size);
124
125 #define DEF_SIZE 4096 /* default chunk size */
126 #define MAX_LN_SZ 4096 /* max line size */
127
128 #ifndef SEEK_SET
129 #define SEEK_SET 0
130 #endif
131
132 char *Progname = NULL;
133
134 /***********************************************************************
135 * MAIN
136 ***********************************************************************/
main(int argc,char * argv[])137 int main(int argc, char *argv[])
138 {
139 FILE *infile;
140 int c;
141 long seed = -1; /* use time as seed */
142 int lsize = DEF_SIZE; /* num lines to randomize */
143 int getfilelines = 0; /* if set, count lines first */
144
145 if ((Progname = strrchr(argv[0], '/')) == NULL)
146 Progname = argv[0];
147 else
148 Progname++;
149
150 while ((c = getopt(argc, argv, "hgS:l:")) != EOF) {
151 switch (c) {
152 case 'h':
153 help();
154 exit(0);
155 break;
156 case 'S': /* seed */
157 if (sscanf(optarg, "%li", &seed) != 1) {
158 fprintf(stderr,
159 "%s: --S option argument is invalid\n",
160 Progname);
161 exit(1);
162 }
163 break;
164
165 case 'l': /* number of lines */
166 if (sscanf(optarg, "%i", &lsize) != 1) {
167 fprintf(stderr,
168 "%s: --s option argument is invalid\n",
169 Progname);
170 exit(1);
171 }
172 break;
173
174 case 'g':
175 getfilelines++;
176 break;
177
178 case '?':
179 usage(stderr);
180 exit(1);
181 break;
182 }
183 }
184
185 if (optind + 1 != argc) {
186 fprintf(stderr, "%s: Missing argument.\n", Progname);
187 usage(stderr);
188 exit(1);
189 }
190
191 if (seed == -1) {
192 seed = time(0);
193 }
194
195 if (strcmp(argv[argc - 1], "-") == 0) {
196 infile = stdin;
197 fprintf(stderr, "%s: Can not support stdin processing.\n",
198 Progname);
199 exit(2);
200 } else {
201
202 if ((infile = fopen(argv[argc - 1], "r")) == NULL) {
203 fprintf(stderr, "%s: Unable to open file %s: %s\n",
204 Progname, argv[argc - 1], strerror(errno));
205 exit(1);
206 }
207
208 if (getfilelines) {
209 lsize = get_numlines(infile);
210 }
211
212 rnd_file(infile, lsize, seed);
213 }
214
215 exit(0);
216 }
217
218 /***********************************************************************
219 * Print usage message to stream.
220 ***********************************************************************/
usage(FILE * stream)221 void usage(FILE * stream)
222 {
223 fprintf(stream,
224 "Usage %s [-hg][-S seed][-l numlines] [files...]\n", Progname);
225
226 }
227
228 /***********************************************************************
229 * Print help message to stdout.
230 ***********************************************************************/
help(void)231 void help(void)
232 {
233 usage(stdout);
234 printf("This tool will print lines in random order (max line len %d).\n\
235 -h : print this help and exit\n\
236 -g : count the number of lines in the file before randomizing\n\
237 This option overrides -l option.\n\
238 -l numlines : randoms lines in numlines chuncks (def %d)\n\
239 -S seed : sets seed to seed (def time(0))\n", MAX_LN_SZ, DEF_SIZE);
240
241 }
242
243 /***********************************************************************
244 * counts the number of lines in already open file.
245 * Note: File must be seekable (not stdin or a pipe).
246 ***********************************************************************/
get_numlines(FILE * infile)247 int get_numlines(FILE *infile)
248 {
249 char line[MAX_LN_SZ]; /* max size of a line */
250 int cnt = 0;
251
252 while (fgets(line, MAX_LN_SZ, infile) != NULL) {
253 cnt++;
254 }
255
256 /* rewind the file */
257 fseek(infile, 0, SEEK_SET);
258
259 return cnt;
260 }
261
262 /***********************************************************************
263 *
264 * infile must be a fseekable file. Thus, it can not be stdin.
265 * It will read each line in the file, randomly saving the offset
266 * of each line in a array of struct offset_t.
267 * It will then print each line in the array stored order.
268 *
269 ***********************************************************************/
rnd_file(FILE * infile,int numlines,long seed)270 int rnd_file(FILE *infile,
271 int numlines, /* can be more or less than num lines in file */
272 /* most opt randomized when num lines in files */
273 /* or just a bit bigger */
274 long seed)
275 {
276
277 char line[MAX_LN_SZ]; /* max size of a line */
278 int cnt;
279 long coffset; /* current line offset */
280
281 struct offset_t *offsets;
282 int memsize;
283
284 if (numlines <= 0) { /*use default */
285 numlines = DEF_SIZE;
286 }
287
288 /*
289 * Malloc space for numlines copies the offset_t structure.
290 * This is where the randomization takes place.
291 */
292 memsize = sizeof(struct offset_t) * numlines;
293
294 if ((offsets = (struct offset_t *)malloc(memsize)) == NULL) {
295 fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize,
296 errno);
297 return -1;
298 }
299
300 random_range_seed(seed);
301
302 coffset = 0;
303
304 while (!feof(infile)) {
305
306 fseek(infile, coffset, SEEK_SET);
307 coffset = ftell(infile);
308 memset(offsets, 0, memsize);
309 cnt = 0;
310
311 /*
312 * read the file in and place offset of each line randomly
313 * into offsets array. Only numlines line can be randomized
314 * at a time.
315 */
316 while (cnt < numlines && fgets(line, MAX_LN_SZ, infile) != NULL) {
317
318 if (rnd_insert(offsets, coffset, numlines) < 0) {
319 fprintf(stderr,
320 "%s:%d rnd_insert() returned -1 (fatal error)!\n",
321 __FILE__, __LINE__);
322 abort();
323 }
324 cnt++;
325
326 coffset = ftell(infile);
327 }
328
329 if (cnt == 0) {
330 continue;
331 }
332
333 /*
334 * print out lines based on offset.
335 */
336 for (cnt = 0; cnt < numlines; cnt++) {
337
338 if (offsets[cnt].used) {
339 fseek(infile, offsets[cnt].offset, SEEK_SET);
340 if (fgets(line, MAX_LN_SZ, infile) == NULL)
341 err(1, "fgets");
342 fputs(line, stdout);
343 }
344 }
345
346 } /* end of file */
347
348 return 0;
349 }
350
351 /***********************************************************************
352 * This function randomly inserts offset information into
353 * the offsets array. The array has a size of size.
354 * It will attempt 75 random array indexes before finding the first
355 * open array element.
356 *
357 ***********************************************************************/
rnd_insert(struct offset_t offsets[],long offset,int size)358 int rnd_insert(struct offset_t offsets[], long offset, int size)
359 {
360 int rand_num;
361 int quick = 0;
362 int ind;
363
364 /*
365 * Loop looking for random unused index.
366 * It will only be attempted 75 times.
367 */
368 while (quick < 75) {
369
370 rand_num = random_range(0, size - 1, 1, NULL);
371
372 if (!offsets[rand_num].used) {
373 offsets[rand_num].offset = offset;
374 offsets[rand_num].used++;
375 return rand_num;
376 }
377 quick++;
378 }
379
380 /*
381 * an randomly choosen index was not found, find
382 * first open index and use it.
383 */
384 for (ind = 0; ind < size && offsets[ind].used != 0; ind++) ; /* do nothing */
385
386 if (ind >= size) {
387 /*
388 * If called with an array where all offsets are used,
389 * we won't be able to find an open array location.
390 * Thus, return -1 indicating the error.
391 * This should never happen if called correctly.
392 */
393 return -1;
394 }
395
396 offsets[ind].offset = offset;
397 offsets[ind].used++;
398 return ind;
399
400 }
401
402 /***********************************************************************
403 *
404 * CODE NOT TESTED AT ALL - it must be tested before it is used.
405 *
406 * This function was written to allow rand_lines to work on non-seekable
407 * file (i.e stdin).
408 *
409 ***********************************************************************/
rnd_stdin(FILE * infile,int space,int numlines,long seed)410 int rnd_stdin(FILE *infile,
411 int space, /* amount of space to use to read file into memory, */
412 /* randomized and print. randomize in chunks */
413 int numlines, /* can be more or less than num lines in file */
414 /* most opt randomized when num lines in files */
415 /* or just a bit bigger */
416 long seed)
417 {
418
419 char line[MAX_LN_SZ]; /* max size of a line */
420 int cnt; /* offset printer counter */
421 long loffset; /* last line address */
422 char *buffer; /* malloc space for file reads */
423 char *rdbuff; /* where to start read */
424 long stopaddr; /* end of read space (address) */
425 int rdsz; /* amount read */
426 int sztord;
427 char *chr; /* buffer processing pointer */
428 char *ptr; /* printing processing pointer */
429 char *lptr; /* printing processing pointer */
430 int loopcntl = 1; /* main loop control flag */
431 struct offset_t *offsets; /* pointer to offset space */
432 int memsize; /* amount of offset space to malloc */
433 int newbuffer = 1; /* need new buffer */
434
435 if (numlines <= 0) { /*use default */
436 numlines = DEF_SIZE;
437 }
438
439 /*
440 * Malloc space for file contents
441 */
442 if ((buffer = (char *)malloc(space)) == NULL) {
443 fprintf(stderr, "Unable to malloc(%d): errno:%d\n", space,
444 errno);
445 return -1;
446 }
447
448 /*
449 * Malloc space for numlines copies the offset_t structure.
450 * This is where the randomization takes place.
451 */
452 memsize = sizeof(struct offset_t) * numlines;
453
454 if ((offsets = (struct offset_t *)malloc(memsize)) == NULL) {
455 fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize,
456 errno);
457 return -1;
458 }
459
460 random_range_seed(seed);
461 rdbuff = buffer; /* read into start of buffer */
462 sztord = space; /* amount of space left in buffer */
463
464 /*
465 * Loop until read doesn't read anything
466 * If last line does not end in newline, it is not printed
467 */
468 while (loopcntl) {
469 /*
470 * read in file up to space size
471 * only works if used as filter.
472 * The code will randomize one reads worth at a time.
473 * If typing in lines, read will read only one line - no randomizing.
474 */
475
476 chr = buffer;
477 if ((rdsz = fread((void *)rdbuff, sztord, 1, infile)) == 0) {
478 fprintf(stderr,
479 "input file is empty, done randomizing\n");
480 loopcntl = 0;
481 return 0;
482 }
483
484 stopaddr = ((long)buffer + rdsz);
485
486 loffset = (long)buffer;
487
488 while (!newbuffer) {
489
490 while ((long)chr < stopaddr && *chr != '\n')
491 chr++;
492
493 chr++;
494
495 if ((long)chr >= stopaddr) {
496
497 fprintf(stderr, "end of read in buffer\n");
498
499 /*
500 * print out lines based on offset.
501 */
502 for (cnt = 0; cnt < numlines; cnt++) {
503
504 if (offsets[cnt].used) {
505 ptr =
506 (char *)offsets[cnt].offset;
507 /*
508 * copy buffer characters into line for printing
509 */
510 lptr = line;
511 while (*ptr != '\n')
512 *lptr++ = *ptr++;
513
514 printf("%s\n", line);
515 }
516 }
517
518 /*
519 * move start of partically read line to beginning of buffer
520 * and adjust rdbuff to end of partically read line
521 */
522 memcpy((void *)loffset, buffer,
523 (stopaddr - loffset));
524 rdbuff = buffer + (stopaddr - loffset);
525 sztord = space - (stopaddr - loffset);
526
527 newbuffer++;
528 }
529
530 if (rnd_insert(offsets, loffset, numlines) < 0) {
531 fprintf(stderr,
532 "%s:%d rnd_insert() returned -1 (fatal error)!\n",
533 __FILE__, __LINE__);
534 abort();
535 }
536
537 loffset = (long)chr;
538 }
539 }
540
541 return 0;
542
543 }
544