• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc.,
21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
31  *
32  */
33 /* $Id: rand_lines.c,v 1.5 2002/09/16 15:02:57 nstraz Exp $ */
34 /**************************************************************
35  *
36  *    OS Testing - Silicon Graphics, Inc.
37  *
38  *    TOOL IDENTIFIER   : rand_lines
39  *
40  *    DESCRIPTION       : prints lines from a file in random order
41  *
42  *    SYNOPSIS:
43  *      rand_lines [-hg][-S seed][-l numlines] [files...]
44  *
45  *    AUTHOR            : Richard Logan
46  *
47  *    CO-PILOT(s)       :
48  *
49  *    DATE STARTED      : 05/94
50  *
51  *    INPUT SPECIFICATIONS
52  *     This tool will print lines of a file in random order.
53  *     The max line length is 4096.
54  *     The options supported are:
55  *       -h     This option prints a help message then exits.
56  *
57  *       -g     This option specifies to count the number of lines
58  *		in the file before randomizing.  This option overrides
59  *		-l option.  Using this option, will give you the best
60  *		randomization, but it requires processing
61  *		the file an additional time.
62  *
63  *       -l numlines : This option specifies to randomize file in
64  *		numlines chunks.  The default size is 4096.
65  *
66  *       -S seed     : sets randomization seed to seed.
67  *		The default is time(0).  If seed is -1, time(0) is used.
68  *
69  *	 file   A readable, seekable filename.  The cmd allows the user
70  *	 	to specify multiple files, but each file is dealt with
71  *		separately.
72  *
73  *    DESIGN DESCRIPTION
74  *	This tool uses a simple algorithm where the file is read.
75  *	The offset of each line is randomly placed into an
76  *	array.  The array is then processed sequentially.  The infile
77  *	line whose offset is stored in each array element is reread, then printed.
78  *	The output is thus infile's lines in random order.
79  *
80  *    SPECIAL REQUIREMENTS
81  *	None.
82  *
83  *    UPDATE HISTORY
84  *      This should contain the description, author, and date of any
85  *      "interesting" modifications (i.e. info that should be helpful in
86  *      maintaining/enhancing this tool).
87  *      username     description
88  *      ----------------------------------------------------------------
89  *	rrl 	    Creation of program
90  *	rrl  06/02  Fixed bug and some cleanup. Changed default chunk
91  *	            and line size to 4096 characters.
92  *
93  *    BUGS/LIMITATIONS
94  *	This program can not deal with non-seekable files like
95  *	stdin or a pipe.  If more than one file is specified,
96  *	each file is randomized one at a time.  The max line
97  *	length is 4096 characters.
98  *
99  **************************************************************/
100 
101 #include <err.h>
102 #include <errno.h>
103 #include <stdio.h>
104 #include <stdlib.h>
105 #include <string.h>
106 #include <time.h>
107 #include <unistd.h>
108 
109 #include "random_range.h"
110 
/*
 * One slot of the table used to shuffle lines: it records a line's
 * starting position and whether the slot has been filled yet.
 */
struct offset_t {
	long used;	/* non-zero once rnd_insert() has stored an offset here */
	long offset;	/* starting position of the line, as handed to rnd_insert() */
};
118 
119 void usage(FILE * stream);
120 void help(void);
121 int rnd_file(FILE * infile, int numlines, long seed);
122 int get_numlines(FILE * infile);
123 int rnd_insert(struct offset_t offsets[], long offset, int size);
124 
125 #define DEF_SIZE	4096	/* default chunk size */
126 #define MAX_LN_SZ	4096	/* max line size */
127 
128 #ifndef SEEK_SET
129 #define SEEK_SET	0
130 #endif
131 
132 char *Progname = NULL;
133 
134 /***********************************************************************
135  *  MAIN
136  ***********************************************************************/
main(int argc,char * argv[])137 int main(int argc, char *argv[])
138 {
139 	FILE *infile;
140 	int c;
141 	long seed = -1;		/* use time as seed */
142 	int lsize = DEF_SIZE;	/* num lines to randomize */
143 	int getfilelines = 0;	/* if set, count lines first */
144 
145 	if ((Progname = strrchr(argv[0], '/')) == NULL)
146 		Progname = argv[0];
147 	else
148 		Progname++;
149 
150 	while ((c = getopt(argc, argv, "hgS:l:")) != EOF) {
151 		switch (c) {
152 		case 'h':
153 			help();
154 			exit(0);
155 			break;
156 		case 'S':	/* seed */
157 			if (sscanf(optarg, "%li", &seed) != 1) {
158 				fprintf(stderr,
159 					"%s: --S option argument is invalid\n",
160 					Progname);
161 				exit(1);
162 			}
163 			break;
164 
165 		case 'l':	/* number of lines */
166 			if (sscanf(optarg, "%i", &lsize) != 1) {
167 				fprintf(stderr,
168 					"%s: --s option argument is invalid\n",
169 					Progname);
170 				exit(1);
171 			}
172 			break;
173 
174 		case 'g':
175 			getfilelines++;
176 			break;
177 
178 		case '?':
179 			usage(stderr);
180 			exit(1);
181 			break;
182 		}
183 	}
184 
185 	if (optind + 1 != argc) {
186 		fprintf(stderr, "%s: Missing argument.\n", Progname);
187 		usage(stderr);
188 		exit(1);
189 	}
190 
191 	if (seed == -1) {
192 		seed = time(0);
193 	}
194 
195 	if (strcmp(argv[argc - 1], "-") == 0) {
196 		infile = stdin;
197 		fprintf(stderr, "%s: Can not support stdin processing.\n",
198 			Progname);
199 		exit(2);
200 	} else {
201 
202 		if ((infile = fopen(argv[argc - 1], "r")) == NULL) {
203 			fprintf(stderr, "%s: Unable to open file %s: %s\n",
204 				Progname, argv[argc - 1], strerror(errno));
205 			exit(1);
206 		}
207 
208 		if (getfilelines) {
209 			lsize = get_numlines(infile);
210 		}
211 
212 		rnd_file(infile, lsize, seed);
213 	}
214 
215 	exit(0);
216 }
217 
218 /***********************************************************************
219  * Print usage message to stream.
220  ***********************************************************************/
usage(FILE * stream)221 void usage(FILE * stream)
222 {
223 	fprintf(stream,
224 		"Usage %s [-hg][-S seed][-l numlines] [files...]\n", Progname);
225 
226 }
227 
228 /***********************************************************************
229  * Print help message to stdout.
230  ***********************************************************************/
help(void)231 void help(void)
232 {
233 	usage(stdout);
234 	printf("This tool will print lines in random order (max line len %d).\n\
235   -h          : print this help and exit\n\
236   -g          : count the number of lines in the file before randomizing\n\
237 	        This option overrides -l option.\n\
238   -l numlines : randoms lines in numlines chuncks (def %d)\n\
239   -S seed     : sets seed to seed (def time(0))\n", MAX_LN_SZ, DEF_SIZE);
240 
241 }
242 
243 /***********************************************************************
244  * counts the number of lines in already open file.
245  * Note: File must be seekable (not stdin or a pipe).
246  ***********************************************************************/
get_numlines(FILE * infile)247 int get_numlines(FILE *infile)
248 {
249 	char line[MAX_LN_SZ];	/* max size of a line */
250 	int cnt = 0;
251 
252 	while (fgets(line, MAX_LN_SZ, infile) != NULL) {
253 		cnt++;
254 	}
255 
256 	/* rewind the file */
257 	fseek(infile, 0, SEEK_SET);
258 
259 	return cnt;
260 }
261 
262 /***********************************************************************
263  *
264  *  infile must be a fseekable file.  Thus, it can not be stdin.
265  * It will read each line in the file, randomly saving the offset
266  * of each line in a array of struct offset_t.
267  * It will then print each line in the array stored order.
268  *
269  ***********************************************************************/
rnd_file(FILE * infile,int numlines,long seed)270 int rnd_file(FILE *infile,
271 	int numlines,	/* can be more or less than num lines in file */
272 			/* most opt randomized when num lines in files */
273 			/* or just a bit bigger */
274 	long seed)
275 {
276 
277 	char line[MAX_LN_SZ];	/* max size of a line */
278 	int cnt;
279 	long coffset;		/* current line offset */
280 
281 	struct offset_t *offsets;
282 	int memsize;
283 
284 	if (numlines <= 0) {	/*use default */
285 		numlines = DEF_SIZE;
286 	}
287 
288 	/*
289 	 * Malloc space for numlines copies the offset_t structure.
290 	 * This is where the randomization takes place.
291 	 */
292 	memsize = sizeof(struct offset_t) * numlines;
293 
294 	if ((offsets = (struct offset_t *)malloc(memsize)) == NULL) {
295 		fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize,
296 			errno);
297 		return -1;
298 	}
299 
300 	random_range_seed(seed);
301 
302 	coffset = 0;
303 
304 	while (!feof(infile)) {
305 
306 		fseek(infile, coffset, SEEK_SET);
307 		coffset = ftell(infile);
308 		memset(offsets, 0, memsize);
309 		cnt = 0;
310 
311 		/*
312 		 * read the file in and place offset of each line randomly
313 		 * into offsets array.  Only numlines line can be randomized
314 		 * at a time.
315 		 */
316 		while (cnt < numlines && fgets(line, MAX_LN_SZ, infile) != NULL) {
317 
318 			if (rnd_insert(offsets, coffset, numlines) < 0) {
319 				fprintf(stderr,
320 					"%s:%d rnd_insert() returned -1 (fatal error)!\n",
321 					__FILE__, __LINE__);
322 				abort();
323 			}
324 			cnt++;
325 
326 			coffset = ftell(infile);
327 		}
328 
329 		if (cnt == 0) {
330 			continue;
331 		}
332 
333 		/*
334 		 * print out lines based on offset.
335 		 */
336 		for (cnt = 0; cnt < numlines; cnt++) {
337 
338 			if (offsets[cnt].used) {
339 				fseek(infile, offsets[cnt].offset, SEEK_SET);
340 				if (fgets(line, MAX_LN_SZ, infile) == NULL)
341 					err(1, "fgets");
342 				fputs(line, stdout);
343 			}
344 		}
345 
346 	}			/* end of file */
347 
348 	return 0;
349 }
350 
351 /***********************************************************************
352  * This function randomly inserts offset information into
353  * the offsets array.  The array has a size of size.
354  * It will attempt 75 random array indexes before finding the first
355  * open array element.
356  *
357  ***********************************************************************/
/*
 * Store offset in a free slot of the size-entry offsets table and mark
 * that slot as used.
 *
 * Up to 75 indexes obtained from random_range(0, size - 1, 1, NULL) are
 * tried first; if every one of them is already taken, the first free
 * slot found by scanning from index 0 is used instead.
 *
 * Returns the index of the slot that was filled, or -1 when the table
 * has no free slot left (which callers are expected never to cause).
 */
int rnd_insert(struct offset_t offsets[], long offset, int size)
{
	int slot = 0;
	int found = 0;
	int tries;

	/* Random probing, bounded so that a nearly full table cannot stall. */
	for (tries = 0; tries < 75; tries++) {
		slot = random_range(0, size - 1, 1, NULL);
		if (!offsets[slot].used) {
			found = 1;
			break;
		}
	}

	/* No luck with random picks: take the first free slot, if any. */
	if (!found) {
		slot = 0;
		while (slot < size && offsets[slot].used != 0)
			slot++;

		if (slot >= size)
			return -1;
	}

	offsets[slot].offset = offset;
	offsets[slot].used++;

	return slot;
}
401 
402 /***********************************************************************
403  *
404  * CODE NOT TESTED AT ALL - it must be tested before it is used.
405  *
406  * This function was written to allow rand_lines to work on non-seekable
407  * file (i.e stdin).
408  *
409  ***********************************************************************/
rnd_stdin(FILE * infile,int space,int numlines,long seed)410 int rnd_stdin(FILE *infile,
411 	int space,	/* amount of space to use to read file into memory, */
412 			/* randomized and print.  randomize in chunks */
413 	int numlines,	/* can be more or less than num lines in file */
414 			/* most opt randomized when num lines in files */
415 			/* or just a bit bigger */
416 	long seed)
417 {
418 
419 	char line[MAX_LN_SZ];	/* max size of a line */
420 	int cnt;		/* offset printer counter */
421 	long loffset;		/* last line address */
422 	char *buffer;		/* malloc space for file reads */
423 	char *rdbuff;		/* where to start read */
424 	long stopaddr;		/* end of read space (address) */
425 	int rdsz;		/* amount read */
426 	int sztord;
427 	char *chr;		/* buffer processing pointer */
428 	char *ptr;		/* printing processing pointer */
429 	char *lptr;		/* printing processing pointer */
430 	int loopcntl = 1;	/* main loop control flag */
431 	struct offset_t *offsets;	/* pointer to offset space */
432 	int memsize;		/* amount of offset space to malloc */
433 	int newbuffer = 1;	/* need new buffer */
434 
435 	if (numlines <= 0) {	/*use default */
436 		numlines = DEF_SIZE;
437 	}
438 
439 	/*
440 	 * Malloc space for file contents
441 	 */
442 	if ((buffer = (char *)malloc(space)) == NULL) {
443 		fprintf(stderr, "Unable to malloc(%d): errno:%d\n", space,
444 			errno);
445 		return -1;
446 	}
447 
448 	/*
449 	 * Malloc space for numlines copies the offset_t structure.
450 	 * This is where the randomization takes place.
451 	 */
452 	memsize = sizeof(struct offset_t) * numlines;
453 
454 	if ((offsets = (struct offset_t *)malloc(memsize)) == NULL) {
455 		fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize,
456 			errno);
457 		return -1;
458 	}
459 
460 	random_range_seed(seed);
461 	rdbuff = buffer;	/* read into start of buffer */
462 	sztord = space;		/* amount of space left in buffer */
463 
464 	/*
465 	 *  Loop until read doesn't read anything
466 	 *  If last line does not end in newline, it is not printed
467 	 */
468 	while (loopcntl) {
469 		/*
470 		 *  read in file up to space size
471 		 *  only works if used as filter.
472 		 *  The code will randomize one reads worth at a time.
473 		 *  If typing in lines, read will read only one line - no randomizing.
474 		 */
475 
476 		chr = buffer;
477 		if ((rdsz = fread((void *)rdbuff, sztord, 1, infile)) == 0) {
478 			fprintf(stderr,
479 				"input file is empty, done randomizing\n");
480 			loopcntl = 0;
481 			return 0;
482 		}
483 
484 		stopaddr = ((long)buffer + rdsz);
485 
486 		loffset = (long)buffer;
487 
488 		while (!newbuffer) {
489 
490 			while ((long)chr < stopaddr && *chr != '\n')
491 				chr++;
492 
493 			chr++;
494 
495 			if ((long)chr >= stopaddr) {
496 
497 				fprintf(stderr, "end of read in buffer\n");
498 
499 				/*
500 				 * print out lines based on offset.
501 				 */
502 				for (cnt = 0; cnt < numlines; cnt++) {
503 
504 					if (offsets[cnt].used) {
505 						ptr =
506 						    (char *)offsets[cnt].offset;
507 						/*
508 						 * copy buffer characters into line for printing
509 						 */
510 						lptr = line;
511 						while (*ptr != '\n')
512 							*lptr++ = *ptr++;
513 
514 						printf("%s\n", line);
515 					}
516 				}
517 
518 				/*
519 				 * move start of partically read line to beginning of buffer
520 				 * and adjust rdbuff to end of partically read line
521 				 */
522 				memcpy((void *)loffset, buffer,
523 				       (stopaddr - loffset));
524 				rdbuff = buffer + (stopaddr - loffset);
525 				sztord = space - (stopaddr - loffset);
526 
527 				newbuffer++;
528 			}
529 
530 			if (rnd_insert(offsets, loffset, numlines) < 0) {
531 				fprintf(stderr,
532 					"%s:%d rnd_insert() returned -1 (fatal error)!\n",
533 					__FILE__, __LINE__);
534 				abort();
535 			}
536 
537 			loffset = (long)chr;
538 		}
539 	}
540 
541 	return 0;
542 
543 }
544