1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 #include <stdlib.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <math.h>
9 #include <sys/types.h>
10 #include <unistd.h>
11 #include <sys/stat.h>
12 #include <fcntl.h>
13 #include <errno.h>
14 char tdir[256];
15 char path[256];
16 long stats = 0;
17 
print_usage()18 void print_usage()
19 {
20 	printf("
21 This program creates files in a tree of random depth and branching. Files vary
22 in size randomly according to a distribution function which seems to model real
23 file systems.  This distribution function has a median size of median_file_size
24 (Median file size is hypothesized to be proportional to the average per file
25 space wastage. Notice how that implies that with a more efficient file system
26 file size usage patterns will in the long term move to a lower median file
27 size), and a maximum size of max_file_size.  Directories vary in size according
28 to the same distribution function but with separate parameters to control median
29 and maximum size for the number of files within them, and the number of
30 subdirectories within them.  This program prunes some empty subdirectories in a
31 way that causes parents of leaf directories to branch less than
32 median_dir_branching.
33 
34  To avoid having one large file distort the results such that you have
35 to benchmark many times set max_file_size to not more than
36 bytes_to_consume/10.  If maximum/median is a small integer, then
37 randomness is very poor.  This is a bug, Nikita, please find some
38 clever way to fix it.  If it is 0, then the program crashes....
39 
40 For isolating performance consequences of design variations on
41 particular file or directory size ranges, try setting their median size and
42 max_size to both equal the max size of the file size range you want
43 to test.
44 
45 To avoid having one large file distort the results set max_file_size to
46 not more than bytes_to_consume/10.  Using a distribution function for
47 the sizes of writes would be a natural next step in developing this program.\n\n");
48 
49 	printf
50 	    ("Usage: reiser_fract_tree bytes_to_consume median_file_size max_file_size median_dir_nr_files max_directory_nr_files median_dir_branching max_dir_branching write_buffer_size /testfs_mount_point print_stats_flag\n\n");
51 }
52 
53 /* #define DEBUG */
54 
55 char *write_buffer;		/* buffer from which we write */
56 int write_buffer_size = 0;	/* gets reset to an argv  */
57 static int already_whined = 0;	/* keep out of disk space errors from being
58 				   endless by tracking whether we already
59 				   printed the message */
60 long bytes_to_consume = 0;	/* create files until their total number of
61 				   bytes exceeds this number, but not by more
62 				   than 1/10th */
63 long byte_total = 0;		/* bytes created so far */
64 
65 /* statistics on sizes of files we attempted to create */
66 int fsz_0_100 = 0;
67 int fsz_100_1k = 0;
68 int fsz_1k_10k = 0;
69 int fsz_10k_100k = 0;
70 int fsz_100k_1m = 0;
71 int fsz_1m_10m = 0;
72 int fsz_10m_larger = 0;
73 
chngdir(char * name)74 void chngdir(char *name)
75 {
76 	int i;
77 
78 	if (name[0] == '.' && name[1] == '.') {
79 		for (i = strlen(path); i > 0; i--) {
80 			if (path[i] == '/') {
81 				path[i] = 0;
82 				break;
83 			}
84 		}
85 	} else {
86 		strcat(path, "/");
87 		strcat(path, name);
88 	}
89 }
90 
91 /* this is the core statistical distribution function, and it is used for file
92    sizes, directory sizes, etc. */
determine_size(double median_size,double max_size)93 int determine_size(double median_size,
94 		   double max_size /* The maximal value of size */ )
95 {
96 	/* when x is half of its random range (max_size/median_size), result is
97 	   median_size */
98 	int nr_random, granularity_reducer;
99 	double size, double_nr_random;
100 
101 	/* it is a feature for us that this repeats identically every time it is run,
102 	   as otherwise meaningless variances would affect our results and require us
103 	   to use a higher number of benchmarks to achieve low noise results.  */
104 	nr_random = rand();
105 	median_size++;		/* avoids divide by zero errors */
106 
107 	/* this code does poorly when max_size is not a lot more than median size,
108 	   and that needs fixing */
109 
110 	/* THE NEXT 2 LINES ARE THE HEART OF THE PROGRAM */
111 
112 	/* keep x below the value that when multiplied by median size on the next
113 	   line will equal max_size */
114 	/* the granularity_reducer is to handle the case where max_size is near
115 	   median_size, since '%' can only take ints, we need this complicated what
116 	   of handling that for small values of max_size/median_size by making
117 	   large ints out of small ints temporarily.  */
118 	if (max_size / median_size < 1024)
119 		granularity_reducer = 1024 * 1024;
120 	else
121 		granularity_reducer = 1;
122 	nr_random =
123 	    nr_random %
124 	    ((int)
125 	     (granularity_reducer *
126 	      (((double)max_size) / ((double)median_size))));
127 	double_nr_random = ((double)nr_random) / (granularity_reducer);
128 	size =
129 	    median_size * (1 /
130 			   (1 -
131 			    (double_nr_random) / (((double)max_size) /
132 						  ((double)median_size))) - 1);
133 	return ((int)size);
134 }
135 
136 /* generate a unique filename */
get_name_by_number(long this_files_number,char * str)137 void get_name_by_number(long this_files_number, char *str)
138 {
139 	sprintf(str, "%lu", this_files_number);
140 }
141 
142 /* make a file of a specified size */
make_file(int size)143 void make_file(int size)
144 {
145 	char string[128] = { 0 };
146 	char *str = string;
147 	char fname[256];
148 	int fd = 0;
149 	int error;
150 	static long this_files_number = 1;
151 
152 	/* collect statistics about the size of files created, or more precisely, the
153 	   size of files that we will attempt to create. */
154 	if (size <= 100)
155 		fsz_0_100++;
156 	else if (size <= 1000)
157 		fsz_100_1k++;
158 	else if (size <= 10 * 1000)
159 		fsz_1k_10k++;
160 	else if (size <= 100 * 1000)
161 		fsz_10k_100k++;
162 	else if (size <= 1000 * 1000)
163 		fsz_100k_1m++;
164 	else if (size <= 10 * 1000 * 1000)
165 		fsz_1m_10m++;
166 	else
167 		fsz_10m_larger++;
168 
169 	/* construct a name for the file */
170 	get_name_by_number(this_files_number++, str);
171 	strcpy(fname, path);
172 	strcat(fname, "/");
173 	strcat(fname, str);
174 
175 	/* open the file, and deal with the various errors that can occur */
176 
177 	if ((fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0777)) == -1) {
178 		if (errno == ENOSPC) {
179 			if (!already_whined) {
180 				printf
181 				    ("reiser-2021A: out of disk (or inodes) space, will keep trying\n");
182 				already_whined = 1;	/* we continue other file creation in out of
183 							   space conditions */
184 			}
185 			return;
186 		}
187 		/*  it is sometimes useful to be able to run this program more than once
188 		   inside the same directory, and that means skipping over filenames that
189 		   already exist.  Thus we ignore EEXIST, and pay attention to all
190 		   else. */
191 		if (errno == EEXIST) {	/* just skip existing file */
192 			return;
193 		}
194 		perror("open");
195 		exit(errno);
196 	}
197 	/* write to the file until it is the right size, handling the various error
198 	   conditions appropriately */
199 
200 	while (size > 0) {
201 		size -= (error =
202 			 write(fd, write_buffer,
203 			       (size <
204 				write_buffer_size -
205 				1) ? size : (write_buffer_size - 1)));
206 		if (error == -1) {
207 			if (errno == ENOSPC) {
208 				if (!already_whined) {
209 					printf
210 					    ("reiser-2022: out of disk space, will keep trying\n");
211 					already_whined = 1;
212 				}
213 				close(fd);
214 				return;
215 			}
216 			perror("write() failed");
217 			exit(errno);
218 		}
219 	}
220 
221 	/* close the file */
222 	if (close(fd)) {
223 		perror("close() failed");
224 		exit(errno);
225 	}
226 }
227 
228 /* print the statistics on how many files were created of what size */
229 
print_stats()230 void print_stats()
231 {
232 	if (!stats)
233 		return;
234 
235 	printf("\n");
236 	printf("File stats: Units are decimal (1k = 1000)\n");
237 	printf("files 0-100    : %i\n", fsz_0_100);
238 	printf("files 100-1K   : %i\n", fsz_100_1k);
239 	printf("files 1K-10K   : %i\n", fsz_1k_10k);
240 	printf("files 10K-100K : %i\n", fsz_10k_100k);
241 	printf("files 100K-1M  : %i\n", fsz_100k_1m);
242 	printf("files 1M-10M    : %i\n", fsz_1m_10m);
243 	printf("files 10M-larger    : %i\n", fsz_10m_larger);
244 	printf("total bytes written    : %lu\n", byte_total);
245 
246 }
247 
248 /* predict the number of files that will be created before max_bytes total
249    length of files is reached */
determine_nr_of_files(int median_file_size,double max_file_size,long bytes_to_consume)250 long determine_nr_of_files(int median_file_size, double max_file_size,
251 			   long bytes_to_consume)
252 {
253 	long nr_of_files = 0, byte_total = 0;
254 
255 	/* the next line is not necessary as 1 is the default, it is just cautious
256 	   coding */
257 	srand(1);
258 	while (byte_total < bytes_to_consume) {
259 		byte_total += determine_size(median_file_size, max_file_size);
260 		nr_of_files++;
261 	}
262 	/* reset the random number generator so that when we determine_size() of the
263 	   files later they will be created with the same "random" sequence used in
264 	   this calculation */
265 	srand(1);
266 #ifdef DEBUG
267 	printf("number of files is %d\n", (int)nr_of_files);
268 #endif /* DEBUG */
269 	fflush(NULL);
270 	return nr_of_files;
271 }
272 
273 /* fill the current working directory with nr_files_this_directory number of
274    files*/
275 
fill_this_directory(long nr_files_this_directory,long median_file_size,long maximum_size)276 void fill_this_directory(long nr_files_this_directory, long median_file_size,
277 			 long maximum_size)
278 {
279 	long size;
280 
281 #ifdef DEBUG
282 	printf("filling with %lu files, ", nr_files_this_directory);
283 #endif
284 	while (nr_files_this_directory--) {
285 		size = determine_size(median_file_size, maximum_size);
286 		byte_total += size;
287 		make_file(size);
288 	}
289 }
290 
291 /* this will unfortunately handle out of disk space by forever trying */
292 /* What we should do in out of space situaltion ? I think we must skip this
293    directory and continue files/dirs creation process. Error value (!= 0)
294    indicates that we can't go to this directory. -zam */
make_directory(char * dirname)295 int make_directory(char *dirname)
296 {
297 	static long this_directory_number = 0;
298 
299 	strcpy(tdir, path);
300 	strcat(tdir, "/");
301 	strcat(tdir, dirname);
302 
303 	if (mkdir(tdir, 0755) == -1) {
304 		if (errno == ENOSPC) {
305 			if (!already_whined) {
306 				printf("reiser-2021: out of disk space, ");
307 				already_whined = 1;
308 			}
309 			return errno;
310 		}
311 		/*  it is sometimes useful to be able to run this program more than once
312 		   inside the same directory, and that means skipping over filenames that
313 		   already exist.  Thus we ignore EEXIST, and pay attention to all else. */
314 		if (errno != EEXIST) {
315 			perror("mkdir");
316 			exit(errno);
317 		}
318 	}
319 	sprintf(dirname, "d%lu", this_directory_number++);
320 	strcpy(tdir, path);
321 	strcat(tdir, "/");
322 	strcat(tdir, dirname);
323 
324 	return 0;
325 }
326 
327 /* assumes we are already chdir'd into a directory that the subtree is rooted
328    at.  Fills the directory with files and subdirectories, cd's into those
329    subdirectories, and recurses upon itself */
330 
do_subtree(long * sizes_start,long * sizes_end,long median_file_size,long maximum_file_size,long median_dir_branching,long max_dir_branching)331 void do_subtree(
332 		       /* the start and end of the portion of the directory sizes
333 		          array which corresponds to the sizes of the directories
334 		          composing this subtree */
335 		       /* sizes_end minus sizes_start is equal to the number of
336 		          directories in this subtree */
337 		       long *sizes_start, long *sizes_end,
338 		       long median_file_size, long maximum_file_size,
339 		       long median_dir_branching, long max_dir_branching)
340 {
341 	long *p;
342 	long *sub_start;
343 	long *sub_end;
344 	int index_subdirectory_to_add_directory_to;
345 	long *dirs_in_subtrees;
346 	char *subtree_name;
347 	long *sizes_index = sizes_start;
348 	char subtree_name_array[128];
349 	long this_directory_branching;
350 	static long this_directorys_number;
351 
352 	subtree_name = subtree_name_array;
353 	/* fill this directory with its number of files */
354 	fill_this_directory(*sizes_index, median_file_size, maximum_file_size);
355 	sizes_index++;
356 	/* ok, now randomly assign directories (and their number of files) among the
357 	   subdirectories that will be created if at least one directory is assigned
358 	   to it */
359 
360 	/* this will cause the random number sequence to not match the one used in
361 	   determine_nr_files() I need to accumulate my values in an array
362 	   beforehand. I'll code that later.  */
363 	/* worry about whether 0 or 1 is a problem value */
364 	this_directory_branching =
365 	    determine_size(median_dir_branching, max_dir_branching) + 1;
366 
367 	/* create an array holding the number of directories assigned to each
368 	   potential subdirectory */
369 	dirs_in_subtrees = calloc(this_directory_branching, sizeof(long));
370 	while (sizes_index <= sizes_end) {
371 		index_subdirectory_to_add_directory_to =
372 		    (rand() % this_directory_branching);
373 		(*
374 		 (dirs_in_subtrees + index_subdirectory_to_add_directory_to))++;
375 		sizes_index++;
376 	}
377 	/* the +1 is for the fill_directory() we did above */
378 	sizes_index = sizes_start + 1;
379 
380 	/* go through each potential subdirectory, and if at least one directory has
381 	   been assigned to it, create it and recurse */
382 	for (p = dirs_in_subtrees;
383 	     p < (dirs_in_subtrees + this_directory_branching); p++) {
384 		if (*p) {
385 			int nocd;
386 			sprintf(subtree_name, "d%lu", this_directorys_number++);
387 			nocd = make_directory(subtree_name);
388 			/* if make_dir.. may fails (in out of space situation), we continue
389 			   creation process in same dir */
390 			if (!nocd)
391 				chngdir(subtree_name);
392 			sub_start = sizes_index;
393 			/* the minus one is because *p is the number of elements and arrays start at 0 */
394 			sub_end = (sizes_index + (*p - 1));
395 
396 #ifdef DEBUG
397 			/* comment this back in if the array logic has you going cross-eyed */
398 			/*      printf ("sizes_start is %p, sizes_index is %p, sizes_index+p is %p, sizes_end is %p\n", sizes_start, sub_start, sub_end, sizes_end); */
399 #endif
400 			do_subtree(sub_start, sub_end, median_file_size,
401 				   maximum_file_size, median_dir_branching,
402 				   max_dir_branching);
403 			if (!nocd)
404 				chngdir("..");
405 		}
406 		sizes_index += *p;
407 	}
408 }
409 
410 /* We have already determined that nr_files can fit in bytes_to_consume space.
411    Fill the sizes array with the number of files to be in each directory, and
412    then call do_subtree to fill the tree with files and directories.  */
413 
make_fractal_tree(long median_file_size,long maximum_file_size,long median_dir_nr_files,long max_dir_nr_files,long median_dir_branching,long max_dir_branching,long nr_files)414 void make_fractal_tree(long median_file_size, long maximum_file_size,
415 		       long median_dir_nr_files, long max_dir_nr_files,
416 		       long median_dir_branching, long max_dir_branching,
417 		       long nr_files)
418 {
419 	long *sizes_start;
420 	long *sizes_end;
421 	long *sizes_index;
422 	long remaining_files = nr_files;
423 
424 	/* collect together array of directory sizes for whole filesystem.  This
425 	   cannot easily be done recursively without distorting the directory sizes
426 	   and making deeper directories smaller.  Send me the code if you
427 	   disagree.:-) */
428 	/* we almost certainly don't need this much space, but so what.... */
429 	sizes_index = sizes_start = malloc(nr_files * sizeof(long));
430 	for (; remaining_files > 0;) {
431 		*sizes_index =
432 		    determine_size(median_dir_nr_files, max_dir_nr_files);
433 		// we alloc space for nr_files, so we should avoid
434 		// number of files in directory = 0 -grev.
435 		if (*sizes_index == 0)
436 			*sizes_index = 1;
437 		*sizes_index =
438 		    (*sizes_index <
439 		     remaining_files) ? *sizes_index : remaining_files;
440 
441 #ifdef DEBUG
442 		printf("*sizes_index == %lu, ", *sizes_index);
443 #endif
444 		remaining_files -= *sizes_index;
445 		sizes_index++;
446 	}
447 	/* don't decrement below sizes_start if nr_files is 0 */
448 	sizes_end = (sizes_index-- > sizes_start) ? sizes_index : sizes_start;
449 
450 	sizes_index = sizes_start;
451 	srand(1);
452 	do_subtree(sizes_start, sizes_end, median_file_size, maximum_file_size,
453 		   median_dir_branching, max_dir_branching);
454 
455 }
456 
main(int argc,char * argv[])457 int main(int argc, char *argv[])
458 {
459 	/* initialized from argv[] */
460 	long median_file_size,
461 	    median_dir_branching,
462 	    median_dir_nr_files,
463 	    max_dir_nr_files, max_dir_branching, max_file_size;
464 	long nr_of_files = 0;	/* files to be created */
465 
466 	if (argc != 11) {
467 		print_usage();
468 		exit(1);
469 	}
470 
471 	write_buffer_size = atoi(argv[8]);
472 	write_buffer = malloc(write_buffer_size);
473 	memset(write_buffer, 'a', write_buffer_size);
474 
475 	/* the number of bytes that we desire this tree to consume.  It will actually
476 	   consume more, because the last file will overshoot by a random amount, and
477 	   because the directories and metadata will consume space.  */
478 	bytes_to_consume = atol(argv[1]);
479 	max_file_size = atol(argv[3]);
480 	median_file_size = atol(argv[2]);
481 	/* Figure out how many random files will fit into bytes_to_consume bytes. We
482 	   depend on resetting rand() to get the same result later. */
483 	nr_of_files =
484 	    determine_nr_of_files(median_file_size, max_file_size,
485 				  bytes_to_consume);
486 
487 	strcpy(path, argv[9]);
488 	mkdir(path, 0755);
489 	stats = atol(argv[10]);
490 	median_dir_branching = atol(argv[6]);
491 	max_dir_branching = atol(argv[7]);
492 	median_dir_nr_files = atol(argv[4]);
493 	max_dir_nr_files = atol(argv[5]);
494 	make_fractal_tree(median_file_size, max_file_size, median_dir_nr_files,
495 			  max_dir_nr_files, median_dir_branching,
496 			  max_dir_branching, nr_of_files);
497 	print_stats();
498 	if (stats)
499 		printf("\nreiser_fract_tree finished\n");
500 
501 	return 0;
502 }
503