1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5 #include <stdlib.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <math.h>
9 #include <sys/types.h>
10 #include <unistd.h>
11 #include <sys/stat.h>
12 #include <fcntl.h>
13 #include <errno.h>
14 char tdir[256];
15 char path[256];
16 long stats = 0;
17
print_usage()18 void print_usage()
19 {
20 printf("
21 This program creates files in a tree of random depth and branching. Files vary
22 in size randomly according to a distribution function which seems to model real
23 file systems. This distribution function has a median size of median_file_size
24 (Median file size is hypothesized to be proportional to the average per file
25 space wastage. Notice how that implies that with a more efficient file system
26 file size usage patterns will in the long term move to a lower median file
27 size), and a maximum size of max_file_size. Directories vary in size according
28 to the same distribution function but with separate parameters to control median
29 and maximum size for the number of files within them, and the number of
30 subdirectories within them. This program prunes some empty subdirectories in a
31 way that causes parents of leaf directories to branch less than
32 median_dir_branching.
33
34 To avoid having one large file distort the results such that you have
35 to benchmark many times set max_file_size to not more than
36 bytes_to_consume/10. If maximum/median is a small integer, then
37 randomness is very poor. This is a bug, Nikita, please find some
38 clever way to fix it. If it is 0, then the program crashes....
39
40 For isolating performance consequences of design variations on
41 particular file or directory size ranges, try setting their median size and
42 max_size to both equal the max size of the file size range you want
43 to test.
44
45 To avoid having one large file distort the results set max_file_size to
46 not more than bytes_to_consume/10. Using a distribution function for
47 the sizes of writes would be a natural next step in developing this program.\n\n");
48
49 printf
50 ("Usage: reiser_fract_tree bytes_to_consume median_file_size max_file_size median_dir_nr_files max_directory_nr_files median_dir_branching max_dir_branching write_buffer_size /testfs_mount_point print_stats_flag\n\n");
51 }
52
53 /* #define DEBUG */
54
55 char *write_buffer; /* buffer from which we write */
56 int write_buffer_size = 0; /* gets reset to an argv */
57 static int already_whined = 0; /* keep out of disk space errors from being
58 endless by tracking whether we already
59 printed the message */
60 long bytes_to_consume = 0; /* create files until their total number of
61 bytes exceeds this number, but not by more
62 than 1/10th */
63 long byte_total = 0; /* bytes created so far */
64
65 /* statistics on sizes of files we attempted to create */
66 int fsz_0_100 = 0;
67 int fsz_100_1k = 0;
68 int fsz_1k_10k = 0;
69 int fsz_10k_100k = 0;
70 int fsz_100k_1m = 0;
71 int fsz_1m_10m = 0;
72 int fsz_10m_larger = 0;
73
chngdir(char * name)74 void chngdir(char *name)
75 {
76 int i;
77
78 if (name[0] == '.' && name[1] == '.') {
79 for (i = strlen(path); i > 0; i--) {
80 if (path[i] == '/') {
81 path[i] = 0;
82 break;
83 }
84 }
85 } else {
86 strcat(path, "/");
87 strcat(path, name);
88 }
89 }
90
91 /* this is the core statistical distribution function, and it is used for file
92 sizes, directory sizes, etc. */
determine_size(double median_size,double max_size)93 int determine_size(double median_size,
94 double max_size /* The maximal value of size */ )
95 {
96 /* when x is half of its random range (max_size/median_size), result is
97 median_size */
98 int nr_random, granularity_reducer;
99 double size, double_nr_random;
100
101 /* it is a feature for us that this repeats identically every time it is run,
102 as otherwise meaningless variances would affect our results and require us
103 to use a higher number of benchmarks to achieve low noise results. */
104 nr_random = rand();
105 median_size++; /* avoids divide by zero errors */
106
107 /* this code does poorly when max_size is not a lot more than median size,
108 and that needs fixing */
109
110 /* THE NEXT 2 LINES ARE THE HEART OF THE PROGRAM */
111
112 /* keep x below the value that when multiplied by median size on the next
113 line will equal max_size */
114 /* the granularity_reducer is to handle the case where max_size is near
115 median_size, since '%' can only take ints, we need this complicated what
116 of handling that for small values of max_size/median_size by making
117 large ints out of small ints temporarily. */
118 if (max_size / median_size < 1024)
119 granularity_reducer = 1024 * 1024;
120 else
121 granularity_reducer = 1;
122 nr_random =
123 nr_random %
124 ((int)
125 (granularity_reducer *
126 (((double)max_size) / ((double)median_size))));
127 double_nr_random = ((double)nr_random) / (granularity_reducer);
128 size =
129 median_size * (1 /
130 (1 -
131 (double_nr_random) / (((double)max_size) /
132 ((double)median_size))) - 1);
133 return ((int)size);
134 }
135
136 /* generate a unique filename */
get_name_by_number(long this_files_number,char * str)137 void get_name_by_number(long this_files_number, char *str)
138 {
139 sprintf(str, "%lu", this_files_number);
140 }
141
142 /* make a file of a specified size */
make_file(int size)143 void make_file(int size)
144 {
145 char string[128] = { 0 };
146 char *str = string;
147 char fname[256];
148 int fd = 0;
149 int error;
150 static long this_files_number = 1;
151
152 /* collect statistics about the size of files created, or more precisely, the
153 size of files that we will attempt to create. */
154 if (size <= 100)
155 fsz_0_100++;
156 else if (size <= 1000)
157 fsz_100_1k++;
158 else if (size <= 10 * 1000)
159 fsz_1k_10k++;
160 else if (size <= 100 * 1000)
161 fsz_10k_100k++;
162 else if (size <= 1000 * 1000)
163 fsz_100k_1m++;
164 else if (size <= 10 * 1000 * 1000)
165 fsz_1m_10m++;
166 else
167 fsz_10m_larger++;
168
169 /* construct a name for the file */
170 get_name_by_number(this_files_number++, str);
171 strcpy(fname, path);
172 strcat(fname, "/");
173 strcat(fname, str);
174
175 /* open the file, and deal with the various errors that can occur */
176
177 if ((fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0777)) == -1) {
178 if (errno == ENOSPC) {
179 if (!already_whined) {
180 printf
181 ("reiser-2021A: out of disk (or inodes) space, will keep trying\n");
182 already_whined = 1; /* we continue other file creation in out of
183 space conditions */
184 }
185 return;
186 }
187 /* it is sometimes useful to be able to run this program more than once
188 inside the same directory, and that means skipping over filenames that
189 already exist. Thus we ignore EEXIST, and pay attention to all
190 else. */
191 if (errno == EEXIST) { /* just skip existing file */
192 return;
193 }
194 perror("open");
195 exit(errno);
196 }
197 /* write to the file until it is the right size, handling the various error
198 conditions appropriately */
199
200 while (size > 0) {
201 size -= (error =
202 write(fd, write_buffer,
203 (size <
204 write_buffer_size -
205 1) ? size : (write_buffer_size - 1)));
206 if (error == -1) {
207 if (errno == ENOSPC) {
208 if (!already_whined) {
209 printf
210 ("reiser-2022: out of disk space, will keep trying\n");
211 already_whined = 1;
212 }
213 close(fd);
214 return;
215 }
216 perror("write() failed");
217 exit(errno);
218 }
219 }
220
221 /* close the file */
222 if (close(fd)) {
223 perror("close() failed");
224 exit(errno);
225 }
226 }
227
228 /* print the statistics on how many files were created of what size */
229
print_stats()230 void print_stats()
231 {
232 if (!stats)
233 return;
234
235 printf("\n");
236 printf("File stats: Units are decimal (1k = 1000)\n");
237 printf("files 0-100 : %i\n", fsz_0_100);
238 printf("files 100-1K : %i\n", fsz_100_1k);
239 printf("files 1K-10K : %i\n", fsz_1k_10k);
240 printf("files 10K-100K : %i\n", fsz_10k_100k);
241 printf("files 100K-1M : %i\n", fsz_100k_1m);
242 printf("files 1M-10M : %i\n", fsz_1m_10m);
243 printf("files 10M-larger : %i\n", fsz_10m_larger);
244 printf("total bytes written : %lu\n", byte_total);
245
246 }
247
248 /* predict the number of files that will be created before max_bytes total
249 length of files is reached */
determine_nr_of_files(int median_file_size,double max_file_size,long bytes_to_consume)250 long determine_nr_of_files(int median_file_size, double max_file_size,
251 long bytes_to_consume)
252 {
253 long nr_of_files = 0, byte_total = 0;
254
255 /* the next line is not necessary as 1 is the default, it is just cautious
256 coding */
257 srand(1);
258 while (byte_total < bytes_to_consume) {
259 byte_total += determine_size(median_file_size, max_file_size);
260 nr_of_files++;
261 }
262 /* reset the random number generator so that when we determine_size() of the
263 files later they will be created with the same "random" sequence used in
264 this calculation */
265 srand(1);
266 #ifdef DEBUG
267 printf("number of files is %d\n", (int)nr_of_files);
268 #endif /* DEBUG */
269 fflush(NULL);
270 return nr_of_files;
271 }
272
273 /* fill the current working directory with nr_files_this_directory number of
274 files*/
275
fill_this_directory(long nr_files_this_directory,long median_file_size,long maximum_size)276 void fill_this_directory(long nr_files_this_directory, long median_file_size,
277 long maximum_size)
278 {
279 long size;
280
281 #ifdef DEBUG
282 printf("filling with %lu files, ", nr_files_this_directory);
283 #endif
284 while (nr_files_this_directory--) {
285 size = determine_size(median_file_size, maximum_size);
286 byte_total += size;
287 make_file(size);
288 }
289 }
290
291 /* this will unfortunately handle out of disk space by forever trying */
292 /* What we should do in out of space situaltion ? I think we must skip this
293 directory and continue files/dirs creation process. Error value (!= 0)
294 indicates that we can't go to this directory. -zam */
make_directory(char * dirname)295 int make_directory(char *dirname)
296 {
297 static long this_directory_number = 0;
298
299 strcpy(tdir, path);
300 strcat(tdir, "/");
301 strcat(tdir, dirname);
302
303 if (mkdir(tdir, 0755) == -1) {
304 if (errno == ENOSPC) {
305 if (!already_whined) {
306 printf("reiser-2021: out of disk space, ");
307 already_whined = 1;
308 }
309 return errno;
310 }
311 /* it is sometimes useful to be able to run this program more than once
312 inside the same directory, and that means skipping over filenames that
313 already exist. Thus we ignore EEXIST, and pay attention to all else. */
314 if (errno != EEXIST) {
315 perror("mkdir");
316 exit(errno);
317 }
318 }
319 sprintf(dirname, "d%lu", this_directory_number++);
320 strcpy(tdir, path);
321 strcat(tdir, "/");
322 strcat(tdir, dirname);
323
324 return 0;
325 }
326
327 /* assumes we are already chdir'd into a directory that the subtree is rooted
328 at. Fills the directory with files and subdirectories, cd's into those
329 subdirectories, and recurses upon itself */
330
do_subtree(long * sizes_start,long * sizes_end,long median_file_size,long maximum_file_size,long median_dir_branching,long max_dir_branching)331 void do_subtree(
332 /* the start and end of the portion of the directory sizes
333 array which corresponds to the sizes of the directories
334 composing this subtree */
335 /* sizes_end minus sizes_start is equal to the number of
336 directories in this subtree */
337 long *sizes_start, long *sizes_end,
338 long median_file_size, long maximum_file_size,
339 long median_dir_branching, long max_dir_branching)
340 {
341 long *p;
342 long *sub_start;
343 long *sub_end;
344 int index_subdirectory_to_add_directory_to;
345 long *dirs_in_subtrees;
346 char *subtree_name;
347 long *sizes_index = sizes_start;
348 char subtree_name_array[128];
349 long this_directory_branching;
350 static long this_directorys_number;
351
352 subtree_name = subtree_name_array;
353 /* fill this directory with its number of files */
354 fill_this_directory(*sizes_index, median_file_size, maximum_file_size);
355 sizes_index++;
356 /* ok, now randomly assign directories (and their number of files) among the
357 subdirectories that will be created if at least one directory is assigned
358 to it */
359
360 /* this will cause the random number sequence to not match the one used in
361 determine_nr_files() I need to accumulate my values in an array
362 beforehand. I'll code that later. */
363 /* worry about whether 0 or 1 is a problem value */
364 this_directory_branching =
365 determine_size(median_dir_branching, max_dir_branching) + 1;
366
367 /* create an array holding the number of directories assigned to each
368 potential subdirectory */
369 dirs_in_subtrees = calloc(this_directory_branching, sizeof(long));
370 while (sizes_index <= sizes_end) {
371 index_subdirectory_to_add_directory_to =
372 (rand() % this_directory_branching);
373 (*
374 (dirs_in_subtrees + index_subdirectory_to_add_directory_to))++;
375 sizes_index++;
376 }
377 /* the +1 is for the fill_directory() we did above */
378 sizes_index = sizes_start + 1;
379
380 /* go through each potential subdirectory, and if at least one directory has
381 been assigned to it, create it and recurse */
382 for (p = dirs_in_subtrees;
383 p < (dirs_in_subtrees + this_directory_branching); p++) {
384 if (*p) {
385 int nocd;
386 sprintf(subtree_name, "d%lu", this_directorys_number++);
387 nocd = make_directory(subtree_name);
388 /* if make_dir.. may fails (in out of space situation), we continue
389 creation process in same dir */
390 if (!nocd)
391 chngdir(subtree_name);
392 sub_start = sizes_index;
393 /* the minus one is because *p is the number of elements and arrays start at 0 */
394 sub_end = (sizes_index + (*p - 1));
395
396 #ifdef DEBUG
397 /* comment this back in if the array logic has you going cross-eyed */
398 /* printf ("sizes_start is %p, sizes_index is %p, sizes_index+p is %p, sizes_end is %p\n", sizes_start, sub_start, sub_end, sizes_end); */
399 #endif
400 do_subtree(sub_start, sub_end, median_file_size,
401 maximum_file_size, median_dir_branching,
402 max_dir_branching);
403 if (!nocd)
404 chngdir("..");
405 }
406 sizes_index += *p;
407 }
408 }
409
410 /* We have already determined that nr_files can fit in bytes_to_consume space.
411 Fill the sizes array with the number of files to be in each directory, and
412 then call do_subtree to fill the tree with files and directories. */
413
make_fractal_tree(long median_file_size,long maximum_file_size,long median_dir_nr_files,long max_dir_nr_files,long median_dir_branching,long max_dir_branching,long nr_files)414 void make_fractal_tree(long median_file_size, long maximum_file_size,
415 long median_dir_nr_files, long max_dir_nr_files,
416 long median_dir_branching, long max_dir_branching,
417 long nr_files)
418 {
419 long *sizes_start;
420 long *sizes_end;
421 long *sizes_index;
422 long remaining_files = nr_files;
423
424 /* collect together array of directory sizes for whole filesystem. This
425 cannot easily be done recursively without distorting the directory sizes
426 and making deeper directories smaller. Send me the code if you
427 disagree.:-) */
428 /* we almost certainly don't need this much space, but so what.... */
429 sizes_index = sizes_start = malloc(nr_files * sizeof(long));
430 for (; remaining_files > 0;) {
431 *sizes_index =
432 determine_size(median_dir_nr_files, max_dir_nr_files);
433 // we alloc space for nr_files, so we should avoid
434 // number of files in directory = 0 -grev.
435 if (*sizes_index == 0)
436 *sizes_index = 1;
437 *sizes_index =
438 (*sizes_index <
439 remaining_files) ? *sizes_index : remaining_files;
440
441 #ifdef DEBUG
442 printf("*sizes_index == %lu, ", *sizes_index);
443 #endif
444 remaining_files -= *sizes_index;
445 sizes_index++;
446 }
447 /* don't decrement below sizes_start if nr_files is 0 */
448 sizes_end = (sizes_index-- > sizes_start) ? sizes_index : sizes_start;
449
450 sizes_index = sizes_start;
451 srand(1);
452 do_subtree(sizes_start, sizes_end, median_file_size, maximum_file_size,
453 median_dir_branching, max_dir_branching);
454
455 }
456
main(int argc,char * argv[])457 int main(int argc, char *argv[])
458 {
459 /* initialized from argv[] */
460 long median_file_size,
461 median_dir_branching,
462 median_dir_nr_files,
463 max_dir_nr_files, max_dir_branching, max_file_size;
464 long nr_of_files = 0; /* files to be created */
465
466 if (argc != 11) {
467 print_usage();
468 exit(1);
469 }
470
471 write_buffer_size = atoi(argv[8]);
472 write_buffer = malloc(write_buffer_size);
473 memset(write_buffer, 'a', write_buffer_size);
474
475 /* the number of bytes that we desire this tree to consume. It will actually
476 consume more, because the last file will overshoot by a random amount, and
477 because the directories and metadata will consume space. */
478 bytes_to_consume = atol(argv[1]);
479 max_file_size = atol(argv[3]);
480 median_file_size = atol(argv[2]);
481 /* Figure out how many random files will fit into bytes_to_consume bytes. We
482 depend on resetting rand() to get the same result later. */
483 nr_of_files =
484 determine_nr_of_files(median_file_size, max_file_size,
485 bytes_to_consume);
486
487 strcpy(path, argv[9]);
488 mkdir(path, 0755);
489 stats = atol(argv[10]);
490 median_dir_branching = atol(argv[6]);
491 max_dir_branching = atol(argv[7]);
492 median_dir_nr_files = atol(argv[4]);
493 max_dir_nr_files = atol(argv[5]);
494 make_fractal_tree(median_file_size, max_file_size, median_dir_nr_files,
495 max_dir_nr_files, median_dir_branching,
496 max_dir_branching, nr_of_files);
497 print_stats();
498 if (stats)
499 printf("\nreiser_fract_tree finished\n");
500
501 return 0;
502 }
503