1 /*
2 * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* largeNbDicts
12 * This is a benchmark test tool
13 * dedicated to the specific case of dictionary decompression
14 * using a very large nb of dictionaries
15 * thus suffering latency from lots of cache misses.
16 * It's created in a bid to investigate performance and find optimizations. */
17
18
19 /*--- Dependencies ---*/
20
21 #include <stddef.h> /* size_t */
22 #include <stdlib.h> /* malloc, free, abort */
23 #include <stdio.h> /* fprintf */
24 #include <limits.h> /* UINT_MAX */
25 #include <assert.h> /* assert */
26
27 #include "util.h"
28 #include "benchfn.h"
29 #define ZSTD_STATIC_LINKING_ONLY
30 #include "zstd.h"
31 #include "zdict.h"
32
33
34 /*--- Constants --- */
35
36 #define KB *(1<<10)
37 #define MB *(1<<20)
38
39 #define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
40 #define DICTSIZE (4 KB)
41 #define CLEVEL_DEFAULT 3
42
43 #define BENCH_TIME_DEFAULT_S 6
44 #define RUN_TIME_DEFAULT_MS 1000
45 #define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
46
47 #define DISPLAY_LEVEL_DEFAULT 3
48
49 #define BENCH_SIZE_MAX (1200 MB)
50
51
52 /*--- Macros ---*/
53
54 #define CONTROL(c) { if (!(c)) abort(); }
55 #undef MIN
56 #define MIN(a,b) ((a) < (b) ? (a) : (b))
57
58
59 /*--- Display Macros ---*/
60
61 #define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
62 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
63 static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
64
65
66 /*--- buffer_t ---*/
67
68 typedef struct {
69 void* ptr;
70 size_t size;
71 size_t capacity;
72 } buffer_t;
73
74 static const buffer_t kBuffNull = { NULL, 0, 0 };
75
76 /* @return : kBuffNull if any error */
createBuffer(size_t capacity)77 static buffer_t createBuffer(size_t capacity)
78 {
79 assert(capacity > 0);
80 void* const ptr = malloc(capacity);
81 if (ptr==NULL) return kBuffNull;
82
83 buffer_t buffer;
84 buffer.ptr = ptr;
85 buffer.capacity = capacity;
86 buffer.size = 0;
87 return buffer;
88 }
89
freeBuffer(buffer_t buff)90 static void freeBuffer(buffer_t buff)
91 {
92 free(buff.ptr);
93 }
94
95
fillBuffer_fromHandle(buffer_t * buff,FILE * f)96 static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
97 {
98 size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
99 buff->size = readSize;
100 }
101
102
103 /* @return : kBuffNull if any error */
createBuffer_fromFile(const char * fileName)104 static buffer_t createBuffer_fromFile(const char* fileName)
105 {
106 U64 const fileSize = UTIL_getFileSize(fileName);
107 size_t const bufferSize = (size_t) fileSize;
108
109 if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
110 assert((U64)bufferSize == fileSize); /* check overflow */
111
112 { FILE* const f = fopen(fileName, "rb");
113 if (f == NULL) return kBuffNull;
114
115 buffer_t buff = createBuffer(bufferSize);
116 CONTROL(buff.ptr != NULL);
117
118 fillBuffer_fromHandle(&buff, f);
119 CONTROL(buff.size == buff.capacity);
120
121 fclose(f); /* do nothing specific if fclose() fails */
122 return buff;
123 }
124 }
125
126
127 /* @return : kBuffNull if any error */
128 static buffer_t
createDictionaryBuffer(const char * dictionaryName,const void * srcBuffer,const size_t * srcBlockSizes,size_t nbBlocks,size_t requestedDictSize)129 createDictionaryBuffer(const char* dictionaryName,
130 const void* srcBuffer,
131 const size_t* srcBlockSizes, size_t nbBlocks,
132 size_t requestedDictSize)
133 {
134 if (dictionaryName) {
135 DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
136 return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */
137
138 } else {
139
140 DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
141 (unsigned)requestedDictSize);
142 void* const dictBuffer = malloc(requestedDictSize);
143 CONTROL(dictBuffer != NULL);
144
145 assert(nbBlocks <= UINT_MAX);
146 size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
147 srcBuffer,
148 srcBlockSizes, (unsigned)nbBlocks);
149 CONTROL(!ZSTD_isError(dictSize));
150
151 buffer_t result;
152 result.ptr = dictBuffer;
153 result.capacity = requestedDictSize;
154 result.size = dictSize;
155 return result;
156 }
157 }
158
createCDictForDedicatedDictSearch(const void * dict,size_t dictSize,int compressionLevel)159 static ZSTD_CDict* createCDictForDedicatedDictSearch(const void* dict, size_t dictSize, int compressionLevel)
160 {
161 ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
162 ZSTD_CCtxParams_init(params, compressionLevel);
163 ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableDedicatedDictSearch, 1);
164 ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, compressionLevel);
165
166 ZSTD_CDict* cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, params, ZSTD_defaultCMem);
167
168 ZSTD_freeCCtxParams(params);
169 return cdict;
170 }
171
172 /*! BMK_loadFiles() :
173 * Loads `buffer`, with content from files listed within `fileNamesTable`.
174 * Fills `buffer` entirely.
175 * @return : 0 on success, !=0 on error */
loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char * const * fileNamesTable,unsigned nbFiles)176 static int loadFiles(void* buffer, size_t bufferSize,
177 size_t* fileSizes,
178 const char* const * fileNamesTable, unsigned nbFiles)
179 {
180 size_t pos = 0, totalSize = 0;
181
182 for (unsigned n=0; n<nbFiles; n++) {
183 U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
184 if (UTIL_isDirectory(fileNamesTable[n])) {
185 fileSizes[n] = 0;
186 continue;
187 }
188 if (fileSize == UTIL_FILESIZE_UNKNOWN) {
189 fileSizes[n] = 0;
190 continue;
191 }
192
193 FILE* const f = fopen(fileNamesTable[n], "rb");
194 assert(f!=NULL);
195
196 assert(pos <= bufferSize);
197 assert(fileSize <= bufferSize - pos);
198
199 { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
200 assert(readSize == fileSize);
201 pos += readSize;
202 }
203 fileSizes[n] = (size_t)fileSize;
204 totalSize += (size_t)fileSize;
205 fclose(f);
206 }
207
208 assert(totalSize == bufferSize);
209 return 0;
210 }
211
212
213
214 /*--- slice_collection_t ---*/
215
216 typedef struct {
217 void** slicePtrs;
218 size_t* capacities;
219 size_t nbSlices;
220 } slice_collection_t;
221
222 static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
223
freeSliceCollection(slice_collection_t collection)224 static void freeSliceCollection(slice_collection_t collection)
225 {
226 free(collection.slicePtrs);
227 free(collection.capacities);
228 }
229
230 /* shrinkSizes() :
231 * downsizes sizes of slices within collection, according to `newSizes`.
232 * every `newSizes` entry must be <= than its corresponding collection size */
shrinkSizes(slice_collection_t collection,const size_t * newSizes)233 void shrinkSizes(slice_collection_t collection,
234 const size_t* newSizes) /* presumed same size as collection */
235 {
236 size_t const nbSlices = collection.nbSlices;
237 for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
238 assert(newSizes[blockNb] <= collection.capacities[blockNb]);
239 collection.capacities[blockNb] = newSizes[blockNb];
240 }
241 }
242
243
244 /* splitSlices() :
245 * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
246 * otherwise, creates exactly nbSlices slices,
247 * by either truncating input (when smaller)
248 * or repeating input from beginning */
249 static slice_collection_t
splitSlices(slice_collection_t srcSlices,size_t blockSize,size_t nbSlices)250 splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
251 {
252 if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */
253 size_t nbSrcBlocks = 0;
254 for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
255 size_t pos = 0;
256 while (pos <= srcSlices.capacities[ssnb]) {
257 nbSrcBlocks++;
258 pos += blockSize;
259 }
260 }
261
262 if (nbSlices == 0) nbSlices = nbSrcBlocks;
263
264 void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
265 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
266 if (sliceTable == NULL || capacities == NULL) {
267 free(sliceTable);
268 free(capacities);
269 return kNullCollection;
270 }
271
272 size_t ssnb = 0;
273 for (size_t sliceNb=0; sliceNb < nbSlices; ) {
274 ssnb = (ssnb + 1) % srcSlices.nbSlices;
275 size_t pos = 0;
276 char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
277 while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
278 size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
279 sliceTable[sliceNb] = ptr + pos;
280 capacities[sliceNb] = size;
281 sliceNb++;
282 pos += blockSize;
283 }
284 }
285
286 slice_collection_t result;
287 result.nbSlices = nbSlices;
288 result.slicePtrs = sliceTable;
289 result.capacities = capacities;
290 return result;
291 }
292
293
sliceCollection_totalCapacity(slice_collection_t sc)294 static size_t sliceCollection_totalCapacity(slice_collection_t sc)
295 {
296 size_t totalSize = 0;
297 for (size_t n=0; n<sc.nbSlices; n++)
298 totalSize += sc.capacities[n];
299 return totalSize;
300 }
301
302
303 /* --- buffer collection --- */
304
305 typedef struct {
306 buffer_t buffer;
307 slice_collection_t slices;
308 } buffer_collection_t;
309
310
freeBufferCollection(buffer_collection_t bc)311 static void freeBufferCollection(buffer_collection_t bc)
312 {
313 freeBuffer(bc.buffer);
314 freeSliceCollection(bc.slices);
315 }
316
317
318 static buffer_collection_t
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)319 createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
320 {
321 size_t const bufferSize = sliceCollection_totalCapacity(sc);
322
323 buffer_t buffer = createBuffer(bufferSize);
324 CONTROL(buffer.ptr != NULL);
325
326 size_t const nbSlices = sc.nbSlices;
327 void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
328 CONTROL(slices != NULL);
329
330 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
331 CONTROL(capacities != NULL);
332
333 char* const ptr = (char*)buffer.ptr;
334 size_t pos = 0;
335 for (size_t n=0; n < nbSlices; n++) {
336 capacities[n] = sc.capacities[n];
337 slices[n] = ptr + pos;
338 pos += capacities[n];
339 }
340
341 buffer_collection_t result;
342 result.buffer = buffer;
343 result.slices.nbSlices = nbSlices;
344 result.slices.capacities = capacities;
345 result.slices.slicePtrs = slices;
346 return result;
347 }
348
349 static buffer_collection_t
createBufferCollection_fromSliceCollection(slice_collection_t sc)350 createBufferCollection_fromSliceCollection(slice_collection_t sc)
351 {
352 size_t const bufferSize = sliceCollection_totalCapacity(sc);
353
354 buffer_t buffer = createBuffer(bufferSize);
355 CONTROL(buffer.ptr != NULL);
356
357 size_t const nbSlices = sc.nbSlices;
358 void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
359 CONTROL(slices != NULL);
360
361 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
362 CONTROL(capacities != NULL);
363
364 char* const ptr = (char*)buffer.ptr;
365 size_t pos = 0;
366 for (size_t n=0; n < nbSlices; n++) {
367 capacities[n] = sc.capacities[n];
368 slices[n] = ptr + pos;
369 pos += capacities[n];
370 }
371
372 for (size_t i = 0; i < nbSlices; i++) {
373 memcpy(slices[i], sc.slicePtrs[i], sc.capacities[i]);
374 capacities[i] = sc.capacities[i];
375 }
376
377 buffer_collection_t result;
378 result.buffer = buffer;
379 result.slices.nbSlices = nbSlices;
380 result.slices.capacities = capacities;
381 result.slices.slicePtrs = slices;
382
383 return result;
384 }
385
386 /* @return : kBuffNull if any error */
387 static buffer_collection_t
createBufferCollection_fromFiles(const char * const * fileNamesTable,unsigned nbFiles)388 createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
389 {
390 U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
391 assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
392 assert(totalSizeToLoad <= BENCH_SIZE_MAX);
393 size_t const loadedSize = (size_t)totalSizeToLoad;
394 assert(loadedSize > 0);
395 void* const srcBuffer = malloc(loadedSize);
396 assert(srcBuffer != NULL);
397
398 assert(nbFiles > 0);
399 size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
400 assert(fileSizes != NULL);
401
402 /* Load input buffer */
403 int const errorCode = loadFiles(srcBuffer, loadedSize,
404 fileSizes,
405 fileNamesTable, nbFiles);
406 assert(errorCode == 0);
407
408 void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
409 assert(sliceTable != NULL);
410
411 char* const ptr = (char*)srcBuffer;
412 size_t pos = 0;
413 unsigned fileNb = 0;
414 for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
415 sliceTable[fileNb] = ptr + pos;
416 pos += fileSizes[fileNb];
417 }
418 assert(pos == loadedSize);
419 assert(fileNb == nbFiles);
420
421
422 buffer_t buffer;
423 buffer.ptr = srcBuffer;
424 buffer.capacity = loadedSize;
425 buffer.size = loadedSize;
426
427 slice_collection_t slices;
428 slices.slicePtrs = sliceTable;
429 slices.capacities = fileSizes;
430 slices.nbSlices = nbFiles;
431
432 buffer_collection_t bc;
433 bc.buffer = buffer;
434 bc.slices = slices;
435 return bc;
436 }
437
438
439
440
441 /*--- ddict_collection_t ---*/
442
443 typedef struct {
444 ZSTD_DDict** ddicts;
445 size_t nbDDict;
446 } ddict_collection_t;
447
448 typedef struct {
449 ZSTD_CDict** cdicts;
450 size_t nbCDict;
451 } cdict_collection_t;
452
453 static const cdict_collection_t kNullCDictCollection = { NULL, 0 };
454
freeCDictCollection(cdict_collection_t cdictc)455 static void freeCDictCollection(cdict_collection_t cdictc)
456 {
457 for (size_t dictNb=0; dictNb < cdictc.nbCDict; dictNb++) {
458 ZSTD_freeCDict(cdictc.cdicts[dictNb]);
459 }
460 free(cdictc.cdicts);
461 }
462
463 /* returns .buffers=NULL if operation fails */
createCDictCollection(const void * dictBuffer,size_t dictSize,size_t nbCDict,int cLevel,int dedicatedDictSearch)464 static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel, int dedicatedDictSearch)
465 {
466 ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*));
467 if (cdicts==NULL) return kNullCDictCollection;
468 for (size_t dictNb=0; dictNb < nbCDict; dictNb++) {
469 cdicts[dictNb] = dedicatedDictSearch ?
470 createCDictForDedicatedDictSearch(dictBuffer, dictSize, cLevel) :
471 ZSTD_createCDict(dictBuffer, dictSize, cLevel);
472 CONTROL(cdicts[dictNb] != NULL);
473 }
474 cdict_collection_t cdictc;
475 cdictc.cdicts = cdicts;
476 cdictc.nbCDict = nbCDict;
477 return cdictc;
478 }
479
480 static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
481
freeDDictCollection(ddict_collection_t ddictc)482 static void freeDDictCollection(ddict_collection_t ddictc)
483 {
484 for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
485 ZSTD_freeDDict(ddictc.ddicts[dictNb]);
486 }
487 free(ddictc.ddicts);
488 }
489
490 /* returns .buffers=NULL if operation fails */
createDDictCollection(const void * dictBuffer,size_t dictSize,size_t nbDDict)491 static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
492 {
493 ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
494 assert(ddicts != NULL);
495 if (ddicts==NULL) return kNullDDictCollection;
496 for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
497 ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
498 assert(ddicts[dictNb] != NULL);
499 }
500 ddict_collection_t ddictc;
501 ddictc.ddicts = ddicts;
502 ddictc.nbDDict = nbDDict;
503 return ddictc;
504 }
505
506
507 /* mess with addresses, so that linear scanning dictionaries != linear address scanning */
shuffleCDictionaries(cdict_collection_t dicts)508 void shuffleCDictionaries(cdict_collection_t dicts)
509 {
510 size_t const nbDicts = dicts.nbCDict;
511 for (size_t r=0; r<nbDicts; r++) {
512 size_t const d = (size_t)rand() % nbDicts;
513 ZSTD_CDict* tmpd = dicts.cdicts[d];
514 dicts.cdicts[d] = dicts.cdicts[r];
515 dicts.cdicts[r] = tmpd;
516 }
517 for (size_t r=0; r<nbDicts; r++) {
518 size_t const d1 = (size_t)rand() % nbDicts;
519 size_t const d2 = (size_t)rand() % nbDicts;
520 ZSTD_CDict* tmpd = dicts.cdicts[d1];
521 dicts.cdicts[d1] = dicts.cdicts[d2];
522 dicts.cdicts[d2] = tmpd;
523 }
524 }
525
526 /* mess with addresses, so that linear scanning dictionaries != linear address scanning */
shuffleDDictionaries(ddict_collection_t dicts)527 void shuffleDDictionaries(ddict_collection_t dicts)
528 {
529 size_t const nbDicts = dicts.nbDDict;
530 for (size_t r=0; r<nbDicts; r++) {
531 size_t const d = (size_t)rand() % nbDicts;
532 ZSTD_DDict* tmpd = dicts.ddicts[d];
533 dicts.ddicts[d] = dicts.ddicts[r];
534 dicts.ddicts[r] = tmpd;
535 }
536 for (size_t r=0; r<nbDicts; r++) {
537 size_t const d1 = (size_t)rand() % nbDicts;
538 size_t const d2 = (size_t)rand() % nbDicts;
539 ZSTD_DDict* tmpd = dicts.ddicts[d1];
540 dicts.ddicts[d1] = dicts.ddicts[d2];
541 dicts.ddicts[d2] = tmpd;
542 }
543 }
544
545
546 /* --- Compression --- */
547
548 /* compressBlocks() :
549 * @return : total compressed size of all blocks,
550 * or 0 if error.
551 */
compressBlocks(size_t * cSizes,slice_collection_t dstBlockBuffers,slice_collection_t srcBlockBuffers,ZSTD_CDict * cdict,int cLevel)552 static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
553 slice_collection_t dstBlockBuffers,
554 slice_collection_t srcBlockBuffers,
555 ZSTD_CDict* cdict, int cLevel)
556 {
557 size_t const nbBlocks = srcBlockBuffers.nbSlices;
558 assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
559
560 ZSTD_CCtx* const cctx = ZSTD_createCCtx();
561 assert(cctx != NULL);
562
563 size_t totalCSize = 0;
564 for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
565 size_t cBlockSize;
566 if (cdict == NULL) {
567 cBlockSize = ZSTD_compressCCtx(cctx,
568 dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
569 srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
570 cLevel);
571 } else {
572 cBlockSize = ZSTD_compress_usingCDict(cctx,
573 dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
574 srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
575 cdict);
576 }
577 CONTROL(!ZSTD_isError(cBlockSize));
578 if (cSizes) cSizes[blockNb] = cBlockSize;
579 totalCSize += cBlockSize;
580 }
581 return totalCSize;
582 }
583
584
585 /* --- Benchmark --- */
586
587 typedef struct {
588 ZSTD_CCtx* cctx;
589 size_t nbDicts;
590 size_t dictNb;
591 cdict_collection_t dictionaries;
592 } compressInstructions;
593
createCompressInstructions(cdict_collection_t dictionaries)594 compressInstructions createCompressInstructions(cdict_collection_t dictionaries)
595 {
596 compressInstructions ci;
597 ci.cctx = ZSTD_createCCtx();
598 CONTROL(ci.cctx != NULL);
599 ci.nbDicts = dictionaries.nbCDict;
600 ci.dictNb = 0;
601 ci.dictionaries = dictionaries;
602 return ci;
603 }
604
freeCompressInstructions(compressInstructions ci)605 void freeCompressInstructions(compressInstructions ci)
606 {
607 ZSTD_freeCCtx(ci.cctx);
608 }
609
610 typedef struct {
611 ZSTD_DCtx* dctx;
612 size_t nbDicts;
613 size_t dictNb;
614 ddict_collection_t dictionaries;
615 } decompressInstructions;
616
createDecompressInstructions(ddict_collection_t dictionaries)617 decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
618 {
619 decompressInstructions di;
620 di.dctx = ZSTD_createDCtx();
621 assert(di.dctx != NULL);
622 di.nbDicts = dictionaries.nbDDict;
623 di.dictNb = 0;
624 di.dictionaries = dictionaries;
625 return di;
626 }
627
freeDecompressInstructions(decompressInstructions di)628 void freeDecompressInstructions(decompressInstructions di)
629 {
630 ZSTD_freeDCtx(di.dctx);
631 }
632
633 /* benched function */
compress(const void * src,size_t srcSize,void * dst,size_t dstCapacity,void * payload)634 size_t compress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
635 {
636 compressInstructions* const ci = (compressInstructions*) payload;
637 (void)dstCapacity;
638
639 ZSTD_compress_usingCDict(ci->cctx,
640 dst, srcSize,
641 src, srcSize,
642 ci->dictionaries.cdicts[ci->dictNb]);
643
644 ci->dictNb = ci->dictNb + 1;
645 if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0;
646
647 return srcSize;
648 }
649
650 /* benched function */
decompress(const void * src,size_t srcSize,void * dst,size_t dstCapacity,void * payload)651 size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
652 {
653 decompressInstructions* const di = (decompressInstructions*) payload;
654
655 size_t const result = ZSTD_decompress_usingDDict(di->dctx,
656 dst, dstCapacity,
657 src, srcSize,
658 di->dictionaries.ddicts[di->dictNb]);
659
660 di->dictNb = di->dictNb + 1;
661 if (di->dictNb >= di->nbDicts) di->dictNb = 0;
662
663 return result;
664 }
665
666
benchMem(slice_collection_t dstBlocks,slice_collection_t srcBlocks,ddict_collection_t ddictionaries,cdict_collection_t cdictionaries,unsigned nbRounds,int benchCompression)667 static int benchMem(slice_collection_t dstBlocks,
668 slice_collection_t srcBlocks,
669 ddict_collection_t ddictionaries,
670 cdict_collection_t cdictionaries,
671 unsigned nbRounds, int benchCompression)
672 {
673 assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
674
675 unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
676 unsigned const total_time_ms = nbRounds * ms_per_round;
677
678 double bestSpeed = 0.;
679
680 BMK_timedFnState_t* const benchState =
681 BMK_createTimedFnState(total_time_ms, ms_per_round);
682
683 decompressInstructions di = createDecompressInstructions(ddictionaries);
684 compressInstructions ci = createCompressInstructions(cdictionaries);
685 void* payload = benchCompression ? (void*)&ci : (void*)&di;
686 BMK_benchParams_t const bp = {
687 .benchFn = benchCompression ? compress : decompress,
688 .benchPayload = payload,
689 .initFn = NULL,
690 .initPayload = NULL,
691 .errorFn = ZSTD_isError,
692 .blockCount = dstBlocks.nbSlices,
693 .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
694 .srcSizes = srcBlocks.capacities,
695 .dstBuffers = dstBlocks.slicePtrs,
696 .dstCapacities = dstBlocks.capacities,
697 .blockResults = NULL
698 };
699
700 for (;;) {
701 BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
702 CONTROL(BMK_isSuccessful_runOutcome(outcome));
703
704 BMK_runTime_t const result = BMK_extract_runTime(outcome);
705 double const dTime_ns = result.nanoSecPerRun;
706 double const dTime_sec = (double)dTime_ns / 1000000000;
707 size_t const srcSize = result.sumOfReturn;
708 double const speed_MBps = (double)srcSize / dTime_sec / (1 MB);
709 if (speed_MBps > bestSpeed) bestSpeed = speed_MBps;
710 if (benchCompression)
711 DISPLAY("Compression Speed : %.1f MB/s \r", bestSpeed);
712 else
713 DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
714
715 fflush(stdout);
716 if (BMK_isCompleted_TimedFn(benchState)) break;
717 }
718 DISPLAY("\n");
719
720 freeDecompressInstructions(di);
721 freeCompressInstructions(ci);
722 BMK_freeTimedFnState(benchState);
723
724 return 0; /* success */
725 }
726
727
728 /*! bench() :
729 * fileName : file to load for benchmarking purpose
730 * dictionary : optional (can be NULL), file to load as dictionary,
731 * if none provided : will be calculated on the fly by the program.
732 * @return : 0 is success, 1+ otherwise */
bench(const char ** fileNameTable,unsigned nbFiles,const char * dictionary,size_t blockSize,int clevel,unsigned nbDictMax,unsigned nbBlocks,unsigned nbRounds,int benchCompression,int dedicatedDictSearch)733 int bench(const char** fileNameTable, unsigned nbFiles,
734 const char* dictionary,
735 size_t blockSize, int clevel,
736 unsigned nbDictMax, unsigned nbBlocks,
737 unsigned nbRounds, int benchCompression,
738 int dedicatedDictSearch)
739 {
740 int result = 0;
741
742 DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
743 buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
744 CONTROL(srcs.buffer.ptr != NULL);
745 buffer_t srcBuffer = srcs.buffer;
746 size_t const srcSize = srcBuffer.size;
747 DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
748 (double)srcSize / (1 MB));
749
750 slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
751 nbBlocks = (unsigned)(srcSlices.nbSlices);
752 DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
753 if (blockSize)
754 DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
755 DISPLAYLEVEL(3, "\n");
756 size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
757
758
759 size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
760 CONTROL(dstCapacities != NULL);
761 size_t dstBufferCapacity = 0;
762 for (size_t bnb=0; bnb<nbBlocks; bnb++) {
763 dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
764 dstBufferCapacity += dstCapacities[bnb];
765 }
766
767 buffer_t dstBuffer = createBuffer(dstBufferCapacity);
768 CONTROL(dstBuffer.ptr != NULL);
769
770 void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
771 CONTROL(sliceTable != NULL);
772
773 { char* const ptr = dstBuffer.ptr;
774 size_t pos = 0;
775 for (size_t snb=0; snb < nbBlocks; snb++) {
776 sliceTable[snb] = ptr + pos;
777 pos += dstCapacities[snb];
778 } }
779
780 slice_collection_t dstSlices;
781 dstSlices.capacities = dstCapacities;
782 dstSlices.slicePtrs = sliceTable;
783 dstSlices.nbSlices = nbBlocks;
784
785
786 /* dictionary determination */
787 buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
788 srcs.buffer.ptr,
789 srcs.slices.capacities, srcs.slices.nbSlices,
790 DICTSIZE);
791 CONTROL(dictBuffer.ptr != NULL);
792
793 ZSTD_CDict* const cdict = dedicatedDictSearch ?
794 createCDictForDedicatedDictSearch(dictBuffer.ptr, dictBuffer.size, clevel) :
795 ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
796 CONTROL(cdict != NULL);
797
798 size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
799 CONTROL(cTotalSizeNoDict != 0);
800 DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
801 clevel,
802 (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
803
804 size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
805 CONTROL(cSizes != NULL);
806
807 size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
808 CONTROL(cTotalSize != 0);
809 DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
810 (unsigned)dictBuffer.size,
811 (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
812
813 /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
814 shrinkSizes(dstSlices, cSizes);
815
816 unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
817
818 cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel, dedicatedDictSearch);
819 CONTROL(cdictionaries.cdicts != NULL);
820
821 ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
822 CONTROL(ddictionaries.ddicts != NULL);
823
824 if (benchCompression) {
825 size_t const dictMem = ZSTD_estimateCDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
826 size_t const allDictMem = dictMem * nbDicts;
827 DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
828 nbDicts, (double)allDictMem / (1 MB));
829
830 shuffleCDictionaries(cdictionaries);
831
832 buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices);
833 CONTROL(resultCollection.buffer.ptr != NULL);
834
835 result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression);
836
837 freeBufferCollection(resultCollection);
838 } else {
839 size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
840 size_t const allDictMem = dictMem * nbDicts;
841 DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
842 nbDicts, (double)allDictMem / (1 MB));
843
844 shuffleDDictionaries(ddictionaries);
845
846 buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
847 CONTROL(resultCollection.buffer.ptr != NULL);
848
849 result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression);
850
851 freeBufferCollection(resultCollection);
852 }
853
854 /* free all heap objects in reverse order */
855 freeCDictCollection(cdictionaries);
856 freeDDictCollection(ddictionaries);
857 free(cSizes);
858 ZSTD_freeCDict(cdict);
859 freeBuffer(dictBuffer);
860 freeSliceCollection(dstSlices);
861 freeBuffer(dstBuffer);
862 freeSliceCollection(srcSlices);
863 freeBufferCollection(srcs);
864
865 return result;
866 }
867
868
869
870 /* --- Command Line --- */
871
872 /*! readU32FromChar() :
873 * @return : unsigned integer value read from input in `char` format.
874 * allows and interprets K, KB, KiB, M, MB and MiB suffix.
875 * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
876 * Note : function will exit() program if digit sequence overflows */
readU32FromChar(const char ** stringPtr)877 static unsigned readU32FromChar(const char** stringPtr)
878 {
879 unsigned result = 0;
880 while ((**stringPtr >='0') && (**stringPtr <='9')) {
881 unsigned const max = (((unsigned)(-1)) / 10) - 1;
882 assert(result <= max); /* check overflow */
883 result *= 10, result += (unsigned)**stringPtr - '0', (*stringPtr)++ ;
884 }
885 if ((**stringPtr=='K') || (**stringPtr=='M')) {
886 unsigned const maxK = ((unsigned)(-1)) >> 10;
887 assert(result <= maxK); /* check overflow */
888 result <<= 10;
889 if (**stringPtr=='M') {
890 assert(result <= maxK); /* check overflow */
891 result <<= 10;
892 }
893 (*stringPtr)++; /* skip `K` or `M` */
894 if (**stringPtr=='i') (*stringPtr)++;
895 if (**stringPtr=='B') (*stringPtr)++;
896 }
897 return result;
898 }
899
900 /** longCommandWArg() :
901 * check if *stringPtr is the same as longCommand.
902 * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
903 * @return 0 and doesn't modify *stringPtr otherwise.
904 */
longCommandWArg(const char ** stringPtr,const char * longCommand)905 static int longCommandWArg(const char** stringPtr, const char* longCommand)
906 {
907 size_t const comSize = strlen(longCommand);
908 int const result = !strncmp(*stringPtr, longCommand, comSize);
909 if (result) *stringPtr += comSize;
910 return result;
911 }
912
913
usage(const char * exeName)914 int usage(const char* exeName)
915 {
916 DISPLAY (" \n");
917 DISPLAY (" %s [Options] filename(s) \n", exeName);
918 DISPLAY (" \n");
919 DISPLAY ("Options : \n");
920 DISPLAY ("-z : benchmark compression (default) \n");
921 DISPLAY ("-d : benchmark decompression \n");
922 DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
923 DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
924 DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
925 DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
926 DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
927 DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
928 DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
929 DISPLAY ("-h : help (this text) \n");
930 return 0;
931 }
932
bad_usage(const char * exeName)933 int bad_usage(const char* exeName)
934 {
935 DISPLAY (" bad usage : \n");
936 usage(exeName);
937 return 1;
938 }
939
main(int argc,const char ** argv)940 int main (int argc, const char** argv)
941 {
942 int recursiveMode = 0;
943 int benchCompression = 1;
944 int dedicatedDictSearch = 0;
945 unsigned nbRounds = BENCH_TIME_DEFAULT_S;
946 const char* const exeName = argv[0];
947
948 if (argc < 2) return bad_usage(exeName);
949
950 const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*));
951 assert(nameTable != NULL);
952 unsigned nameIdx = 0;
953
954 const char* dictionary = NULL;
955 int cLevel = CLEVEL_DEFAULT;
956 size_t blockSize = BLOCKSIZE_DEFAULT;
957 unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
958 unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
959
960 for (int argNb = 1; argNb < argc ; argNb++) {
961 const char* argument = argv[argNb];
962 if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
963 if (!strcmp(argument, "-d")) { benchCompression = 0; continue; }
964 if (!strcmp(argument, "-z")) { benchCompression = 1; continue; }
965 if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
966 if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
967 if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
968 if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
969 if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
970 if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
971 if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
972 if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
973 if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
974 if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
975 if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
976 /* anything that's not a command is a filename */
977 nameTable[nameIdx++] = argument;
978 }
979
980 FileNamesTable* filenameTable;
981
982 if (recursiveMode) {
983 #ifndef UTIL_HAS_CREATEFILELIST
984 assert(0); /* missing capability, do not run */
985 #endif
986 filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */);
987 } else {
988 filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL);
989 nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */
990 }
991
992 int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dedicatedDictSearch);
993
994 UTIL_freeFileNamesTable(filenameTable);
995 free(nameTable);
996
997 return result;
998 }
999