1 /*
2  * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* largeNbDicts
12  * This is a benchmark test tool
13  * dedicated to the specific case of dictionary decompression
14  * using a very large nb of dictionaries
15  * thus suffering latency from lots of cache misses.
16  * It's created in a bid to investigate performance and find optimizations. */
17 
18 
19 /*---  Dependencies  ---*/
20 
21 #include <stddef.h>   /* size_t */
22 #include <stdlib.h>   /* malloc, free, abort */
23 #include <stdio.h>    /* fprintf */
24 #include <limits.h>   /* UINT_MAX */
25 #include <assert.h>   /* assert */
26 
27 #include "util.h"
28 #include "benchfn.h"
29 #define ZSTD_STATIC_LINKING_ONLY
30 #include "zstd.h"
31 #include "zdict.h"
32 
33 
34 /*---  Constants  --- */
35 
36 #define KB  *(1<<10)
37 #define MB  *(1<<20)
38 
39 #define BLOCKSIZE_DEFAULT 0  /* no slicing into blocks */
40 #define DICTSIZE  (4 KB)
41 #define CLEVEL_DEFAULT 3
42 
43 #define BENCH_TIME_DEFAULT_S   6
44 #define RUN_TIME_DEFAULT_MS    1000
45 #define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
46 
47 #define DISPLAY_LEVEL_DEFAULT 3
48 
49 #define BENCH_SIZE_MAX (1200 MB)
50 
51 
52 /*---  Macros  ---*/
53 
54 #define CONTROL(c)   { if (!(c)) abort(); }
55 #undef MIN
56 #define MIN(a,b)     ((a) < (b) ? (a) : (b))
57 
58 
59 /*---  Display Macros  ---*/
60 
61 #define DISPLAY(...)         fprintf(stdout, __VA_ARGS__)
62 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
63 static int g_displayLevel = DISPLAY_LEVEL_DEFAULT;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
64 
65 
66 /*---  buffer_t  ---*/
67 
68 typedef struct {
69     void* ptr;
70     size_t size;
71     size_t capacity;
72 } buffer_t;
73 
74 static const buffer_t kBuffNull = { NULL, 0, 0 };
75 
76 /* @return : kBuffNull if any error */
createBuffer(size_t capacity)77 static buffer_t createBuffer(size_t capacity)
78 {
79     assert(capacity > 0);
80     void* const ptr = malloc(capacity);
81     if (ptr==NULL) return kBuffNull;
82 
83     buffer_t buffer;
84     buffer.ptr = ptr;
85     buffer.capacity = capacity;
86     buffer.size = 0;
87     return buffer;
88 }
89 
freeBuffer(buffer_t buff)90 static void freeBuffer(buffer_t buff)
91 {
92     free(buff.ptr);
93 }
94 
95 
fillBuffer_fromHandle(buffer_t * buff,FILE * f)96 static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
97 {
98     size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
99     buff->size = readSize;
100 }
101 
102 
103 /* @return : kBuffNull if any error */
createBuffer_fromFile(const char * fileName)104 static buffer_t createBuffer_fromFile(const char* fileName)
105 {
106     U64 const fileSize = UTIL_getFileSize(fileName);
107     size_t const bufferSize = (size_t) fileSize;
108 
109     if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
110     assert((U64)bufferSize == fileSize);   /* check overflow */
111 
112     {   FILE* const f = fopen(fileName, "rb");
113         if (f == NULL) return kBuffNull;
114 
115         buffer_t buff = createBuffer(bufferSize);
116         CONTROL(buff.ptr != NULL);
117 
118         fillBuffer_fromHandle(&buff, f);
119         CONTROL(buff.size == buff.capacity);
120 
121         fclose(f);   /* do nothing specific if fclose() fails */
122         return buff;
123     }
124 }
125 
126 
127 /* @return : kBuffNull if any error */
128 static buffer_t
createDictionaryBuffer(const char * dictionaryName,const void * srcBuffer,const size_t * srcBlockSizes,size_t nbBlocks,size_t requestedDictSize)129 createDictionaryBuffer(const char* dictionaryName,
130                        const void* srcBuffer,
131                        const size_t* srcBlockSizes, size_t nbBlocks,
132                        size_t requestedDictSize)
133 {
134     if (dictionaryName) {
135         DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
136         return createBuffer_fromFile(dictionaryName);  /* note : result might be kBuffNull */
137 
138     } else {
139 
140         DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
141                         (unsigned)requestedDictSize);
142         void* const dictBuffer = malloc(requestedDictSize);
143         CONTROL(dictBuffer != NULL);
144 
145         assert(nbBlocks <= UINT_MAX);
146         size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
147                                                       srcBuffer,
148                                                       srcBlockSizes, (unsigned)nbBlocks);
149         CONTROL(!ZSTD_isError(dictSize));
150 
151         buffer_t result;
152         result.ptr = dictBuffer;
153         result.capacity = requestedDictSize;
154         result.size = dictSize;
155         return result;
156     }
157 }
158 
createCDictForDedicatedDictSearch(const void * dict,size_t dictSize,int compressionLevel)159 static ZSTD_CDict* createCDictForDedicatedDictSearch(const void* dict, size_t dictSize, int compressionLevel)
160 {
161     ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
162     ZSTD_CCtxParams_init(params, compressionLevel);
163     ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableDedicatedDictSearch, 1);
164     ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, compressionLevel);
165 
166     ZSTD_CDict* cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, params, ZSTD_defaultCMem);
167 
168     ZSTD_freeCCtxParams(params);
169     return cdict;
170 }
171 
172 /*! BMK_loadFiles() :
173  *  Loads `buffer`, with content from files listed within `fileNamesTable`.
174  *  Fills `buffer` entirely.
175  * @return : 0 on success, !=0 on error */
loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char * const * fileNamesTable,unsigned nbFiles)176 static int loadFiles(void* buffer, size_t bufferSize,
177                      size_t* fileSizes,
178                      const char* const * fileNamesTable, unsigned nbFiles)
179 {
180     size_t pos = 0, totalSize = 0;
181 
182     for (unsigned n=0; n<nbFiles; n++) {
183         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
184         if (UTIL_isDirectory(fileNamesTable[n])) {
185             fileSizes[n] = 0;
186             continue;
187         }
188         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
189             fileSizes[n] = 0;
190             continue;
191         }
192 
193         FILE* const f = fopen(fileNamesTable[n], "rb");
194         assert(f!=NULL);
195 
196         assert(pos <= bufferSize);
197         assert(fileSize <= bufferSize - pos);
198 
199         {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
200             assert(readSize == fileSize);
201             pos += readSize;
202         }
203         fileSizes[n] = (size_t)fileSize;
204         totalSize += (size_t)fileSize;
205         fclose(f);
206     }
207 
208     assert(totalSize == bufferSize);
209     return 0;
210 }
211 
212 
213 
214 /*---  slice_collection_t  ---*/
215 
216 typedef struct {
217     void** slicePtrs;
218     size_t* capacities;
219     size_t nbSlices;
220 } slice_collection_t;
221 
222 static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
223 
freeSliceCollection(slice_collection_t collection)224 static void freeSliceCollection(slice_collection_t collection)
225 {
226     free(collection.slicePtrs);
227     free(collection.capacities);
228 }
229 
230 /* shrinkSizes() :
231  * downsizes sizes of slices within collection, according to `newSizes`.
232  * every `newSizes` entry must be <= than its corresponding collection size */
shrinkSizes(slice_collection_t collection,const size_t * newSizes)233 void shrinkSizes(slice_collection_t collection,
234                  const size_t* newSizes)  /* presumed same size as collection */
235 {
236     size_t const nbSlices = collection.nbSlices;
237     for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
238         assert(newSizes[blockNb] <= collection.capacities[blockNb]);
239         collection.capacities[blockNb] = newSizes[blockNb];
240     }
241 }
242 
243 
244 /* splitSlices() :
245  * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
246  *            otherwise, creates exactly nbSlices slices,
247  *            by either truncating input (when smaller)
248  *            or repeating input from beginning */
249 static slice_collection_t
splitSlices(slice_collection_t srcSlices,size_t blockSize,size_t nbSlices)250 splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
251 {
252     if (blockSize==0) blockSize = (size_t)(-1);   /* means "do not cut" */
253     size_t nbSrcBlocks = 0;
254     for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
255         size_t pos = 0;
256         while (pos <= srcSlices.capacities[ssnb]) {
257             nbSrcBlocks++;
258             pos += blockSize;
259         }
260     }
261 
262     if (nbSlices == 0) nbSlices = nbSrcBlocks;
263 
264     void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
265     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
266     if (sliceTable == NULL || capacities == NULL) {
267         free(sliceTable);
268         free(capacities);
269         return kNullCollection;
270     }
271 
272     size_t ssnb = 0;
273     for (size_t sliceNb=0; sliceNb < nbSlices; ) {
274         ssnb = (ssnb + 1) % srcSlices.nbSlices;
275         size_t pos = 0;
276         char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
277         while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
278             size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
279             sliceTable[sliceNb] = ptr + pos;
280             capacities[sliceNb] = size;
281             sliceNb++;
282             pos += blockSize;
283         }
284     }
285 
286     slice_collection_t result;
287     result.nbSlices = nbSlices;
288     result.slicePtrs = sliceTable;
289     result.capacities = capacities;
290     return result;
291 }
292 
293 
sliceCollection_totalCapacity(slice_collection_t sc)294 static size_t sliceCollection_totalCapacity(slice_collection_t sc)
295 {
296     size_t totalSize = 0;
297     for (size_t n=0; n<sc.nbSlices; n++)
298         totalSize += sc.capacities[n];
299     return totalSize;
300 }
301 
302 
303 /* ---  buffer collection  --- */
304 
305 typedef struct {
306     buffer_t buffer;
307     slice_collection_t slices;
308 } buffer_collection_t;
309 
310 
freeBufferCollection(buffer_collection_t bc)311 static void freeBufferCollection(buffer_collection_t bc)
312 {
313     freeBuffer(bc.buffer);
314     freeSliceCollection(bc.slices);
315 }
316 
317 
318 static buffer_collection_t
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)319 createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
320 {
321     size_t const bufferSize = sliceCollection_totalCapacity(sc);
322 
323     buffer_t buffer = createBuffer(bufferSize);
324     CONTROL(buffer.ptr != NULL);
325 
326     size_t const nbSlices = sc.nbSlices;
327     void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
328     CONTROL(slices != NULL);
329 
330     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
331     CONTROL(capacities != NULL);
332 
333     char* const ptr = (char*)buffer.ptr;
334     size_t pos = 0;
335     for (size_t n=0; n < nbSlices; n++) {
336         capacities[n] = sc.capacities[n];
337         slices[n] = ptr + pos;
338         pos += capacities[n];
339     }
340 
341     buffer_collection_t result;
342     result.buffer = buffer;
343     result.slices.nbSlices = nbSlices;
344     result.slices.capacities = capacities;
345     result.slices.slicePtrs = slices;
346     return result;
347 }
348 
349 static buffer_collection_t
createBufferCollection_fromSliceCollection(slice_collection_t sc)350 createBufferCollection_fromSliceCollection(slice_collection_t sc)
351 {
352     size_t const bufferSize = sliceCollection_totalCapacity(sc);
353 
354     buffer_t buffer = createBuffer(bufferSize);
355     CONTROL(buffer.ptr != NULL);
356 
357     size_t const nbSlices = sc.nbSlices;
358     void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
359     CONTROL(slices != NULL);
360 
361     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
362     CONTROL(capacities != NULL);
363 
364     char* const ptr = (char*)buffer.ptr;
365     size_t pos = 0;
366     for (size_t n=0; n < nbSlices; n++) {
367         capacities[n] = sc.capacities[n];
368         slices[n] = ptr + pos;
369         pos += capacities[n];
370     }
371 
372     for (size_t i = 0; i < nbSlices; i++) {
373         memcpy(slices[i], sc.slicePtrs[i], sc.capacities[i]);
374         capacities[i] = sc.capacities[i];
375     }
376 
377     buffer_collection_t result;
378     result.buffer = buffer;
379     result.slices.nbSlices = nbSlices;
380     result.slices.capacities = capacities;
381     result.slices.slicePtrs = slices;
382 
383     return result;
384 }
385 
386 /* @return : kBuffNull if any error */
387 static buffer_collection_t
createBufferCollection_fromFiles(const char * const * fileNamesTable,unsigned nbFiles)388 createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
389 {
390     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
391     assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
392     assert(totalSizeToLoad <= BENCH_SIZE_MAX);
393     size_t const loadedSize = (size_t)totalSizeToLoad;
394     assert(loadedSize > 0);
395     void* const srcBuffer = malloc(loadedSize);
396     assert(srcBuffer != NULL);
397 
398     assert(nbFiles > 0);
399     size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
400     assert(fileSizes != NULL);
401 
402     /* Load input buffer */
403     int const errorCode = loadFiles(srcBuffer, loadedSize,
404                                     fileSizes,
405                                     fileNamesTable, nbFiles);
406     assert(errorCode == 0);
407 
408     void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
409     assert(sliceTable != NULL);
410 
411     char* const ptr = (char*)srcBuffer;
412     size_t pos = 0;
413     unsigned fileNb = 0;
414     for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
415         sliceTable[fileNb] = ptr + pos;
416         pos += fileSizes[fileNb];
417     }
418     assert(pos == loadedSize);
419     assert(fileNb == nbFiles);
420 
421 
422     buffer_t buffer;
423     buffer.ptr = srcBuffer;
424     buffer.capacity = loadedSize;
425     buffer.size = loadedSize;
426 
427     slice_collection_t slices;
428     slices.slicePtrs = sliceTable;
429     slices.capacities = fileSizes;
430     slices.nbSlices = nbFiles;
431 
432     buffer_collection_t bc;
433     bc.buffer = buffer;
434     bc.slices = slices;
435     return bc;
436 }
437 
438 
439 
440 
441 /*---  ddict_collection_t  ---*/
442 
443 typedef struct {
444     ZSTD_DDict** ddicts;
445     size_t nbDDict;
446 } ddict_collection_t;
447 
448 typedef struct {
449     ZSTD_CDict** cdicts;
450     size_t nbCDict;
451 } cdict_collection_t;
452 
453 static const cdict_collection_t kNullCDictCollection = { NULL, 0 };
454 
freeCDictCollection(cdict_collection_t cdictc)455 static void freeCDictCollection(cdict_collection_t cdictc)
456 {
457     for (size_t dictNb=0; dictNb < cdictc.nbCDict; dictNb++) {
458         ZSTD_freeCDict(cdictc.cdicts[dictNb]);
459     }
460     free(cdictc.cdicts);
461 }
462 
463 /* returns .buffers=NULL if operation fails */
createCDictCollection(const void * dictBuffer,size_t dictSize,size_t nbCDict,int cLevel,int dedicatedDictSearch)464 static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel, int dedicatedDictSearch)
465 {
466     ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*));
467     if (cdicts==NULL) return kNullCDictCollection;
468     for (size_t dictNb=0; dictNb < nbCDict; dictNb++) {
469         cdicts[dictNb] = dedicatedDictSearch ?
470             createCDictForDedicatedDictSearch(dictBuffer, dictSize, cLevel) :
471             ZSTD_createCDict(dictBuffer, dictSize, cLevel);
472         CONTROL(cdicts[dictNb] != NULL);
473     }
474     cdict_collection_t cdictc;
475     cdictc.cdicts = cdicts;
476     cdictc.nbCDict = nbCDict;
477     return cdictc;
478 }
479 
480 static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
481 
freeDDictCollection(ddict_collection_t ddictc)482 static void freeDDictCollection(ddict_collection_t ddictc)
483 {
484     for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
485         ZSTD_freeDDict(ddictc.ddicts[dictNb]);
486     }
487     free(ddictc.ddicts);
488 }
489 
490 /* returns .buffers=NULL if operation fails */
createDDictCollection(const void * dictBuffer,size_t dictSize,size_t nbDDict)491 static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
492 {
493     ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
494     assert(ddicts != NULL);
495     if (ddicts==NULL) return kNullDDictCollection;
496     for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
497         ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
498         assert(ddicts[dictNb] != NULL);
499     }
500     ddict_collection_t ddictc;
501     ddictc.ddicts = ddicts;
502     ddictc.nbDDict = nbDDict;
503     return ddictc;
504 }
505 
506 
507 /* mess with addresses, so that linear scanning dictionaries != linear address scanning */
shuffleCDictionaries(cdict_collection_t dicts)508 void shuffleCDictionaries(cdict_collection_t dicts)
509 {
510     size_t const nbDicts = dicts.nbCDict;
511     for (size_t r=0; r<nbDicts; r++) {
512         size_t const d = (size_t)rand() % nbDicts;
513         ZSTD_CDict* tmpd = dicts.cdicts[d];
514         dicts.cdicts[d] = dicts.cdicts[r];
515         dicts.cdicts[r] = tmpd;
516     }
517     for (size_t r=0; r<nbDicts; r++) {
518         size_t const d1 = (size_t)rand() % nbDicts;
519         size_t const d2 = (size_t)rand() % nbDicts;
520         ZSTD_CDict* tmpd = dicts.cdicts[d1];
521         dicts.cdicts[d1] = dicts.cdicts[d2];
522         dicts.cdicts[d2] = tmpd;
523     }
524 }
525 
526 /* mess with addresses, so that linear scanning dictionaries != linear address scanning */
shuffleDDictionaries(ddict_collection_t dicts)527 void shuffleDDictionaries(ddict_collection_t dicts)
528 {
529     size_t const nbDicts = dicts.nbDDict;
530     for (size_t r=0; r<nbDicts; r++) {
531         size_t const d = (size_t)rand() % nbDicts;
532         ZSTD_DDict* tmpd = dicts.ddicts[d];
533         dicts.ddicts[d] = dicts.ddicts[r];
534         dicts.ddicts[r] = tmpd;
535     }
536     for (size_t r=0; r<nbDicts; r++) {
537         size_t const d1 = (size_t)rand() % nbDicts;
538         size_t const d2 = (size_t)rand() % nbDicts;
539         ZSTD_DDict* tmpd = dicts.ddicts[d1];
540         dicts.ddicts[d1] = dicts.ddicts[d2];
541         dicts.ddicts[d2] = tmpd;
542     }
543 }
544 
545 
546 /* ---   Compression  --- */
547 
548 /* compressBlocks() :
549  * @return : total compressed size of all blocks,
550  *        or 0 if error.
551  */
compressBlocks(size_t * cSizes,slice_collection_t dstBlockBuffers,slice_collection_t srcBlockBuffers,ZSTD_CDict * cdict,int cLevel)552 static size_t compressBlocks(size_t* cSizes,   /* optional (can be NULL). If present, must contain at least nbBlocks fields */
553                              slice_collection_t dstBlockBuffers,
554                              slice_collection_t srcBlockBuffers,
555                              ZSTD_CDict* cdict, int cLevel)
556 {
557     size_t const nbBlocks = srcBlockBuffers.nbSlices;
558     assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
559 
560     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
561     assert(cctx != NULL);
562 
563     size_t totalCSize = 0;
564     for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
565         size_t cBlockSize;
566         if (cdict == NULL) {
567             cBlockSize = ZSTD_compressCCtx(cctx,
568                             dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
569                             srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
570                             cLevel);
571         } else {
572             cBlockSize = ZSTD_compress_usingCDict(cctx,
573                             dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
574                             srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
575                             cdict);
576         }
577         CONTROL(!ZSTD_isError(cBlockSize));
578         if (cSizes) cSizes[blockNb] = cBlockSize;
579         totalCSize += cBlockSize;
580     }
581     return totalCSize;
582 }
583 
584 
585 /* ---  Benchmark  --- */
586 
587 typedef struct {
588     ZSTD_CCtx* cctx;
589     size_t nbDicts;
590     size_t dictNb;
591     cdict_collection_t dictionaries;
592 } compressInstructions;
593 
createCompressInstructions(cdict_collection_t dictionaries)594 compressInstructions createCompressInstructions(cdict_collection_t dictionaries)
595 {
596     compressInstructions ci;
597     ci.cctx = ZSTD_createCCtx();
598     CONTROL(ci.cctx != NULL);
599     ci.nbDicts = dictionaries.nbCDict;
600     ci.dictNb = 0;
601     ci.dictionaries = dictionaries;
602     return ci;
603 }
604 
freeCompressInstructions(compressInstructions ci)605 void freeCompressInstructions(compressInstructions ci)
606 {
607     ZSTD_freeCCtx(ci.cctx);
608 }
609 
610 typedef struct {
611     ZSTD_DCtx* dctx;
612     size_t nbDicts;
613     size_t dictNb;
614     ddict_collection_t dictionaries;
615 } decompressInstructions;
616 
createDecompressInstructions(ddict_collection_t dictionaries)617 decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
618 {
619     decompressInstructions di;
620     di.dctx = ZSTD_createDCtx();
621     assert(di.dctx != NULL);
622     di.nbDicts = dictionaries.nbDDict;
623     di.dictNb = 0;
624     di.dictionaries = dictionaries;
625     return di;
626 }
627 
freeDecompressInstructions(decompressInstructions di)628 void freeDecompressInstructions(decompressInstructions di)
629 {
630     ZSTD_freeDCtx(di.dctx);
631 }
632 
633 /* benched function */
compress(const void * src,size_t srcSize,void * dst,size_t dstCapacity,void * payload)634 size_t compress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
635 {
636     compressInstructions* const ci = (compressInstructions*) payload;
637     (void)dstCapacity;
638 
639     ZSTD_compress_usingCDict(ci->cctx,
640                     dst, srcSize,
641                     src, srcSize,
642                     ci->dictionaries.cdicts[ci->dictNb]);
643 
644     ci->dictNb = ci->dictNb + 1;
645     if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0;
646 
647     return srcSize;
648 }
649 
650 /* benched function */
decompress(const void * src,size_t srcSize,void * dst,size_t dstCapacity,void * payload)651 size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
652 {
653     decompressInstructions* const di = (decompressInstructions*) payload;
654 
655     size_t const result = ZSTD_decompress_usingDDict(di->dctx,
656                                         dst, dstCapacity,
657                                         src, srcSize,
658                                         di->dictionaries.ddicts[di->dictNb]);
659 
660     di->dictNb = di->dictNb + 1;
661     if (di->dictNb >= di->nbDicts) di->dictNb = 0;
662 
663     return result;
664 }
665 
666 
benchMem(slice_collection_t dstBlocks,slice_collection_t srcBlocks,ddict_collection_t ddictionaries,cdict_collection_t cdictionaries,unsigned nbRounds,int benchCompression)667 static int benchMem(slice_collection_t dstBlocks,
668                     slice_collection_t srcBlocks,
669                     ddict_collection_t ddictionaries,
670                     cdict_collection_t cdictionaries,
671                     unsigned nbRounds, int benchCompression)
672 {
673     assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
674 
675     unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
676     unsigned const total_time_ms = nbRounds * ms_per_round;
677 
678     double bestSpeed = 0.;
679 
680     BMK_timedFnState_t* const benchState =
681             BMK_createTimedFnState(total_time_ms, ms_per_round);
682 
683     decompressInstructions di = createDecompressInstructions(ddictionaries);
684     compressInstructions ci = createCompressInstructions(cdictionaries);
685     void* payload = benchCompression ? (void*)&ci : (void*)&di;
686     BMK_benchParams_t const bp = {
687         .benchFn = benchCompression ? compress : decompress,
688         .benchPayload = payload,
689         .initFn = NULL,
690         .initPayload = NULL,
691         .errorFn = ZSTD_isError,
692         .blockCount = dstBlocks.nbSlices,
693         .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
694         .srcSizes = srcBlocks.capacities,
695         .dstBuffers = dstBlocks.slicePtrs,
696         .dstCapacities = dstBlocks.capacities,
697         .blockResults = NULL
698     };
699 
700     for (;;) {
701         BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
702         CONTROL(BMK_isSuccessful_runOutcome(outcome));
703 
704         BMK_runTime_t const result = BMK_extract_runTime(outcome);
705         double const dTime_ns = result.nanoSecPerRun;
706         double const dTime_sec = (double)dTime_ns / 1000000000;
707         size_t const srcSize = result.sumOfReturn;
708         double const speed_MBps = (double)srcSize / dTime_sec / (1 MB);
709         if (speed_MBps > bestSpeed) bestSpeed = speed_MBps;
710         if (benchCompression)
711             DISPLAY("Compression Speed : %.1f MB/s \r", bestSpeed);
712         else
713             DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
714 
715         fflush(stdout);
716         if (BMK_isCompleted_TimedFn(benchState)) break;
717     }
718     DISPLAY("\n");
719 
720     freeDecompressInstructions(di);
721     freeCompressInstructions(ci);
722     BMK_freeTimedFnState(benchState);
723 
724     return 0;   /* success */
725 }
726 
727 
728 /*! bench() :
729  *  fileName : file to load for benchmarking purpose
730  *  dictionary : optional (can be NULL), file to load as dictionary,
731  *              if none provided : will be calculated on the fly by the program.
732  * @return : 0 is success, 1+ otherwise */
bench(const char ** fileNameTable,unsigned nbFiles,const char * dictionary,size_t blockSize,int clevel,unsigned nbDictMax,unsigned nbBlocks,unsigned nbRounds,int benchCompression,int dedicatedDictSearch)733 int bench(const char** fileNameTable, unsigned nbFiles,
734           const char* dictionary,
735           size_t blockSize, int clevel,
736           unsigned nbDictMax, unsigned nbBlocks,
737           unsigned nbRounds, int benchCompression,
738           int dedicatedDictSearch)
739 {
740     int result = 0;
741 
742     DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
743     buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
744     CONTROL(srcs.buffer.ptr != NULL);
745     buffer_t srcBuffer = srcs.buffer;
746     size_t const srcSize = srcBuffer.size;
747     DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
748                     (double)srcSize / (1 MB));
749 
750     slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
751     nbBlocks = (unsigned)(srcSlices.nbSlices);
752     DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
753     if (blockSize)
754         DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
755     DISPLAYLEVEL(3, "\n");
756     size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
757 
758 
759     size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
760     CONTROL(dstCapacities != NULL);
761     size_t dstBufferCapacity = 0;
762     for (size_t bnb=0; bnb<nbBlocks; bnb++) {
763         dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
764         dstBufferCapacity += dstCapacities[bnb];
765     }
766 
767     buffer_t dstBuffer = createBuffer(dstBufferCapacity);
768     CONTROL(dstBuffer.ptr != NULL);
769 
770     void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
771     CONTROL(sliceTable != NULL);
772 
773     {   char* const ptr = dstBuffer.ptr;
774         size_t pos = 0;
775         for (size_t snb=0; snb < nbBlocks; snb++) {
776             sliceTable[snb] = ptr + pos;
777             pos += dstCapacities[snb];
778     }   }
779 
780     slice_collection_t dstSlices;
781     dstSlices.capacities = dstCapacities;
782     dstSlices.slicePtrs = sliceTable;
783     dstSlices.nbSlices = nbBlocks;
784 
785 
786     /* dictionary determination */
787     buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
788                                 srcs.buffer.ptr,
789                                 srcs.slices.capacities, srcs.slices.nbSlices,
790                                 DICTSIZE);
791     CONTROL(dictBuffer.ptr != NULL);
792 
793     ZSTD_CDict* const cdict = dedicatedDictSearch ?
794         createCDictForDedicatedDictSearch(dictBuffer.ptr, dictBuffer.size, clevel) :
795         ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
796     CONTROL(cdict != NULL);
797 
798     size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
799     CONTROL(cTotalSizeNoDict != 0);
800     DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f  (%u bytes) \n",
801                     clevel,
802                     (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
803 
804     size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
805     CONTROL(cSizes != NULL);
806 
807     size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
808     CONTROL(cTotalSize != 0);
809     DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f  (%u bytes) \n",
810                     (unsigned)dictBuffer.size,
811                     (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
812 
813     /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
814     shrinkSizes(dstSlices, cSizes);
815 
816     unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
817 
818     cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel, dedicatedDictSearch);
819     CONTROL(cdictionaries.cdicts != NULL);
820 
821     ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
822     CONTROL(ddictionaries.ddicts != NULL);
823 
824     if (benchCompression) {
825         size_t const dictMem = ZSTD_estimateCDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
826         size_t const allDictMem = dictMem * nbDicts;
827         DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
828                         nbDicts, (double)allDictMem / (1 MB));
829 
830         shuffleCDictionaries(cdictionaries);
831 
832         buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices);
833         CONTROL(resultCollection.buffer.ptr != NULL);
834 
835         result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression);
836 
837         freeBufferCollection(resultCollection);
838     } else {
839         size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
840         size_t const allDictMem = dictMem * nbDicts;
841         DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
842                         nbDicts, (double)allDictMem / (1 MB));
843 
844         shuffleDDictionaries(ddictionaries);
845 
846         buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
847         CONTROL(resultCollection.buffer.ptr != NULL);
848 
849         result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression);
850 
851         freeBufferCollection(resultCollection);
852     }
853 
854     /* free all heap objects in reverse order */
855     freeCDictCollection(cdictionaries);
856     freeDDictCollection(ddictionaries);
857     free(cSizes);
858     ZSTD_freeCDict(cdict);
859     freeBuffer(dictBuffer);
860     freeSliceCollection(dstSlices);
861     freeBuffer(dstBuffer);
862     freeSliceCollection(srcSlices);
863     freeBufferCollection(srcs);
864 
865     return result;
866 }
867 
868 
869 
870 /* ---  Command Line  --- */
871 
872 /*! readU32FromChar() :
873  * @return : unsigned integer value read from input in `char` format.
874  *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
875  *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
876  *  Note : function will exit() program if digit sequence overflows */
readU32FromChar(const char ** stringPtr)877 static unsigned readU32FromChar(const char** stringPtr)
878 {
879     unsigned result = 0;
880     while ((**stringPtr >='0') && (**stringPtr <='9')) {
881         unsigned const max = (((unsigned)(-1)) / 10) - 1;
882         assert(result <= max);   /* check overflow */
883         result *= 10, result += (unsigned)**stringPtr - '0', (*stringPtr)++ ;
884     }
885     if ((**stringPtr=='K') || (**stringPtr=='M')) {
886         unsigned const maxK = ((unsigned)(-1)) >> 10;
887         assert(result <= maxK);   /* check overflow */
888         result <<= 10;
889         if (**stringPtr=='M') {
890             assert(result <= maxK);   /* check overflow */
891             result <<= 10;
892         }
893         (*stringPtr)++;  /* skip `K` or `M` */
894         if (**stringPtr=='i') (*stringPtr)++;
895         if (**stringPtr=='B') (*stringPtr)++;
896     }
897     return result;
898 }
899 
900 /** longCommandWArg() :
901  *  check if *stringPtr is the same as longCommand.
902  *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
903  * @return 0 and doesn't modify *stringPtr otherwise.
904  */
longCommandWArg(const char ** stringPtr,const char * longCommand)905 static int longCommandWArg(const char** stringPtr, const char* longCommand)
906 {
907     size_t const comSize = strlen(longCommand);
908     int const result = !strncmp(*stringPtr, longCommand, comSize);
909     if (result) *stringPtr += comSize;
910     return result;
911 }
912 
913 
usage(const char * exeName)914 int usage(const char* exeName)
915 {
916     DISPLAY (" \n");
917     DISPLAY (" %s [Options] filename(s) \n", exeName);
918     DISPLAY (" \n");
919     DISPLAY ("Options : \n");
920     DISPLAY ("-z          : benchmark compression (default) \n");
921     DISPLAY ("-d          : benchmark decompression \n");
922     DISPLAY ("-r          : recursively load all files in subdirectories (default: off) \n");
923     DISPLAY ("-B#         : split input into blocks of size # (default: no split) \n");
924     DISPLAY ("-#          : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
925     DISPLAY ("-D #        : use # as a dictionary (default: create one) \n");
926     DISPLAY ("-i#         : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
927     DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
928     DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
929     DISPLAY ("-h          : help (this text) \n");
930     return 0;
931 }
932 
bad_usage(const char * exeName)933 int bad_usage(const char* exeName)
934 {
935     DISPLAY (" bad usage : \n");
936     usage(exeName);
937     return 1;
938 }
939 
main(int argc,const char ** argv)940 int main (int argc, const char** argv)
941 {
942     int recursiveMode = 0;
943     int benchCompression = 1;
944     int dedicatedDictSearch = 0;
945     unsigned nbRounds = BENCH_TIME_DEFAULT_S;
946     const char* const exeName = argv[0];
947 
948     if (argc < 2) return bad_usage(exeName);
949 
950     const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*));
951     assert(nameTable != NULL);
952     unsigned nameIdx = 0;
953 
954     const char* dictionary = NULL;
955     int cLevel = CLEVEL_DEFAULT;
956     size_t blockSize = BLOCKSIZE_DEFAULT;
957     unsigned nbDicts = 0;  /* determine nbDicts automatically: 1 dictionary per block */
958     unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
959 
960     for (int argNb = 1; argNb < argc ; argNb++) {
961         const char* argument = argv[argNb];
962         if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
963         if (!strcmp(argument, "-d")) { benchCompression = 0; continue; }
964         if (!strcmp(argument, "-z")) { benchCompression = 1; continue; }
965         if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
966         if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
967         if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
968         if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
969         if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
970         if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
971         if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
972         if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
973         if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
974         if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
975         if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
976         /* anything that's not a command is a filename */
977         nameTable[nameIdx++] = argument;
978     }
979 
980     FileNamesTable* filenameTable;
981 
982     if (recursiveMode) {
983 #ifndef UTIL_HAS_CREATEFILELIST
984         assert(0);   /* missing capability, do not run */
985 #endif
986         filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */);
987     } else {
988         filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL);
989         nameTable = NULL;  /* UTIL_createFileNamesTable() takes ownership of nameTable */
990     }
991 
992     int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dedicatedDictSearch);
993 
994     UTIL_freeFileNamesTable(filenameTable);
995     free(nameTable);
996 
997     return result;
998 }
999