1 /*
2  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /*-*************************************
12 *  Dependencies
13 ***************************************/
14 #include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
15 #include "../common/cpu.h"
16 #include "../common/mem.h"
17 #include "hist.h"           /* HIST_countFast_wksp */
18 #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
19 #include "../common/fse.h"
20 #define HUF_STATIC_LINKING_ONLY
21 #include "../common/huf.h"
22 #include "zstd_compress_internal.h"
23 #include "zstd_compress_sequences.h"
24 #include "zstd_compress_literals.h"
25 #include "zstd_fast.h"
26 #include "zstd_double_fast.h"
27 #include "zstd_lazy.h"
28 #include "zstd_opt.h"
29 #include "zstd_ldm.h"
30 #include "zstd_compress_superblock.h"
31 
32 /* ***************************************************************
33 *  Tuning parameters
34 *****************************************************************/
35 /*!
36  * COMPRESS_HEAPMODE :
 * Select how the default compression function ZSTD_compress() allocates its context,
38  * on stack (0, default), or into heap (1).
39  * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
40  */
41 #ifndef ZSTD_COMPRESS_HEAPMODE
42 #  define ZSTD_COMPRESS_HEAPMODE 0
43 #endif
44 
45 
46 /*-*************************************
47 *  Helper functions
48 ***************************************/
/* ZSTD_compressBound() :
 * Function form of the ZSTD_COMPRESSBOUND() macro, evaluated for `srcSize`.
 * Note : the result is only meaningful for the "normal" full-block strategy.
 * In streaming mode, frequent flushes produce many small blocks, and the
 * accumulated header overhead can make the compressed data larger than
 * the value returned here.
 */
size_t ZSTD_compressBound(size_t srcSize)
{
    size_t const bound = ZSTD_COMPRESSBOUND(srcSize);
    return bound;
}
59 
60 
61 /*-*************************************
62 *  Context memory management
63 ***************************************/
64 struct ZSTD_CDict_s {
65     const void* dictContent;
66     size_t dictContentSize;
67     ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
68     U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
69     ZSTD_cwksp workspace;
70     ZSTD_matchState_t matchState;
71     ZSTD_compressedBlockState_t cBlockState;
72     ZSTD_customMem customMem;
73     U32 dictID;
74     int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
75 };  /* typedef'd to ZSTD_CDict within "zstd.h" */
76 
ZSTD_createCCtx(void)77 ZSTD_CCtx* ZSTD_createCCtx(void)
78 {
79     return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
80 }
81 
/* ZSTD_initCCtx() :
 * Brings a freshly obtained context to its initial state :
 * the whole object is zeroed, `memManager` is recorded as its allocator,
 * the BMI2 cpuid query result is stored, and parameters are reset.
 * `cctx` must not be NULL.
 */
static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
    size_t resetResult;

    assert(cctx != NULL);
    ZSTD_memset(cctx, 0, sizeof(*cctx));
    cctx->customMem = memManager;
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());

    /* resetting parameters is not expected to fail here */
    resetResult = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
    assert(!ZSTD_isError(resetResult));
    (void)resetResult;   /* only read by assert() */
}
93 
ZSTD_createCCtx_advanced(ZSTD_customMem customMem)94 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
95 {
96     ZSTD_STATIC_ASSERT(zcss_init==0);
97     ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
98     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
99     {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
100         if (!cctx) return NULL;
101         ZSTD_initCCtx(cctx, customMem);
102         return cctx;
103     }
104 }
105 
ZSTD_initStaticCCtx(void * workspace,size_t workspaceSize)106 ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
107 {
108     ZSTD_cwksp ws;
109     ZSTD_CCtx* cctx;
110     if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
111     if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
112     ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
113 
114     cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
115     if (cctx == NULL) return NULL;
116 
117     ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
118     ZSTD_cwksp_move(&cctx->workspace, &ws);
119     cctx->staticSize = workspaceSize;
120 
121     /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
122     if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
123     cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
124     cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
125     cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
126     cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
127     return cctx;
128 }
129 
130 /**
131  * Clears and frees all of the dictionaries in the CCtx.
132  */
ZSTD_clearAllDicts(ZSTD_CCtx * cctx)133 static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
134 {
135     ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
136     ZSTD_freeCDict(cctx->localDict.cdict);
137     ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
138     ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
139     cctx->cdict = NULL;
140 }
141 
/* ZSTD_sizeof_localDict() :
 * @return : size of the local CDict (as reported by ZSTD_sizeof_CDict()),
 *           plus dictSize when a dictBuffer is present.
 */
static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
{
    size_t total = 0;
    if (dict.dictBuffer != NULL)
        total = dict.dictSize;
    total += ZSTD_sizeof_CDict(dict.cdict);
    return total;
}
148 
/* ZSTD_freeCCtxContent() :
 * Releases everything the context refers to (dictionaries, the MT context
 * when built with ZSTD_MULTITHREAD, and the workspace), but not the
 * ZSTD_CCtx object itself.
 * `cctx` must be non-NULL and must not be a static context.
 */
static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);

    ZSTD_clearAllDicts(cctx);
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_freeCCtx(cctx->mtctx);
    cctx->mtctx = NULL;
#endif
    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}
159 
/* ZSTD_freeCCtx() :
 * Frees a context and its content. NULL is accepted and does nothing.
 * @return : 0 on success, or a memory_allocation error when `cctx`
 *           is a static context (staticSize != 0).
 */
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    int ownedByWorkspace;

    if (cctx == NULL) return 0;   /* support free on NULL */
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "not compatible with static CCtx");

    /* ownership is queried before the workspace gets freed */
    ownedByWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
    ZSTD_freeCCtxContent(cctx);

    /* the object is only freed separately when it lives outside the workspace */
    if (ownedByWorkspace) return 0;
    ZSTD_customFree(cctx, cctx->customMem);
    return 0;
}
174 
175 
/* ZSTD_sizeof_mtctx() :
 * @return : ZSTDMT_sizeof_CCtx() of the attached MT context when built with
 *           ZSTD_MULTITHREAD, 0 otherwise.
 */
static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
    size_t mtSize = 0;
#ifdef ZSTD_MULTITHREAD
    mtSize = ZSTDMT_sizeof_CCtx(cctx->mtctx);
#else
    (void)cctx;   /* unused in single-thread builds */
#endif
    return mtSize;
}
185 
186 
ZSTD_sizeof_CCtx(const ZSTD_CCtx * cctx)187 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
188 {
189     if (cctx==NULL) return 0;   /* support sizeof on NULL */
190     /* cctx may be in the workspace */
191     return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
192            + ZSTD_cwksp_sizeof(&cctx->workspace)
193            + ZSTD_sizeof_localDict(cctx->localDict)
194            + ZSTD_sizeof_mtctx(cctx);
195 }
196 
/* ZSTD_sizeof_CStream() :
 * A CStream is the same object as a CCtx, so this forwards to ZSTD_sizeof_CCtx().
 */
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    size_t const streamSize = ZSTD_sizeof_CCtx(zcs);   /* same object */
    return streamSize;
}
201 
202 /* private API call, for dictBuilder only */
ZSTD_getSeqStore(const ZSTD_CCtx * ctx)203 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
204 
205 /* Returns 1 if compression parameters are such that we should
206  * enable long distance matching (wlog >= 27, strategy >= btopt).
207  * Returns 0 otherwise.
208  */
ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters * const cParams)209 static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
210     return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
211 }
212 
ZSTD_makeCCtxParamsFromCParams(ZSTD_compressionParameters cParams)213 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
214         ZSTD_compressionParameters cParams)
215 {
216     ZSTD_CCtx_params cctxParams;
217     /* should not matter, as all cParams are presumed properly defined */
218     ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
219     cctxParams.cParams = cParams;
220 
221     if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
222         DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
223         cctxParams.ldmParams.enableLdm = 1;
224         /* LDM is enabled by default for optimal parser and window size >= 128MB */
225         ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
226         assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
227         assert(cctxParams.ldmParams.hashRateLog < 32);
228     }
229 
230     assert(!ZSTD_checkCParams(cParams));
231     return cctxParams;
232 }
233 
ZSTD_createCCtxParams_advanced(ZSTD_customMem customMem)234 static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
235         ZSTD_customMem customMem)
236 {
237     ZSTD_CCtx_params* params;
238     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
239     params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
240             sizeof(ZSTD_CCtx_params), customMem);
241     if (!params) { return NULL; }
242     ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
243     params->customMem = customMem;
244     return params;
245 }
246 
ZSTD_createCCtxParams(void)247 ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
248 {
249     return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
250 }
251 
/* ZSTD_freeCCtxParams() :
 * Frees `params` with the allocator stored inside it. NULL is accepted.
 * @return : always 0.
 */
size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
    if (params != NULL) {
        ZSTD_customFree(params, params->customMem);
    }
    return 0;
}
258 
/* ZSTD_CCtxParams_reset() :
 * Re-initializes `params` at ZSTD_CLEVEL_DEFAULT.
 * @return : the result of ZSTD_CCtxParams_init().
 */
size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
    size_t const initResult = ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
    return initResult;
}
263 
/* ZSTD_CCtxParams_init() :
 * Zeroes `cctxParams`, then sets its compression level to `compressionLevel`
 * and turns the contentSizeFlag frame parameter on.
 * @return : 0, or a GENERIC error when `cctxParams` is NULL.
 */
size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");

    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->fParams.contentSizeFlag = 1;
    cctxParams->compressionLevel = compressionLevel;
    return 0;
}
271 
/* ZSTD_CCtxParams_init_advanced() :
 * Zeroes `cctxParams`, then copies the compression and frame parameters
 * from `params` into it.
 * @return : 0, a GENERIC error when `cctxParams` is NULL, or the error
 *           reported by ZSTD_checkCParams() for `params.cParams`.
 */
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");

    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    assert(!ZSTD_checkCParams(params.cParams));

    /* the level should not matter, as all cParams are presumed properly defined */
    cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;
    cctxParams->fParams = params.fParams;
    cctxParams->cParams = params.cParams;
    return 0;
}
283 
284 /* ZSTD_assignParamsToCCtxParams() :
285  * params is presumed valid at this stage */
ZSTD_assignParamsToCCtxParams(const ZSTD_CCtx_params * cctxParams,const ZSTD_parameters * params)286 static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
287         const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
288 {
289     ZSTD_CCtx_params ret = *cctxParams;
290     assert(!ZSTD_checkCParams(params->cParams));
291     ret.cParams = params->cParams;
292     ret.fParams = params->fParams;
293     ret.compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* should not matter, as all cParams are presumed properly defined */
294     return ret;
295 }
296 
ZSTD_cParam_getBounds(ZSTD_cParameter param)297 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
298 {
299     ZSTD_bounds bounds = { 0, 0, 0 };
300 
301     switch(param)
302     {
303     case ZSTD_c_compressionLevel:
304         bounds.lowerBound = ZSTD_minCLevel();
305         bounds.upperBound = ZSTD_maxCLevel();
306         return bounds;
307 
308     case ZSTD_c_windowLog:
309         bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
310         bounds.upperBound = ZSTD_WINDOWLOG_MAX;
311         return bounds;
312 
313     case ZSTD_c_hashLog:
314         bounds.lowerBound = ZSTD_HASHLOG_MIN;
315         bounds.upperBound = ZSTD_HASHLOG_MAX;
316         return bounds;
317 
318     case ZSTD_c_chainLog:
319         bounds.lowerBound = ZSTD_CHAINLOG_MIN;
320         bounds.upperBound = ZSTD_CHAINLOG_MAX;
321         return bounds;
322 
323     case ZSTD_c_searchLog:
324         bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
325         bounds.upperBound = ZSTD_SEARCHLOG_MAX;
326         return bounds;
327 
328     case ZSTD_c_minMatch:
329         bounds.lowerBound = ZSTD_MINMATCH_MIN;
330         bounds.upperBound = ZSTD_MINMATCH_MAX;
331         return bounds;
332 
333     case ZSTD_c_targetLength:
334         bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
335         bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
336         return bounds;
337 
338     case ZSTD_c_strategy:
339         bounds.lowerBound = ZSTD_STRATEGY_MIN;
340         bounds.upperBound = ZSTD_STRATEGY_MAX;
341         return bounds;
342 
343     case ZSTD_c_contentSizeFlag:
344         bounds.lowerBound = 0;
345         bounds.upperBound = 1;
346         return bounds;
347 
348     case ZSTD_c_checksumFlag:
349         bounds.lowerBound = 0;
350         bounds.upperBound = 1;
351         return bounds;
352 
353     case ZSTD_c_dictIDFlag:
354         bounds.lowerBound = 0;
355         bounds.upperBound = 1;
356         return bounds;
357 
358     case ZSTD_c_nbWorkers:
359         bounds.lowerBound = 0;
360 #ifdef ZSTD_MULTITHREAD
361         bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
362 #else
363         bounds.upperBound = 0;
364 #endif
365         return bounds;
366 
367     case ZSTD_c_jobSize:
368         bounds.lowerBound = 0;
369 #ifdef ZSTD_MULTITHREAD
370         bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
371 #else
372         bounds.upperBound = 0;
373 #endif
374         return bounds;
375 
376     case ZSTD_c_overlapLog:
377 #ifdef ZSTD_MULTITHREAD
378         bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
379         bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
380 #else
381         bounds.lowerBound = 0;
382         bounds.upperBound = 0;
383 #endif
384         return bounds;
385 
386     case ZSTD_c_enableDedicatedDictSearch:
387         bounds.lowerBound = 0;
388         bounds.upperBound = 1;
389         return bounds;
390 
391     case ZSTD_c_enableLongDistanceMatching:
392         bounds.lowerBound = 0;
393         bounds.upperBound = 1;
394         return bounds;
395 
396     case ZSTD_c_ldmHashLog:
397         bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
398         bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
399         return bounds;
400 
401     case ZSTD_c_ldmMinMatch:
402         bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
403         bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
404         return bounds;
405 
406     case ZSTD_c_ldmBucketSizeLog:
407         bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
408         bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
409         return bounds;
410 
411     case ZSTD_c_ldmHashRateLog:
412         bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
413         bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
414         return bounds;
415 
416     /* experimental parameters */
417     case ZSTD_c_rsyncable:
418         bounds.lowerBound = 0;
419         bounds.upperBound = 1;
420         return bounds;
421 
422     case ZSTD_c_forceMaxWindow :
423         bounds.lowerBound = 0;
424         bounds.upperBound = 1;
425         return bounds;
426 
427     case ZSTD_c_format:
428         ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
429         bounds.lowerBound = ZSTD_f_zstd1;
430         bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
431         return bounds;
432 
433     case ZSTD_c_forceAttachDict:
434         ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
435         bounds.lowerBound = ZSTD_dictDefaultAttach;
436         bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
437         return bounds;
438 
439     case ZSTD_c_literalCompressionMode:
440         ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
441         bounds.lowerBound = ZSTD_lcm_auto;
442         bounds.upperBound = ZSTD_lcm_uncompressed;
443         return bounds;
444 
445     case ZSTD_c_targetCBlockSize:
446         bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
447         bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
448         return bounds;
449 
450     case ZSTD_c_srcSizeHint:
451         bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
452         bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
453         return bounds;
454 
455     case ZSTD_c_stableInBuffer:
456     case ZSTD_c_stableOutBuffer:
457         bounds.lowerBound = (int)ZSTD_bm_buffered;
458         bounds.upperBound = (int)ZSTD_bm_stable;
459         return bounds;
460 
461     case ZSTD_c_blockDelimiters:
462         bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
463         bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
464         return bounds;
465 
466     case ZSTD_c_validateSequences:
467         bounds.lowerBound = 0;
468         bounds.upperBound = 1;
469         return bounds;
470 
471     default:
472         bounds.error = ERROR(parameter_unsupported);
473         return bounds;
474     }
475 }
476 
477 /* ZSTD_cParam_clampBounds:
478  * Clamps the value into the bounded range.
479  */
ZSTD_cParam_clampBounds(ZSTD_cParameter cParam,int * value)480 static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
481 {
482     ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
483     if (ZSTD_isError(bounds.error)) return bounds.error;
484     if (*value < bounds.lowerBound) *value = bounds.lowerBound;
485     if (*value > bounds.upperBound) *value = bounds.upperBound;
486     return 0;
487 }
488 
/* BOUNDCHECK() :
 * Makes the enclosing function return a parameter_outOfBound error when
 * `val` is not within the bounds of `cParam`.
 * Wrapped in do { } while (0) so that `BOUNDCHECK(p, v);` behaves as a single
 * statement, including as the body of an `if` that is followed by `else`.
 */
#define BOUNDCHECK(cParam, val) do { \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
                    parameter_outOfBound, "Param out of bounds"); \
} while (0)
493 
494 
/* ZSTD_isUpdateAuthorized() :
 * Tells whether `param` belongs to the set of parameters for which this
 * function grants an update (used by ZSTD_CCtx_setParameter() when the
 * context is no longer in its init stage).
 * @return : 1 for the compression level and the listed match-finder
 *           parameters, 0 for everything else (unknown parameters included).
 */
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    int authorized = 0;

    switch(param)
    {
    /* updatable */
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        authorized = 1;
        break;

    /* not updatable : listed explicitly, same outcome as default */
    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow :
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
    default:
        break;
    }

    return authorized;
}
536 
/* ZSTD_CCtx_setParameter() :
 * Sets `param` to `value` in the requested parameters of `cctx`.
 * Once the context has left its init stage, only parameters accepted by
 * ZSTD_isUpdateAuthorized() may be set (cParamsChanged is then raised);
 * any other parameter yields a stage_wrong error.
 * ZSTD_c_nbWorkers != 0 is refused on a static context.
 * @return : the result of ZSTD_CCtxParams_setParameter(), or an error code.
 */
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);

    if (cctx->streamStage != zcss_init) {
        if (!ZSTD_isUpdateAuthorized(param)) {
            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
        }
        cctx->cParamsChanged = 1;
    }

    switch(param)
    {
    /* the only parameter with a context-dependent restriction */
    case ZSTD_c_nbWorkers:
        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
                        "MT not compatible with static alloc");
        break;

    /* known parameters, forwarded without further checks here */
    case ZSTD_c_compressionLevel:
    case ZSTD_c_windowLog:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_format:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
        break;

    default:
        RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }

    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
590 
/* ZSTD_CCtxParams_setParameter() :
 * Stores `value` for `param` into `CCtxParams`.
 * Depending on the parameter, an out-of-range value is either rejected
 * (BOUNDCHECK => parameter_outOfBound error) or clamped into range
 * (ZSTD_cParam_clampBounds). For several parameters, 0 means "use default"
 * and is stored without a bounds check.
 * Multithreading parameters (nbWorkers, jobSize, overlapLog, rsyncable)
 * only accept 0 when built without ZSTD_MULTITHREAD.
 * @return : the value actually stored, converted to size_t
 *           (0 for a negative compression level, which size_t cannot
 *           represent), or an error code.
 */
size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
                                    ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
    switch(param)
    {
    case ZSTD_c_format :
        BOUNDCHECK(ZSTD_c_format, value);
        CCtxParams->format = (ZSTD_format_e)value;
        return (size_t)CCtxParams->format;

    case ZSTD_c_compressionLevel : {
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
        if (value == 0)
            CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
        else
            CCtxParams->compressionLevel = value;
        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
        return 0;  /* return type (size_t) cannot represent negative values */
    }

    case ZSTD_c_windowLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_windowLog, value);
        CCtxParams->cParams.windowLog = (U32)value;
        return CCtxParams->cParams.windowLog;

    case ZSTD_c_hashLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_hashLog, value);
        CCtxParams->cParams.hashLog = (U32)value;
        return CCtxParams->cParams.hashLog;

    case ZSTD_c_chainLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_chainLog, value);
        CCtxParams->cParams.chainLog = (U32)value;
        return CCtxParams->cParams.chainLog;

    case ZSTD_c_searchLog :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_searchLog, value);
        CCtxParams->cParams.searchLog = (U32)value;
        return (size_t)value;

    case ZSTD_c_minMatch :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_minMatch, value);
        CCtxParams->cParams.minMatch = value;
        return CCtxParams->cParams.minMatch;

    case ZSTD_c_targetLength :
        BOUNDCHECK(ZSTD_c_targetLength, value);
        CCtxParams->cParams.targetLength = value;
        return CCtxParams->cParams.targetLength;

    case ZSTD_c_strategy :
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_strategy, value);
        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
        return (size_t)CCtxParams->cParams.strategy;

    case ZSTD_c_contentSizeFlag :
        /* Content size written in frame header _when known_ (default:1) */
        DEBUGLOG(4, "set content size flag = %u", (value!=0));
        CCtxParams->fParams.contentSizeFlag = value != 0;
        return CCtxParams->fParams.contentSizeFlag;

    case ZSTD_c_checksumFlag :
        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
        CCtxParams->fParams.checksumFlag = value != 0;
        return CCtxParams->fParams.checksumFlag;

    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
        /* stored inverted : the field is a "no dictID" flag */
        CCtxParams->fParams.noDictIDFlag = !value;
        return !CCtxParams->fParams.noDictIDFlag;

    case ZSTD_c_forceMaxWindow :
        CCtxParams->forceWindow = (value != 0);
        return CCtxParams->forceWindow;

    case ZSTD_c_forceAttachDict : {
        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
        CCtxParams->attachDictPref = pref;
        return CCtxParams->attachDictPref;
    }

    case ZSTD_c_literalCompressionMode : {
        const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
        CCtxParams->literalCompressionMode = lcm;
        return CCtxParams->literalCompressionMode;
    }

    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
        CCtxParams->nbWorkers = value;
        return CCtxParams->nbWorkers;
#endif

    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        /* Adjust to the minimum non-default value. */
        if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
            value = ZSTDMT_JOBSIZE_MIN;
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
        assert(value >= 0);
        CCtxParams->jobSize = value;
        return CCtxParams->jobSize;
#endif

    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
        CCtxParams->overlapLog = value;
        return CCtxParams->overlapLog;
#endif

    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;
#else
        /* clamp against rsyncable's own bounds (it was previously clamped
         * against the bounds of ZSTD_c_overlapLog, letting values > 1 through) */
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_rsyncable, &value), "");
        CCtxParams->rsyncable = value;
        return CCtxParams->rsyncable;
#endif

    case ZSTD_c_enableDedicatedDictSearch :
        CCtxParams->enableDedicatedDictSearch = (value!=0);
        return CCtxParams->enableDedicatedDictSearch;

    case ZSTD_c_enableLongDistanceMatching :
        CCtxParams->ldmParams.enableLdm = (value!=0);
        return CCtxParams->ldmParams.enableLdm;

    case ZSTD_c_ldmHashLog :
        if (value!=0)   /* 0 ==> auto */
            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
        CCtxParams->ldmParams.hashLog = value;
        return CCtxParams->ldmParams.hashLog;

    case ZSTD_c_ldmMinMatch :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
        CCtxParams->ldmParams.minMatchLength = value;
        return CCtxParams->ldmParams.minMatchLength;

    case ZSTD_c_ldmBucketSizeLog :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
        CCtxParams->ldmParams.bucketSizeLog = value;
        return CCtxParams->ldmParams.bucketSizeLog;

    case ZSTD_c_ldmHashRateLog :
        /* enforce the bounds advertised by ZSTD_cParam_getBounds() : this
         * rejects values below the lower bound too, which the former
         * upper-bound-only test let through.
         * NOTE(review): assumes ZSTD_LDM_HASHRATELOG_MAX equals
         * ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN (the limit tested before) — confirm */
        BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
        CCtxParams->ldmParams.hashRateLog = value;
        return CCtxParams->ldmParams.hashRateLog;

    case ZSTD_c_targetCBlockSize :
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
        CCtxParams->targetCBlockSize = value;
        return CCtxParams->targetCBlockSize;

    case ZSTD_c_srcSizeHint :
        if (value!=0)    /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
        CCtxParams->srcSizeHint = value;
        return CCtxParams->srcSizeHint;

    case ZSTD_c_stableInBuffer:
        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
        return CCtxParams->inBufferMode;

    case ZSTD_c_stableOutBuffer:
        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
        return CCtxParams->outBufferMode;

    case ZSTD_c_blockDelimiters:
        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
        return CCtxParams->blockDelimiters;

    case ZSTD_c_validateSequences:
        BOUNDCHECK(ZSTD_c_validateSequences, value);
        CCtxParams->validateSequences = value;
        return CCtxParams->validateSequences;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
}
798 
/* ZSTD_CCtx_getParameter() :
 * Reads `param` from the requested parameters of `cctx` into `*value`.
 * @return : the result of ZSTD_CCtxParams_getParameter().
 */
size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
{
    ZSTD_CCtx_params* const requested = &cctx->requestedParams;
    return ZSTD_CCtxParams_getParameter(requested, param, value);
}
803 
/*! ZSTD_CCtxParams_getParameter() :
 *  Fetches the field of `CCtxParams` that backs `param`, converted to int.
 *  `*value` is written only on success; it is left untouched on error.
 *  @return : 0 on success,
 *            or a parameter_unsupported error when `param` is not handled here,
 *            or when it is jobSize / overlapLog / rsyncable
 *            and the build does not define ZSTD_MULTITHREAD. */
size_t ZSTD_CCtxParams_getParameter(
        ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
{
    int result = 0;   /* collected here, stored into *value once the lookup succeeded */

    switch(param)
    {
    /* frame format and compression level */
    case ZSTD_c_format :
        result = CCtxParams->format;
        break;
    case ZSTD_c_compressionLevel :
        result = CCtxParams->compressionLevel;
        break;

    /* compression parameters */
    case ZSTD_c_windowLog :
        result = (int)CCtxParams->cParams.windowLog;
        break;
    case ZSTD_c_hashLog :
        result = (int)CCtxParams->cParams.hashLog;
        break;
    case ZSTD_c_chainLog :
        result = (int)CCtxParams->cParams.chainLog;
        break;
    case ZSTD_c_searchLog :
        result = CCtxParams->cParams.searchLog;
        break;
    case ZSTD_c_minMatch :
        result = CCtxParams->cParams.minMatch;
        break;
    case ZSTD_c_targetLength :
        result = CCtxParams->cParams.targetLength;
        break;
    case ZSTD_c_strategy :
        result = (unsigned)CCtxParams->cParams.strategy;
        break;

    /* frame parameters */
    case ZSTD_c_contentSizeFlag :
        result = CCtxParams->fParams.contentSizeFlag;
        break;
    case ZSTD_c_checksumFlag :
        result = CCtxParams->fParams.checksumFlag;
        break;
    case ZSTD_c_dictIDFlag :
        /* stored inverted, as "noDictIDFlag" */
        result = !CCtxParams->fParams.noDictIDFlag;
        break;

    /* advanced parameters */
    case ZSTD_c_forceMaxWindow :
        result = CCtxParams->forceWindow;
        break;
    case ZSTD_c_forceAttachDict :
        result = CCtxParams->attachDictPref;
        break;
    case ZSTD_c_literalCompressionMode :
        result = CCtxParams->literalCompressionMode;
        break;

    /* multithreading parameters */
    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        assert(CCtxParams->nbWorkers == 0);
#endif
        result = CCtxParams->nbWorkers;
        break;
    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        assert(CCtxParams->jobSize <= INT_MAX);
        result = (int)CCtxParams->jobSize;
        break;
#endif
    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        result = CCtxParams->overlapLog;
        break;
#endif
    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        result = CCtxParams->rsyncable;
        break;
#endif

    /* dictionary search and long distance matching */
    case ZSTD_c_enableDedicatedDictSearch :
        result = CCtxParams->enableDedicatedDictSearch;
        break;
    case ZSTD_c_enableLongDistanceMatching :
        result = CCtxParams->ldmParams.enableLdm;
        break;
    case ZSTD_c_ldmHashLog :
        result = CCtxParams->ldmParams.hashLog;
        break;
    case ZSTD_c_ldmMinMatch :
        result = CCtxParams->ldmParams.minMatchLength;
        break;
    case ZSTD_c_ldmBucketSizeLog :
        result = CCtxParams->ldmParams.bucketSizeLog;
        break;
    case ZSTD_c_ldmHashRateLog :
        result = CCtxParams->ldmParams.hashRateLog;
        break;

    /* block sizing, buffer modes and sequence options */
    case ZSTD_c_targetCBlockSize :
        result = (int)CCtxParams->targetCBlockSize;
        break;
    case ZSTD_c_srcSizeHint :
        result = (int)CCtxParams->srcSizeHint;
        break;
    case ZSTD_c_stableInBuffer :
        result = (int)CCtxParams->inBufferMode;
        break;
    case ZSTD_c_stableOutBuffer :
        result = (int)CCtxParams->outBufferMode;
        break;
    case ZSTD_c_blockDelimiters :
        result = (int)CCtxParams->blockDelimiters;
        break;
    case ZSTD_c_validateSequences :
        result = (int)CCtxParams->validateSequences;
        break;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }

    *value = result;
    return 0;
}
922 
923 /** ZSTD_CCtx_setParametersUsingCCtxParams() :
924  *  just applies `params` into `cctx`
925  *  no action is performed, parameters are merely stored.
926  *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
927  *    This is possible even if a compression is ongoing.
928  *    In which case, new parameters will be applied on the fly, starting with next compression job.
929  */
ZSTD_CCtx_setParametersUsingCCtxParams(ZSTD_CCtx * cctx,const ZSTD_CCtx_params * params)930 size_t ZSTD_CCtx_setParametersUsingCCtxParams(
931         ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
932 {
933     DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
934     RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
935                     "The context is in the wrong stage!");
936     RETURN_ERROR_IF(cctx->cdict, stage_wrong,
937                     "Can't override parameters with cdict attached (some must "
938                     "be inherited from the cdict).");
939 
940     cctx->requestedParams = *params;
941     return 0;
942 }
943 
ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx * cctx,unsigned long long pledgedSrcSize)944 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
945 {
946     DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
947     RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
948                     "Can't set pledgedSrcSize when not in init stage.");
949     cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
950     return 0;
951 }
952 
953 static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
954         int const compressionLevel,
955         size_t const dictSize);
956 static int ZSTD_dedicatedDictSearch_isSupported(
957         const ZSTD_compressionParameters* cParams);
958 static void ZSTD_dedicatedDictSearch_revertCParams(
959         ZSTD_compressionParameters* cParams);
960 
961 /**
962  * Initializes the local dict using the requested parameters.
963  * NOTE: This does not use the pledged src size, because it may be used for more
964  * than one compression.
965  */
ZSTD_initLocalDict(ZSTD_CCtx * cctx)966 static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
967 {
968     ZSTD_localDict* const dl = &cctx->localDict;
969     if (dl->dict == NULL) {
970         /* No local dictionary. */
971         assert(dl->dictBuffer == NULL);
972         assert(dl->cdict == NULL);
973         assert(dl->dictSize == 0);
974         return 0;
975     }
976     if (dl->cdict != NULL) {
977         assert(cctx->cdict == dl->cdict);
978         /* Local dictionary already initialized. */
979         return 0;
980     }
981     assert(dl->dictSize > 0);
982     assert(cctx->cdict == NULL);
983     assert(cctx->prefixDict.dict == NULL);
984 
985     dl->cdict = ZSTD_createCDict_advanced2(
986             dl->dict,
987             dl->dictSize,
988             ZSTD_dlm_byRef,
989             dl->dictContentType,
990             &cctx->requestedParams,
991             cctx->customMem);
992     RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
993     cctx->cdict = dl->cdict;
994     return 0;
995 }
996 
/*! ZSTD_CCtx_loadDictionary_advanced() :
 *  Clears every dictionary held by `cctx`, then registers `dict` as its local dictionary.
 *  With ZSTD_dlm_byRef the caller's buffer is referenced as is;
 *  with any other load method the content is copied into a buffer
 *  allocated through cctx->customMem.
 *  A NULL `dict` or a zero `dictSize` leaves the context without dictionary.
 *  @return : 0 on success,
 *            a stage_wrong error when the context is not in its init stage,
 *            or a memory_allocation error when a copy is requested
 *            but the context is static or the allocation fails. */
size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't load a dictionary when ctx is not in init stage.");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);

    ZSTD_clearAllDicts(cctx);  /* drop whatever was registered before */
    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
        return 0;

    {   ZSTD_localDict* const local = &cctx->localDict;
        if (dictLoadMethod != ZSTD_dlm_byRef) {
            /* private copy, remembered in dictBuffer */
            void* copy;
            RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                            "no malloc for static CCtx");
            copy = ZSTD_customMalloc(dictSize, cctx->customMem);
            RETURN_ERROR_IF(!copy, memory_allocation, "NULL pointer!");
            ZSTD_memcpy(copy, dict, dictSize);
            local->dictBuffer = copy;
            local->dict = copy;
        } else {
            /* reference the caller's buffer directly */
            local->dict = dict;
        }
        local->dictSize = dictSize;
        local->dictContentType = dictContentType;
    }
    return 0;
}
1023 
ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx * cctx,const void * dict,size_t dictSize)1024 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
1025       ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
1026 {
1027     return ZSTD_CCtx_loadDictionary_advanced(
1028             cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
1029 }
1030 
ZSTD_CCtx_loadDictionary(ZSTD_CCtx * cctx,const void * dict,size_t dictSize)1031 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
1032 {
1033     return ZSTD_CCtx_loadDictionary_advanced(
1034             cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
1035 }
1036 
1037 
/*! ZSTD_CCtx_refCDict() :
 *  Makes `cctx` reference `cdict`, after clearing every dictionary it held.
 *  Only the pointer is stored.
 *  @return : 0 on success,
 *            or a stage_wrong error when the context is not in its init stage. */
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a dict when ctx not in init stage.");

    /* Drop the dictionaries currently held (including any local cdict)
     * before taking the new reference. */
    ZSTD_clearAllDicts(cctx);
    cctx->cdict = cdict;

    return 0;
}
1047 
/*! ZSTD_CCtx_refThreadPool() :
 *  Stores the `pool` pointer into `cctx`; nothing else is done here.
 *  @return : 0 on success,
 *            or a stage_wrong error when the context is not in its init stage. */
size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a pool when ctx not in init stage.");

    cctx->pool = pool;

    return 0;
}
1055 
/*! ZSTD_CCtx_refPrefix() :
 *  Same as ZSTD_CCtx_refPrefix_advanced(), with content type ZSTD_dct_rawContent. */
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    ZSTD_dictContentType_e const contentType = ZSTD_dct_rawContent;
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, contentType);
}
1060 
/*! ZSTD_CCtx_refPrefix_advanced() :
 *  Clears every dictionary held by `cctx`, then references `prefix` as prefix dictionary.
 *  The prefix is referenced, not copied.
 *  A NULL `prefix` or a zero `prefixSize` only performs the clearing.
 *  @return : 0 on success,
 *            or a stage_wrong error when the context is not in its init stage. */
size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a prefix when ctx not in init stage.");
    ZSTD_clearAllDicts(cctx);

    if (prefix == NULL || prefixSize == 0)
        return 0;   /* no prefix to register */

    cctx->prefixDict.dict = prefix;
    cctx->prefixDict.dictSize = prefixSize;
    cctx->prefixDict.dictContentType = dictContentType;
    return 0;
}
1074 
1075 /*! ZSTD_CCtx_reset() :
1076  *  Also dumps dictionary */
ZSTD_CCtx_reset(ZSTD_CCtx * cctx,ZSTD_ResetDirective reset)1077 size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
1078 {
1079     if ( (reset == ZSTD_reset_session_only)
1080       || (reset == ZSTD_reset_session_and_parameters) ) {
1081         cctx->streamStage = zcss_init;
1082         cctx->pledgedSrcSizePlusOne = 0;
1083     }
1084     if ( (reset == ZSTD_reset_parameters)
1085       || (reset == ZSTD_reset_session_and_parameters) ) {
1086         RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
1087                         "Can't reset parameters only when not in init stage.");
1088         ZSTD_clearAllDicts(cctx);
1089         return ZSTD_CCtxParams_reset(&cctx->requestedParams);
1090     }
1091     return 0;
1092 }
1093 
1094 
1095 /** ZSTD_checkCParams() :
1096     control CParam values remain within authorized range.
1097     @return : 0, or an error code if one value is beyond authorized range */
ZSTD_checkCParams(ZSTD_compressionParameters cParams)1098 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
1099 {
1100     BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
1101     BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
1102     BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
1103     BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
1104     BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
1105     BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
1106     BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
1107     return 0;
1108 }
1109 
1110 /** ZSTD_clampCParams() :
1111  *  make CParam values within valid range.
1112  *  @return : valid CParams */
1113 static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)1114 ZSTD_clampCParams(ZSTD_compressionParameters cParams)
1115 {
1116 #   define CLAMP_TYPE(cParam, val, type) {                                \
1117         ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
1118         if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
1119         else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
1120     }
1121 #   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
1122     CLAMP(ZSTD_c_windowLog, cParams.windowLog);
1123     CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
1124     CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
1125     CLAMP(ZSTD_c_searchLog, cParams.searchLog);
1126     CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
1127     CLAMP(ZSTD_c_targetLength,cParams.targetLength);
1128     CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
1129     return cParams;
1130 }
1131 
1132 /** ZSTD_cycleLog() :
1133  *  condition for correct operation : hashLog > 1 */
ZSTD_cycleLog(U32 hashLog,ZSTD_strategy strat)1134 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
1135 {
1136     U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
1137     return hashLog - btScale;
1138 }
1139 
1140 /** ZSTD_dictAndWindowLog() :
1141  * Returns an adjusted window log that is large enough to fit the source and the dictionary.
1142  * The zstd format says that the entire dictionary is valid if one byte of the dictionary
1143  * is within the window. So the hashLog and chainLog should be large enough to reference both
1144  * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
1145  * the hashLog and windowLog.
1146  * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
1147  */
ZSTD_dictAndWindowLog(U32 windowLog,U64 srcSize,U64 dictSize)1148 static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
1149 {
1150     const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
1151     /* No dictionary ==> No change */
1152     if (dictSize == 0) {
1153         return windowLog;
1154     }
1155     assert(windowLog <= ZSTD_WINDOWLOG_MAX);
1156     assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
1157     {
1158         U64 const windowSize = 1ULL << windowLog;
1159         U64 const dictAndWindowSize = dictSize + windowSize;
1160         /* If the window size is already large enough to fit both the source and the dictionary
1161          * then just use the window size. Otherwise adjust so that it fits the dictionary and
1162          * the window.
1163          */
1164         if (windowSize >= dictSize + srcSize) {
1165             return windowLog; /* Window size large enough already */
1166         } else if (dictAndWindowSize >= maxWindowSize) {
1167             return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
1168         } else  {
1169             return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
1170         }
1171     }
1172 }
1173 
1174 /** ZSTD_adjustCParams_internal() :
1175  *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
1176  *  mostly downsize to reduce memory consumption and initialization latency.
1177  * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
1178  * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
1179  *  note : `srcSize==0` means 0!
1180  *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
1181 static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,unsigned long long srcSize,size_t dictSize,ZSTD_cParamMode_e mode)1182 ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
1183                             unsigned long long srcSize,
1184                             size_t dictSize,
1185                             ZSTD_cParamMode_e mode)
1186 {
1187     const U64 minSrcSize = 513; /* (1<<9) + 1 */
1188     const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
1189     assert(ZSTD_checkCParams(cPar)==0);
1190 
1191     if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1192         srcSize = minSrcSize;
1193 
1194     switch (mode) {
1195     case ZSTD_cpm_noAttachDict:
1196     case ZSTD_cpm_unknown:
1197     case ZSTD_cpm_createCDict:
1198         break;
1199     case ZSTD_cpm_attachDict:
1200         dictSize = 0;
1201         break;
1202     default:
1203         assert(0);
1204         break;
1205     }
1206 
1207     /* resize windowLog if input is small enough, to use less memory */
1208     if ( (srcSize < maxWindowResize)
1209       && (dictSize < maxWindowResize) )  {
1210         U32 const tSize = (U32)(srcSize + dictSize);
1211         static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
1212         U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
1213                             ZSTD_highbit32(tSize-1) + 1;
1214         if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
1215     }
1216     {   U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
1217         U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
1218         if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
1219         if (cycleLog > dictAndWindowLog)
1220             cPar.chainLog -= (cycleLog - dictAndWindowLog);
1221     }
1222 
1223     if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
1224         cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
1225 
1226     return cPar;
1227 }
1228 
1229 ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,unsigned long long srcSize,size_t dictSize)1230 ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
1231                    unsigned long long srcSize,
1232                    size_t dictSize)
1233 {
1234     cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
1235     if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
1236     return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
1237 }
1238 
1239 static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1240 static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1241 
/* ZSTD_overrideCParams() :
 * Copies into `cParams` every field of `overrides` that is non-zero.
 * Fields that are zero in `overrides` leave `cParams` untouched. */
static void ZSTD_overrideCParams(
              ZSTD_compressionParameters* cParams,
        const ZSTD_compressionParameters* overrides)
{
    if (overrides->windowLog != 0) {
        cParams->windowLog = overrides->windowLog;
    }
    if (overrides->hashLog != 0) {
        cParams->hashLog = overrides->hashLog;
    }
    if (overrides->chainLog != 0) {
        cParams->chainLog = overrides->chainLog;
    }
    if (overrides->searchLog != 0) {
        cParams->searchLog = overrides->searchLog;
    }
    if (overrides->minMatch != 0) {
        cParams->minMatch = overrides->minMatch;
    }
    if (overrides->targetLength != 0) {
        cParams->targetLength = overrides->targetLength;
    }
    if (overrides->strategy != 0) {
        cParams->strategy = overrides->strategy;
    }
}
1254 
ZSTD_getCParamsFromCCtxParams(const ZSTD_CCtx_params * CCtxParams,U64 srcSizeHint,size_t dictSize,ZSTD_cParamMode_e mode)1255 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1256         const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
1257 {
1258     ZSTD_compressionParameters cParams;
1259     if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
1260       srcSizeHint = CCtxParams->srcSizeHint;
1261     }
1262     cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
1263     if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
1264     ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
1265     assert(!ZSTD_checkCParams(cParams));
1266     /* srcSizeHint == 0 means 0 */
1267     return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
1268 }
1269 
1270 static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters * const cParams,const U32 forCCtx)1271 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
1272                        const U32 forCCtx)
1273 {
1274     size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
1275     size_t const hSize = ((size_t)1) << cParams->hashLog;
1276     U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
1277     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
1278     /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
1279      * surrounded by redzones in ASAN. */
1280     size_t const tableSpace = chainSize * sizeof(U32)
1281                             + hSize * sizeof(U32)
1282                             + h3Size * sizeof(U32);
1283     size_t const optPotentialSpace =
1284         ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
1285       + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
1286       + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
1287       + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
1288       + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
1289       + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
1290     size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
1291                                 ? optPotentialSpace
1292                                 : 0;
1293     DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
1294                 (U32)chainSize, (U32)hSize, (U32)h3Size);
1295     return tableSpace + optSpace;
1296 }
1297 
ZSTD_estimateCCtxSize_usingCCtxParams_internal(const ZSTD_compressionParameters * cParams,const ldmParams_t * ldmParams,const int isStatic,const size_t buffInSize,const size_t buffOutSize,const U64 pledgedSrcSize)1298 static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1299         const ZSTD_compressionParameters* cParams,
1300         const ldmParams_t* ldmParams,
1301         const int isStatic,
1302         const size_t buffInSize,
1303         const size_t buffOutSize,
1304         const U64 pledgedSrcSize)
1305 {
1306     size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
1307     size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1308     U32    const divider = (cParams->minMatch==3) ? 3 : 4;
1309     size_t const maxNbSeq = blockSize / divider;
1310     size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
1311                             + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
1312                             + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
1313     size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
1314     size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
1315     size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
1316 
1317     size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
1318     size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
1319     size_t const ldmSeqSpace = ldmParams->enableLdm ?
1320         ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
1321 
1322 
1323     size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
1324                              + ZSTD_cwksp_alloc_size(buffOutSize);
1325 
1326     size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
1327 
1328     size_t const neededSpace =
1329         cctxSpace +
1330         entropySpace +
1331         blockStateSpace +
1332         ldmSpace +
1333         ldmSeqSpace +
1334         matchStateSize +
1335         tokenSpace +
1336         bufferSpace;
1337 
1338     DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
1339     return neededSpace;
1340 }
1341 
ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params * params)1342 size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
1343 {
1344     ZSTD_compressionParameters const cParams =
1345                 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1346 
1347     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
1348     /* estimateCCtxSize is for one-shot compression. So no buffers should
1349      * be needed. However, we still allocate two 0-sized buffers, which can
1350      * take space under ASAN. */
1351     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1352         &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
1353 }
1354 
/*! ZSTD_estimateCCtxSize_usingCParams() :
 *  Wraps `cParams` into a ZSTD_CCtx_params,
 *  then defers to ZSTD_estimateCCtxSize_usingCCtxParams(). */
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const wrapped = ZSTD_makeCCtxParamsFromCParams(cParams);
    return ZSTD_estimateCCtxSize_usingCCtxParams(&wrapped);
}
1360 
ZSTD_estimateCCtxSize_internal(int compressionLevel)1361 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
1362 {
1363     ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1364     return ZSTD_estimateCCtxSize_usingCParams(cParams);
1365 }
1366 
/*! ZSTD_estimateCCtxSize() :
 *  @return : the largest per-level estimation among all levels
 *            from MIN(compressionLevel, 1) up to `compressionLevel`, both included. */
size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
    size_t largest = 0;
    int level = MIN(compressionLevel, 1);

    while (level <= compressionLevel) {
        size_t const candidate = ZSTD_estimateCCtxSize_internal(level);
        largest = MAX(largest, candidate);
        level++;
    }
    return largest;
}
1377 
ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params * params)1378 size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
1379 {
1380     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
1381     {   ZSTD_compressionParameters const cParams =
1382                 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1383         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
1384         size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
1385                 ? ((size_t)1 << cParams.windowLog) + blockSize
1386                 : 0;
1387         size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
1388                 ? ZSTD_compressBound(blockSize) + 1
1389                 : 0;
1390 
1391         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1392             &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
1393             ZSTD_CONTENTSIZE_UNKNOWN);
1394     }
1395 }
1396 
/*! ZSTD_estimateCStreamSize_usingCParams() :
 *  Wraps `cParams` into a ZSTD_CCtx_params,
 *  then defers to ZSTD_estimateCStreamSize_usingCCtxParams(). */
size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const wrapped = ZSTD_makeCCtxParamsFromCParams(cParams);
    return ZSTD_estimateCStreamSize_usingCCtxParams(&wrapped);
}
1402 
ZSTD_estimateCStreamSize_internal(int compressionLevel)1403 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
1404 {
1405     ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1406     return ZSTD_estimateCStreamSize_usingCParams(cParams);
1407 }
1408 
/*! ZSTD_estimateCStreamSize() :
 *  @return : the largest per-level estimation among all levels
 *            from MIN(compressionLevel, 1) up to `compressionLevel`, both included. */
size_t ZSTD_estimateCStreamSize(int compressionLevel)
{
    size_t largest = 0;
    int level = MIN(compressionLevel, 1);

    while (level <= compressionLevel) {
        size_t const candidate = ZSTD_estimateCStreamSize_internal(level);
        largest = MAX(largest, candidate);
        level++;
    }
    return largest;
}
1419 
1420 /* ZSTD_getFrameProgression():
1421  * tells how much data has been consumed (input) and produced (output) for current frame.
1422  * able to count progression inside worker threads (non-blocking mode).
1423  */
ZSTD_getFrameProgression(const ZSTD_CCtx * cctx)1424 ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
1425 {
1426 #ifdef ZSTD_MULTITHREAD
1427     if (cctx->appliedParams.nbWorkers > 0) {
1428         return ZSTDMT_getFrameProgression(cctx->mtctx);
1429     }
1430 #endif
1431     {   ZSTD_frameProgression fp;
1432         size_t const buffered = (cctx->inBuff == NULL) ? 0 :
1433                                 cctx->inBuffPos - cctx->inToCompress;
1434         if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
1435         assert(buffered <= ZSTD_BLOCKSIZE_MAX);
1436         fp.ingested = cctx->consumedSrcSize + buffered;
1437         fp.consumed = cctx->consumedSrcSize;
1438         fp.produced = cctx->producedCSize;
1439         fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
1440         fp.currentJobID = 0;
1441         fp.nbActiveWorkers = 0;
1442         return fp;
1443 }   }
1444 
1445 /*! ZSTD_toFlushNow()
1446  *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
1447  */
ZSTD_toFlushNow(ZSTD_CCtx * cctx)1448 size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
1449 {
1450 #ifdef ZSTD_MULTITHREAD
1451     if (cctx->appliedParams.nbWorkers > 0) {
1452         return ZSTDMT_toFlushNow(cctx->mtctx);
1453     }
1454 #endif
1455     (void)cctx;
1456     return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
1457 }
1458 
/* ZSTD_assertEqualCParams() :
 * Checks, through assert() only, that both sets of compression parameters
 * hold the same value in every field. */
static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
                                    ZSTD_compressionParameters cParams2)
{
    /* both arguments are referenced only by the assert() statements below */
    (void)cParams1;
    (void)cParams2;

    /* table dimensions */
    assert(cParams1.windowLog    == cParams2.windowLog);
    assert(cParams1.chainLog     == cParams2.chainLog);
    assert(cParams1.hashLog      == cParams2.hashLog);
    /* search settings */
    assert(cParams1.searchLog    == cParams2.searchLog);
    assert(cParams1.minMatch     == cParams2.minMatch);
    assert(cParams1.targetLength == cParams2.targetLength);
    assert(cParams1.strategy     == cParams2.strategy);
}
1472 
/* ZSTD_reset_compressedBlockState() :
 * Sets the repeat modes of the huffman and FSE entropy tables of `bs` to "none",
 * and reloads its ZSTD_REP_NUM repeat offsets from repStartValue[]. */
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
    int n;

    bs->entropy.huf.repeatMode = HUF_repeat_none;
    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;

    for (n = 0; n < ZSTD_REP_NUM; n++) {
        bs->rep[n] = repStartValue[n];
    }
}
1483 
1484 /*! ZSTD_invalidateMatchState()
1485  *  Invalidate all the matches in the match finder tables.
1486  *  Requires nextSrc and base to be set (can be NULL).
1487  */
ZSTD_invalidateMatchState(ZSTD_matchState_t * ms)1488 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
1489 {
1490     ZSTD_window_clear(&ms->window);
1491 
1492     ms->nextToUpdate = ms->window.dictLimit;
1493     ms->loadedDictEnd = 0;
1494     ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
1495     ms->dictMatchState = NULL;
1496 }
1497 
/**
 * Table-cleaning policy applied during a matchState reset.
 * Tells whether the tables must be cleared / prepared for the coming
 * compression, or whether they may be left unclean because a subsequent
 * operation is known to overwrite the table space anyways
 * (e.g., copying the matchState contents in from a CDict).
 */
typedef enum {
    ZSTDcrp_makeClean  = 0,   /* tables are cleaned as part of the reset */
    ZSTDcrp_leaveDirty = 1    /* tables are left as-is : caller overwrites them */
} ZSTD_compResetPolicy_e;
1509 
/**
 * Index policy applied during a matchState reset.
 * Tells whether indexing can continue where it left off,
 * or whether it needs to be restarted from zero.
 */
typedef enum {
    ZSTDirp_continue = 0,   /* keep current indices */
    ZSTDirp_reset    = 1    /* restart indices from zero */
} ZSTD_indexResetPolicy_e;
1519 
/**
 * Identifies on behalf of which kind of object a matchState is being reset.
 * ZSTD_reset_matchState() only sets up hashTable3 and the optimal parser
 * space when the target is a CCtx.
 */
typedef enum {
    ZSTD_resetTarget_CDict = 0,   /* matchState belongs to a CDict */
    ZSTD_resetTarget_CCtx  = 1    /* matchState belongs to a CCtx */
} ZSTD_resetTarget_e;
1524 
/* ZSTD_reset_matchState() :
 * Re-initializes match state `ms` for a new compression, and reserves its
 * tables (hash, chain, hash3) plus, when relevant, the optimal parser buffers
 * from workspace `ws`.
 * @ms : match state to reset
 * @ws : workspace providing the table / aligned allocations
 * @cParams : compression parameters dictating table sizes; copied into `ms`
 * @crp : ZSTDcrp_leaveDirty skips the table cleaning step, any other value cleans
 * @forceResetIndex : ZSTDirp_reset re-initializes the window and marks tables dirty
 * @forWho : hashTable3 and optimal parser space are only set up for ZSTD_resetTarget_CCtx
 * @return : 0, or a memory_allocation error if a workspace reservation failed */
static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                      ZSTD_cwksp* ws,
                const ZSTD_compressionParameters* cParams,
                const ZSTD_compResetPolicy_e crp,
                const ZSTD_indexResetPolicy_e forceResetIndex,
                const ZSTD_resetTarget_e forWho)
{
    /* ZSTD_fast gets no chain table */
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    /* hashTable3 only exists for a CCtx using minMatch==3; its log is capped by both ZSTD_HASHLOG3_MAX and windowLog */
    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;

    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
    if (forceResetIndex == ZSTDirp_reset) {
        /* restart indexing : re-init the window and flag tables as dirty */
        ZSTD_window_init(&ms->window);
        ZSTD_cwksp_mark_tables_dirty(ws);
    }

    ms->hashLog3 = hashLog3;

    ZSTD_invalidateMatchState(ms);

    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */

    ZSTD_cwksp_clear_tables(ws);

    DEBUGLOG(5, "reserving table space");
    /* table Space */
    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
    /* a single check covers the 3 table reservations above */
    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");

    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
    if (crp!=ZSTDcrp_leaveDirty) {
        /* reset tables only */
        ZSTD_cwksp_clean_tables(ws);
    }

    /* opt parser space */
    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
        DEBUGLOG(4, "reserving optimal parser space");
        /* frequency counters, one array per symbol type */
        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
        /* match and price tables, ZSTD_OPT_NUM+1 entries each */
        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    }

    ms->cParams = *cParams;

    /* covers the optimal parser reservations, when they happened */
    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");

    return 0;
}
1584 
1585 /* ZSTD_indexTooCloseToMax() :
1586  * minor optimization : prefer memset() rather than reduceIndex()
1587  * which is measurably slow in some circumstances (reported for Visual Studio).
1588  * Works when re-using a context for a lot of smallish inputs :
1589  * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
1590  * memset() will be triggered before reduceIndex().
1591  */
1592 #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
ZSTD_indexTooCloseToMax(ZSTD_window_t w)1593 static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
1594 {
1595     return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
1596 }
1597 
1598 /*! ZSTD_resetCCtx_internal() :
1599     note : `params` are assumed fully validated at this stage */
ZSTD_resetCCtx_internal(ZSTD_CCtx * zc,ZSTD_CCtx_params params,U64 const pledgedSrcSize,ZSTD_compResetPolicy_e const crp,ZSTD_buffered_policy_e const zbuff)1600 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
1601                                       ZSTD_CCtx_params params,
1602                                       U64 const pledgedSrcSize,
1603                                       ZSTD_compResetPolicy_e const crp,
1604                                       ZSTD_buffered_policy_e const zbuff)
1605 {
1606     ZSTD_cwksp* const ws = &zc->workspace;
1607     DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
1608                 (U32)pledgedSrcSize, params.cParams.windowLog);
1609     assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
1610 
1611     zc->isFirstBlock = 1;
1612 
1613     if (params.ldmParams.enableLdm) {
1614         /* Adjust long distance matching parameters */
1615         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
1616         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
1617         assert(params.ldmParams.hashRateLog < 32);
1618         zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
1619     }
1620 
1621     {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
1622         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1623         U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
1624         size_t const maxNbSeq = blockSize / divider;
1625         size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
1626                 ? ZSTD_compressBound(blockSize) + 1
1627                 : 0;
1628         size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
1629                 ? windowSize + blockSize
1630                 : 0;
1631         size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
1632 
1633         int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
1634         ZSTD_indexResetPolicy_e needsIndexReset =
1635             (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
1636 
1637         size_t const neededSpace =
1638             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1639                 &params.cParams, &params.ldmParams, zc->staticSize != 0,
1640                 buffInSize, buffOutSize, pledgedSrcSize);
1641         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
1642 
1643         if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
1644 
1645         /* Check if workspace is large enough, alloc a new one if needed */
1646         {
1647             int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
1648             int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
1649 
1650             DEBUGLOG(4, "Need %zu B workspace", neededSpace);
1651             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
1652 
1653             if (workspaceTooSmall || workspaceWasteful) {
1654                 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
1655                             ZSTD_cwksp_sizeof(ws) >> 10,
1656                             neededSpace >> 10);
1657 
1658                 RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
1659 
1660                 needsIndexReset = ZSTDirp_reset;
1661 
1662                 ZSTD_cwksp_free(ws, zc->customMem);
1663                 FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
1664 
1665                 DEBUGLOG(5, "reserving object space");
1666                 /* Statically sized space.
1667                  * entropyWorkspace never moves,
1668                  * though prev/next block swap places */
1669                 assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
1670                 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1671                 RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
1672                 zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1673                 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
1674                 zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
1675                 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
1676         }   }
1677 
1678         ZSTD_cwksp_clear(ws);
1679 
1680         /* init params */
1681         zc->appliedParams = params;
1682         zc->blockState.matchState.cParams = params.cParams;
1683         zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
1684         zc->consumedSrcSize = 0;
1685         zc->producedCSize = 0;
1686         if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1687             zc->appliedParams.fParams.contentSizeFlag = 0;
1688         DEBUGLOG(4, "pledged content size : %u ; flag : %u",
1689             (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
1690         zc->blockSize = blockSize;
1691 
1692         XXH64_reset(&zc->xxhState, 0);
1693         zc->stage = ZSTDcs_init;
1694         zc->dictID = 0;
1695 
1696         ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
1697 
1698         /* ZSTD_wildcopy() is used to copy into the literals buffer,
1699          * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
1700          */
1701         zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
1702         zc->seqStore.maxNbLit = blockSize;
1703 
1704         /* buffers */
1705         zc->bufferedPolicy = zbuff;
1706         zc->inBuffSize = buffInSize;
1707         zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
1708         zc->outBuffSize = buffOutSize;
1709         zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
1710 
1711         /* ldm bucketOffsets table */
1712         if (params.ldmParams.enableLdm) {
1713             /* TODO: avoid memset? */
1714             size_t const ldmBucketSize =
1715                   ((size_t)1) << (params.ldmParams.hashLog -
1716                                   params.ldmParams.bucketSizeLog);
1717             zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
1718             ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
1719         }
1720 
1721         /* sequences storage */
1722         ZSTD_referenceExternalSequences(zc, NULL, 0);
1723         zc->seqStore.maxNbSeq = maxNbSeq;
1724         zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1725         zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1726         zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1727         zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
1728 
1729         FORWARD_IF_ERROR(ZSTD_reset_matchState(
1730             &zc->blockState.matchState,
1731             ws,
1732             &params.cParams,
1733             crp,
1734             needsIndexReset,
1735             ZSTD_resetTarget_CCtx), "");
1736 
1737         /* ldm hash table */
1738         if (params.ldmParams.enableLdm) {
1739             /* TODO: avoid memset? */
1740             size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
1741             zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
1742             ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
1743             zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
1744             zc->maxNbLdmSequences = maxNbLdmSeq;
1745 
1746             ZSTD_window_init(&zc->ldmState.window);
1747             ZSTD_window_clear(&zc->ldmState.window);
1748             zc->ldmState.loadedDictEnd = 0;
1749         }
1750 
1751         /* Due to alignment, when reusing a workspace, we can actually consume
1752          * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
1753          */
1754         assert(ZSTD_cwksp_used(ws) >= neededSpace &&
1755                ZSTD_cwksp_used(ws) <= neededSpace + 3);
1756 
1757         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
1758         zc->initialized = 1;
1759 
1760         return 0;
1761     }
1762 }
1763 
1764 /* ZSTD_invalidateRepCodes() :
1765  * ensures next compression will not use repcodes from previous block.
1766  * Note : only works with regular variant;
1767  *        do not use with extDict variant ! */
ZSTD_invalidateRepCodes(ZSTD_CCtx * cctx)1768 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
1769     int i;
1770     for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
1771     assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
1772 }
1773 
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place.
 * The table is indexed by strategy value (entry 0 is unused).
 * ZSTD_shouldAttachDict() compares the pledged source size against the
 * entry selected by the cdict's strategy : sizes up to and including the
 * cutoff favor attaching the dictionary.
 */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};
1790 
/* ZSTD_shouldAttachDict() :
 * Decides whether `cdict` should be referenced in-place (attached)
 * rather than copied into the working context.
 * A cdict built for dedicated dict search is always attached.
 * Otherwise, attaching requires that neither ZSTD_dictForceCopy nor
 * forceWindow is requested, and that one of the following holds :
 * the pledged source size is within the strategy's cutoff,
 * the source size is unknown, or ZSTD_dictForceAttach is requested.
 * @return : 1 to attach, 0 to copy */
static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
                                 const ZSTD_CCtx_params* params,
                                 U64 pledgedSrcSize)
{
    size_t const cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];

    if (cdict->matchState.dedicatedDictSearch) return 1;

    if (params->attachDictPref == ZSTD_dictForceCopy) return 0;

    /* dictMatchState isn't correctly
     * handled in _enforceMaxDist */
    if (params->forceWindow) return 0;

    if (pledgedSrcSize <= cutoff) return 1;
    if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) return 1;
    return (params->attachDictPref == ZSTD_dictForceAttach);
}
1805 
1806 static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict,ZSTD_CCtx_params params,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)1807 ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
1808                         const ZSTD_CDict* cdict,
1809                         ZSTD_CCtx_params params,
1810                         U64 pledgedSrcSize,
1811                         ZSTD_buffered_policy_e zbuff)
1812 {
1813     {
1814         ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
1815         unsigned const windowLog = params.cParams.windowLog;
1816         assert(windowLog != 0);
1817         /* Resize working context table params for input only, since the dict
1818          * has its own tables. */
1819         /* pledgedSrcSize == 0 means 0! */
1820 
1821         if (cdict->matchState.dedicatedDictSearch) {
1822             ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
1823         }
1824 
1825         params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
1826                                                      cdict->dictContentSize, ZSTD_cpm_attachDict);
1827         params.cParams.windowLog = windowLog;
1828         FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1829                                                  ZSTDcrp_makeClean, zbuff), "");
1830         assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
1831     }
1832 
1833     {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
1834                                   - cdict->matchState.window.base);
1835         const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
1836         if (cdictLen == 0) {
1837             /* don't even attach dictionaries with no contents */
1838             DEBUGLOG(4, "skipping attaching empty dictionary");
1839         } else {
1840             DEBUGLOG(4, "attaching dictionary into context");
1841             cctx->blockState.matchState.dictMatchState = &cdict->matchState;
1842 
1843             /* prep working match state so dict matches never have negative indices
1844              * when they are translated to the working context's index space. */
1845             if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
1846                 cctx->blockState.matchState.window.nextSrc =
1847                     cctx->blockState.matchState.window.base + cdictEnd;
1848                 ZSTD_window_clear(&cctx->blockState.matchState.window);
1849             }
1850             /* loadedDictEnd is expressed within the referential of the active context */
1851             cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
1852     }   }
1853 
1854     cctx->dictID = cdict->dictID;
1855 
1856     /* copy block state */
1857     ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
1858 
1859     return 0;
1860 }
1861 
/* ZSTD_resetCCtx_byCopyingCDict() :
 * Resets `cctx` using the table-related compression parameters of `cdict`
 * (the requested windowLog is preserved), then duplicates the cdict's hash
 * and chain tables, window, dictionary offsets, dictID and block state into
 * the working context. hashTable3 is zeroed, since the cdict never fills it.
 * Must not be used with a cdict built for dedicated dict search.
 * @return : 0, or an error code forwarded from ZSTD_resetCCtx_internal() */
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            ZSTD_CCtx_params params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    const ZSTD_matchState_t* const dictMs = &cdict->matchState;
    ZSTD_matchState_t* const workMs = &cctx->blockState.matchState;
    const ZSTD_compressionParameters* const dictCParams = &dictMs->cParams;

    assert(!dictMs->dedicatedDictSearch);

    DEBUGLOG(4, "copying dictionary into context");

    {   unsigned const requestedWindowLog = params.cParams.windowLog;
        assert(requestedWindowLog != 0);
        /* Copy only compression parameters related to tables. */
        params.cParams = *dictCParams;
        params.cParams.windowLog = requestedWindowLog;
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                                 ZSTDcrp_leaveDirty, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == dictCParams->strategy);
        assert(cctx->appliedParams.cParams.hashLog == dictCParams->hashLog);
        assert(cctx->appliedParams.cParams.chainLog == dictCParams->chainLog);
    }

    /* tables are about to be overwritten */
    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);

    /* copy tables */
    {   size_t const hBytes = ((size_t)1 << dictCParams->hashLog) * sizeof(U32);
        size_t const chainEntries = (dictCParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << dictCParams->chainLog);
        size_t const chainBytes = chainEntries * sizeof(U32);

        ZSTD_memcpy(workMs->hashTable, dictMs->hashTable, hBytes);
        ZSTD_memcpy(workMs->chainTable, dictMs->chainTable, chainBytes);
    }

    /* Zero the hashTable3, since the cdict never fills it */
    {   int const h3log = workMs->hashLog3;
        size_t const h3Bytes = (h3log ? ((size_t)1 << h3log) : 0) * sizeof(U32);
        assert(dictMs->hashLog3 == 0);
        ZSTD_memset(workMs->hashTable3, 0, h3Bytes);
    }

    /* every table now holds valid content */
    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);

    /* copy dictionary offsets */
    workMs->window        = dictMs->window;
    workMs->nextToUpdate  = dictMs->nextToUpdate;
    workMs->loadedDictEnd = dictMs->loadedDictEnd;

    cctx->dictID = cdict->dictID;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
1924 
1925 /* We have a choice between copying the dictionary context into the working
1926  * context, or referencing the dictionary context from the working context
1927  * in-place. We decide here which strategy to use. */
ZSTD_resetCCtx_usingCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict,const ZSTD_CCtx_params * params,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)1928 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
1929                             const ZSTD_CDict* cdict,
1930                             const ZSTD_CCtx_params* params,
1931                             U64 pledgedSrcSize,
1932                             ZSTD_buffered_policy_e zbuff)
1933 {
1934 
1935     DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
1936                 (unsigned)pledgedSrcSize);
1937 
1938     if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
1939         return ZSTD_resetCCtx_byAttachingCDict(
1940             cctx, cdict, *params, pledgedSrcSize, zbuff);
1941     } else {
1942         return ZSTD_resetCCtx_byCopyingCDict(
1943             cctx, cdict, *params, pledgedSrcSize, zbuff);
1944     }
1945 }
1946 
1947 /*! ZSTD_copyCCtx_internal() :
1948  *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
1949  *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
1950  *  The "context", in this case, refers to the hash and chain tables,
1951  *  entropy tables, and dictionary references.
1952  * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
1953  * @return : 0, or an error code */
ZSTD_copyCCtx_internal(ZSTD_CCtx * dstCCtx,const ZSTD_CCtx * srcCCtx,ZSTD_frameParameters fParams,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)1954 static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
1955                             const ZSTD_CCtx* srcCCtx,
1956                             ZSTD_frameParameters fParams,
1957                             U64 pledgedSrcSize,
1958                             ZSTD_buffered_policy_e zbuff)
1959 {
1960     DEBUGLOG(5, "ZSTD_copyCCtx_internal");
1961     RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
1962                     "Can't copy a ctx that's not in init stage.");
1963 
1964     ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
1965     {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
1966         /* Copy only compression parameters related to tables. */
1967         params.cParams = srcCCtx->appliedParams.cParams;
1968         params.fParams = fParams;
1969         ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
1970                                 ZSTDcrp_leaveDirty, zbuff);
1971         assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
1972         assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
1973         assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
1974         assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
1975         assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
1976     }
1977 
1978     ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
1979 
1980     /* copy tables */
1981     {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
1982         size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
1983         int const h3log = srcCCtx->blockState.matchState.hashLog3;
1984         size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
1985 
1986         ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
1987                srcCCtx->blockState.matchState.hashTable,
1988                hSize * sizeof(U32));
1989         ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
1990                srcCCtx->blockState.matchState.chainTable,
1991                chainSize * sizeof(U32));
1992         ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
1993                srcCCtx->blockState.matchState.hashTable3,
1994                h3Size * sizeof(U32));
1995     }
1996 
1997     ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
1998 
1999     /* copy dictionary offsets */
2000     {
2001         const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
2002         ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
2003         dstMatchState->window       = srcMatchState->window;
2004         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
2005         dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
2006     }
2007     dstCCtx->dictID = srcCCtx->dictID;
2008 
2009     /* copy block state */
2010     ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
2011 
2012     return 0;
2013 }
2014 
2015 /*! ZSTD_copyCCtx() :
2016  *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
2017  *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
2018  *  pledgedSrcSize==0 means "unknown".
2019 *   @return : 0, or an error code */
ZSTD_copyCCtx(ZSTD_CCtx * dstCCtx,const ZSTD_CCtx * srcCCtx,unsigned long long pledgedSrcSize)2020 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
2021 {
2022     ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
2023     ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
2024     ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
2025     if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
2026     fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
2027 
2028     return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
2029                                 fParams, pledgedSrcSize,
2030                                 zbuff);
2031 }
2032 
2033 
2034 #define ZSTD_ROWSIZE 16
2035 /*! ZSTD_reduceTable() :
2036  *  reduce table indexes by `reducerValue`, or squash to zero.
2037  *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
2038  *  It must be set to a clear 0/1 value, to remove branch during inlining.
2039  *  Presume table size is a multiple of ZSTD_ROWSIZE
2040  *  to help auto-vectorization */
2041 FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal(U32 * const table,U32 const size,U32 const reducerValue,int const preserveMark)2042 ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
2043 {
2044     int const nbRows = (int)size / ZSTD_ROWSIZE;
2045     int cellNb = 0;
2046     int rowNb;
2047     assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
2048     assert(size < (1U<<31));   /* can be casted to int */
2049 
2050 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
2051     /* To validate that the table re-use logic is sound, and that we don't
2052      * access table space that we haven't cleaned, we re-"poison" the table
2053      * space every time we mark it dirty.
2054      *
2055      * This function however is intended to operate on those dirty tables and
2056      * re-clean them. So when this function is used correctly, we can unpoison
2057      * the memory it operated on. This introduces a blind spot though, since
2058      * if we now try to operate on __actually__ poisoned memory, we will not
2059      * detect that. */
2060     __msan_unpoison(table, size * sizeof(U32));
2061 #endif
2062 
2063     for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
2064         int column;
2065         for (column=0; column<ZSTD_ROWSIZE; column++) {
2066             if (preserveMark) {
2067                 U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
2068                 table[cellNb] += adder;
2069             }
2070             if (table[cellNb] < reducerValue) table[cellNb] = 0;
2071             else table[cellNb] -= reducerValue;
2072             cellNb++;
2073     }   }
2074 }
2075 
/* ZSTD_reduceTable() :
 * Plain variant of ZSTD_reduceTable_internal() :
 * no cell value receives special treatment. */
static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 0 /* preserveMark */);
}
2080 
/* ZSTD_reduceTable_btlazy2() :
 * Variant of ZSTD_reduceTable_internal() which preserves
 * the "unsorted mark" used by btlazy2 strategy. */
static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1 /* preserveMark */);
}
2085 
2086 /*! ZSTD_reduceIndex() :
2087 *   rescale all indexes to avoid future overflow (indexes are U32) */
ZSTD_reduceIndex(ZSTD_matchState_t * ms,ZSTD_CCtx_params const * params,const U32 reducerValue)2088 static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
2089 {
2090     {   U32 const hSize = (U32)1 << params->cParams.hashLog;
2091         ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
2092     }
2093 
2094     if (params->cParams.strategy != ZSTD_fast) {
2095         U32 const chainSize = (U32)1 << params->cParams.chainLog;
2096         if (params->cParams.strategy == ZSTD_btlazy2)
2097             ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
2098         else
2099             ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
2100     }
2101 
2102     if (ms->hashLog3) {
2103         U32 const h3Size = (U32)1 << ms->hashLog3;
2104         ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
2105     }
2106 }
2107 
2108 
2109 /*-*******************************************************
2110 *  Block entropic compression
2111 *********************************************************/
2112 
2113 /* See doc/zstd_compression_format.md for detailed format description */
2114 
ZSTD_seqToCodes(const seqStore_t * seqStorePtr)2115 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
2116 {
2117     const seqDef* const sequences = seqStorePtr->sequencesStart;
2118     BYTE* const llCodeTable = seqStorePtr->llCode;
2119     BYTE* const ofCodeTable = seqStorePtr->ofCode;
2120     BYTE* const mlCodeTable = seqStorePtr->mlCode;
2121     U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
2122     U32 u;
2123     assert(nbSeq <= seqStorePtr->maxNbSeq);
2124     for (u=0; u<nbSeq; u++) {
2125         U32 const llv = sequences[u].litLength;
2126         U32 const mlv = sequences[u].matchLength;
2127         llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
2128         ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
2129         mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
2130     }
2131     if (seqStorePtr->longLengthID==1)
2132         llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
2133     if (seqStorePtr->longLengthID==2)
2134         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
2135 }
2136 
2137 /* ZSTD_useTargetCBlockSize():
2138  * Returns if target compressed block size param is being used.
2139  * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
2140  * Returns 1 if true, 0 otherwise. */
ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params * cctxParams)2141 static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
2142 {
2143     DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
2144     return (cctxParams->targetCBlockSize != 0);
2145 }
2146 
/* ZSTD_entropyCompressSequences_internal():
 * actually compresses both literals and sequences.
 * Writes into `dst`, in this order : the compressed literals, the number of
 * sequences (1 to 3 bytes), then, only when nbSeq > 0, one byte of
 * encoding-type flags, the table descriptions for literal lengths, offsets
 * and match lengths, and finally the sequences bitstream.
 * The first (MaxSeq + 1) `unsigned` of `entropyWorkspace` are used as the
 * symbol count array; the remainder is handed down as scratch workspace.
 * `prevEntropy` is only read; `nextEntropy` receives the updated tables
 * and repeat modes.
 * @return : nb of bytes written into `dst`,
 *           or 0 when the block must be emitted uncompressed
 *           (workaround for decoders <= v1.3.4, see below),
 *           or an error code forwarded from one of the sub-stages. */
MEM_STATIC size_t
ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
                          const ZSTD_entropyCTables_t* prevEntropy,
                                ZSTD_entropyCTables_t* nextEntropy,
                          const ZSTD_CCtx_params* cctxParams,
                                void* dst, size_t dstCapacity,
                                void* entropyWorkspace, size_t entropyWkspSize,
                          const int bmi2)
{
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    unsigned* count = (unsigned*)entropyWorkspace;
    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    BYTE* seqHead;
    /* start of the last table description written with set_compressed (NULL if none) */
    BYTE* lastNCount = NULL;

    /* carve the count array out of the workspace : what follows it is scratch space */
    entropyWorkspace = count + (MaxSeq + 1);
    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);

    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);

    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
        size_t const cSize = ZSTD_compressLiterals(
                                    &prevEntropy->huf, &nextEntropy->huf,
                                    cctxParams->cParams.strategy,
                                    ZSTD_disableLiteralsCompression(cctxParams),
                                    op, dstCapacity,
                                    literals, litSize,
                                    entropyWorkspace, entropyWkspSize,
                                    bmi2);
        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
        assert(cSize <= dstCapacity);
        op += cSize;
    }

    /* Sequences Header */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
    /* nbSeq is written on 1, 2 or 3 bytes depending on its value */
    if (nbSeq < 128) {
        *op++ = (BYTE)nbSeq;
    } else if (nbSeq < LONGNBSEQ) {
        op[0] = (BYTE)((nbSeq>>8) + 0x80);
        op[1] = (BYTE)nbSeq;
        op+=2;
    } else {
        op[0]=0xFF;
        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
        op+=3;
    }
    assert(op <= oend);
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
        return (size_t)(op - ostart);
    }

    /* seqHead : flags for FSE encoding type */
    /* the byte is reserved now, and filled once the 3 encoding types are known */
    seqHead = op++;
    assert(op <= oend);

    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    /* build CTable for Literal Lengths */
    {   unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->fse.litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
                count, max, llCodeTable, nbSeq,
                LL_defaultNorm, LL_defaultNormLog, MaxLL,
                prevEntropy->fse.litlengthCTable,
                sizeof(prevEntropy->fse.litlengthCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
            if (LLtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    {   unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(
            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->fse.offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
                count, max, ofCodeTable, nbSeq,
                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                prevEntropy->fse.offcodeCTable,
                sizeof(prevEntropy->fse.offcodeCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
            if (Offtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    {   unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(
            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
                count, max, mlCodeTable, nbSeq,
                ML_defaultNorm, ML_defaultNormLog, MaxML,
                prevEntropy->fse.matchlengthCTable,
                sizeof(prevEntropy->fse.matchlengthCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
            if (MLtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }

    /* pack the 3 encoding types into the reserved flags byte (2 bits each, lowest 2 bits left at 0) */
    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

    {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                        op, (size_t)(oend - op),
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
        op += bitstreamSize;
        assert(op <= oend);
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
         * This can happen when the last set_compressed table present is 2
         * bytes and the bitstream is only one byte.
         * In this exceedingly rare case, we will simply emit an uncompressed
         * block, since it isn't worth optimizing.
         */
        if (lastNCount && (op - lastNCount) < 4) {
            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
            assert(op - lastNCount == 3);
            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                        "emitting an uncompressed block.");
            return 0;
        }
    }

    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
    return (size_t)(op - ostart);
}
2339 
2340 MEM_STATIC size_t
ZSTD_entropyCompressSequences(seqStore_t * seqStorePtr,const ZSTD_entropyCTables_t * prevEntropy,ZSTD_entropyCTables_t * nextEntropy,const ZSTD_CCtx_params * cctxParams,void * dst,size_t dstCapacity,size_t srcSize,void * entropyWorkspace,size_t entropyWkspSize,int bmi2)2341 ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
2342                        const ZSTD_entropyCTables_t* prevEntropy,
2343                              ZSTD_entropyCTables_t* nextEntropy,
2344                        const ZSTD_CCtx_params* cctxParams,
2345                              void* dst, size_t dstCapacity,
2346                              size_t srcSize,
2347                              void* entropyWorkspace, size_t entropyWkspSize,
2348                              int bmi2)
2349 {
2350     size_t const cSize = ZSTD_entropyCompressSequences_internal(
2351                             seqStorePtr, prevEntropy, nextEntropy, cctxParams,
2352                             dst, dstCapacity,
2353                             entropyWorkspace, entropyWkspSize, bmi2);
2354     if (cSize == 0) return 0;
2355     /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
2356      * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
2357      */
2358     if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
2359         return 0;  /* block not compressed */
2360     FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
2361 
2362     /* Check compressibility */
2363     {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
2364         if (cSize >= maxCSize) return 0;  /* block not compressed */
2365     }
2366     DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
2367     return cSize;
2368 }
2369 
2370 /* ZSTD_selectBlockCompressor() :
2371  * Not static, but internal use only (used by long distance matcher)
2372  * assumption : strat is a valid strategy */
ZSTD_selectBlockCompressor(ZSTD_strategy strat,ZSTD_dictMode_e dictMode)2373 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
2374 {
2375     static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
2376         { ZSTD_compressBlock_fast  /* default for 0 */,
2377           ZSTD_compressBlock_fast,
2378           ZSTD_compressBlock_doubleFast,
2379           ZSTD_compressBlock_greedy,
2380           ZSTD_compressBlock_lazy,
2381           ZSTD_compressBlock_lazy2,
2382           ZSTD_compressBlock_btlazy2,
2383           ZSTD_compressBlock_btopt,
2384           ZSTD_compressBlock_btultra,
2385           ZSTD_compressBlock_btultra2 },
2386         { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
2387           ZSTD_compressBlock_fast_extDict,
2388           ZSTD_compressBlock_doubleFast_extDict,
2389           ZSTD_compressBlock_greedy_extDict,
2390           ZSTD_compressBlock_lazy_extDict,
2391           ZSTD_compressBlock_lazy2_extDict,
2392           ZSTD_compressBlock_btlazy2_extDict,
2393           ZSTD_compressBlock_btopt_extDict,
2394           ZSTD_compressBlock_btultra_extDict,
2395           ZSTD_compressBlock_btultra_extDict },
2396         { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
2397           ZSTD_compressBlock_fast_dictMatchState,
2398           ZSTD_compressBlock_doubleFast_dictMatchState,
2399           ZSTD_compressBlock_greedy_dictMatchState,
2400           ZSTD_compressBlock_lazy_dictMatchState,
2401           ZSTD_compressBlock_lazy2_dictMatchState,
2402           ZSTD_compressBlock_btlazy2_dictMatchState,
2403           ZSTD_compressBlock_btopt_dictMatchState,
2404           ZSTD_compressBlock_btultra_dictMatchState,
2405           ZSTD_compressBlock_btultra_dictMatchState },
2406         { NULL  /* default for 0 */,
2407           NULL,
2408           NULL,
2409           ZSTD_compressBlock_greedy_dedicatedDictSearch,
2410           ZSTD_compressBlock_lazy_dedicatedDictSearch,
2411           ZSTD_compressBlock_lazy2_dedicatedDictSearch,
2412           NULL,
2413           NULL,
2414           NULL,
2415           NULL }
2416     };
2417     ZSTD_blockCompressor selectedCompressor;
2418     ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
2419 
2420     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
2421     selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
2422     assert(selectedCompressor != NULL);
2423     return selectedCompressor;
2424 }
2425 
/* ZSTD_storeLastLiterals() :
 * Appends the `lastLLSize` bytes starting at `anchor` to the literals buffer
 * of `seqStorePtr`, and advances its write position accordingly. */
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                   const BYTE* anchor, size_t lastLLSize)
{
    BYTE* const litDst = seqStorePtr->lit;
    ZSTD_memcpy(litDst, anchor, lastLLSize);
    seqStorePtr->lit = litDst + lastLLSize;
}
2432 
/* ZSTD_resetSeqStore() :
 * Empties `ssPtr` : sequence and literal write positions are rewound to the
 * start of their buffers, and the long-length marker is cleared. */
void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->longLengthID = 0;
    ssPtr->sequences = ssPtr->sequencesStart;
    ssPtr->lit = ssPtr->litStart;
}
2439 
/* Status returned by ZSTD_buildSeqStore() :
 * ZSTDbss_compress   : the seqStore was filled for this block,
 * ZSTDbss_noCompress : block too small, compression was not even attempted. */
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
2441 
/* ZSTD_buildSeqStore() :
 * Runs the match finder over the block [src, src+srcSize) and fills
 * zc->seqStore with the resulting sequences and literals
 * (including the last literals following the final match).
 * The sequence source is, by priority : the external sequence store when it
 * still holds unread sequences, the long-distance matcher when enabled,
 * else the block compressor selected from strategy and dictMode.
 * Repcodes are seeded into nextCBlock->rep from prevCBlock->rep.
 * @return : ZSTDbss_compress when the seqStore was built,
 *           ZSTDbss_noCompress when the block is too small to attempt compression
 *           (the external sequence store is still advanced by srcSize),
 *           or an error code forwarded from ZSTD_ldm_generateSequences(). */
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    /* Assert that we have correctly flushed the ctx params into the ms's copy */
    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
        /* the block is skipped : keep the external sequence store in sync with the input position */
        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
        } else {
            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
        }
        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
    }
    ZSTD_resetSeqStore(&(zc->seqStore));
    /* required for optimal parser to read stats from dictionary */
    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
    /* tell the optimal parser how we expect to compress literals */
    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
    /* a gap between an attached dict and the current window is not safe,
     * they must remain adjacent,
     * and when that stops being the case, the dict must be unset */
    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);

    /* limited update after a very long match */
    /* when nextToUpdate lags more than 384 positions behind the block start,
     * it is moved forward to at most 192 positions before the block start */
    {   const BYTE* const base = ms->window.base;
        const BYTE* const istart = (const BYTE*)src;
        const U32 curr = (U32)(istart-base);
        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
        if (curr > ms->nextToUpdate + 384)
            ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
    }

    /* select and store sequences */
    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
        size_t lastLLSize;
        /* start the new block from the repcodes confirmed by the previous block */
        {   int i;
            for (i = 0; i < ZSTD_REP_NUM; ++i)
                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
        }
        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
            /* externally provided sequences remain : consume them */
            assert(!zc->appliedParams.ldmParams.enableLdm);
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       src, srcSize);
            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
        } else if (zc->appliedParams.ldmParams.enableLdm) {
            /* long distance matching : generate sequences into a local store, then consume them all */
            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;

            ldmSeqStore.seq = zc->ldmSequences;
            ldmSeqStore.capacity = zc->maxNbLdmSequences;
            /* Updates ldmSeqStore.size */
            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
                                               &zc->appliedParams.ldmParams,
                                               src, srcSize), "");
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&ldmSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       src, srcSize);
            assert(ldmSeqStore.pos == ldmSeqStore.size);
        } else {   /* not long range mode */
            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
            ms->ldmSeqStore = NULL;
            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
        }
        /* the last lastLLSize bytes of the block were not covered by any sequence : store them as literals */
        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
    }   }
    return ZSTDbss_compress;
}
2518 
/* ZSTD_copyBlockSequences() :
 * Converts the sequences of the current block's seqStore into ZSTD_Sequence
 * entries, written into the sequence collector starting at
 * seqCollector.seqIndex.
 * Repcode offsets are resolved into raw offsets by replaying the repcode
 * history, starting from prevCBlock->rep.
 * One extra entry (offset == 0, matchLength == 0), carrying the block's last
 * literals, is appended after the block's sequences; seqIndex is advanced
 * past it.
 * Note : collector capacity is only verified through assert(). */
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
    size_t seqStoreSeqSize = (size_t)(seqStore->sequences - seqStoreSeqs);
    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
    size_t literalsRead = 0;
    size_t lastLLSize;

    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    size_t i;
    repcodes_t updatedRepcodes;

    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    /* Ensure we have enough space for last literals "sequence" */
    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    for (i = 0; i < seqStoreSeqSize; ++i) {
        U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
        outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
        outSeqs[i].rep = 0;

        /* The seqStore keeps lengths without their 0x10000 part :
         * restore it for the one sequence flagged as "long". */
        if (i == seqStore->longLengthPos) {
            if (seqStore->longLengthID == 1) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthID == 2) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
            /* Derive the correct offset corresponding to a repcode */
            outSeqs[i].rep = seqStoreSeqs[i].offset;
            if (outSeqs[i].litLength != 0) {
                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
            } else {
                /* litLength == 0 : repcodes are shifted by one,
                 * and rep 3 means (rep[0] - 1) */
                if (outSeqs[i].rep == 3) {
                    rawOffset = updatedRepcodes.rep[0] - 1;
                } else {
                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
                }
            }
        }
        outSeqs[i].offset = rawOffset;
        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
           so we provide seqStoreSeqs[i].offset - 1.
           The ll0 flag is derived from the reconstructed literal length :
           the stored litLength is 0 when the real length is exactly 0x10000
           (long literal length), which must not be treated as "no literals".
           This keeps the update consistent with the repcode resolution above. */
        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
                                         seqStoreSeqs[i].offset - 1,
                                         outSeqs[i].litLength == 0);
        literalsRead += outSeqs[i].litLength;
    }
    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
     * for the block boundary, according to the API.
     */
    assert(seqStoreLiteralsSize >= literalsRead);
    lastLLSize = seqStoreLiteralsSize - literalsRead;
    outSeqs[i].litLength = (U32)lastLLSize;
    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
    seqStoreSeqSize++;
    zc->seqCollector.seqIndex += seqStoreSeqSize;
}
2582 
/* ZSTD_generateSequences() :
 * Compresses `src` with `zc` into a temporary buffer while the sequence
 * collector is enabled, so that the sequences found by the compressor are
 * written into `outSeqs` (capacity : `outSeqsSize` entries).
 * The temporary buffer is released before returning.
 * @return : nb of entries written into `outSeqs`,
 *           or an error code (allocation failure, or compression failure). */
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
                              size_t outSeqsSize, const void* src, size_t srcSize)
{
    const size_t dstCapacity = ZSTD_compressBound(srcSize);
    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
    SeqCollector seqCollector;

    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");

    seqCollector.collectSequences = 1;
    seqCollector.seqStart = outSeqs;
    seqCollector.seqIndex = 0;
    seqCollector.maxSequences = outSeqsSize;
    zc->seqCollector = seqCollector;

    /* Free the scratch buffer before forwarding any error, so that it never leaks.
     * A failed compression must be reported rather than returning a partial count. */
    {   size_t const ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
        ZSTD_customFree(dst, ZSTD_defaultCMem);
        FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
    }
    return zc->seqCollector.seqIndex;
}
2602 
/* ZSTD_mergeBlockDelimiters() :
 * Removes, in place, every entry of `sequences` having offset == 0 and
 * matchLength == 0. The litLength of a removed entry is added to the entry
 * that follows it; for the very last entry of the array, it is dropped.
 * @return : nb of entries remaining at the front of `sequences`. */
size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
    size_t readPos;
    size_t writePos = 0;
    for (readPos = 0; readPos < seqsSize; ++readPos) {
        int const isDelimiter = (sequences[readPos].offset == 0)
                             && (sequences[readPos].matchLength == 0);
        if (!isDelimiter) {
            /* regular sequence : compact it towards the front */
            sequences[writePos] = sequences[readPos];
            ++writePos;
            continue;
        }
        /* delimiter : hand its literals over to the next entry, when there is one */
        if (readPos + 1 < seqsSize) {
            sequences[readPos+1].litLength += sequences[readPos].litLength;
        }
    }
    return writePos;
}
2618 
2619 /* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */
ZSTD_isRLE(const BYTE * src,size_t length)2620 static int ZSTD_isRLE(const BYTE* src, size_t length) {
2621     const BYTE* ip = src;
2622     const BYTE value = ip[0];
2623     const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
2624     const size_t unrollSize = sizeof(size_t) * 4;
2625     const size_t unrollMask = unrollSize - 1;
2626     const size_t prefixLength = length & unrollMask;
2627     size_t i;
2628     size_t u;
2629     if (length == 1) return 1;
2630     /* Check if prefix is RLE first before using unrolled loop */
2631     if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
2632         return 0;
2633     }
2634     for (i = prefixLength; i != length; i += unrollSize) {
2635         for (u = 0; u < unrollSize; u += sizeof(size_t)) {
2636             if (MEM_readST(ip + i + u) != valueST) {
2637                 return 0;
2638             }
2639         }
2640     }
2641     return 1;
2642 }
2643 
2644 /* Returns true if the given block may be RLE.
2645  * This is just a heuristic based on the compressibility.
2646  * It may return both false positives and false negatives.
2647  */
ZSTD_maybeRLE(seqStore_t const * seqStore)2648 static int ZSTD_maybeRLE(seqStore_t const* seqStore)
2649 {
2650     size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
2651     size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
2652 
2653     return nbSeqs < 4 && nbLits < 10;
2654 }
2655 
/* ZSTD_confirmRepcodesAndEntropyTables() :
 * Swaps the prevCBlock and nextCBlock pointers of zc->blockState,
 * so that the state just produced in nextCBlock becomes the reference
 * (prevCBlock) for the following block. */
static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
{
    ZSTD_compressedBlockState_t* const confirmed = zc->blockState.nextCBlock;
    zc->blockState.nextCBlock = zc->blockState.prevCBlock;
    zc->blockState.prevCBlock = confirmed;
}
2662 
/* ZSTD_compressBlock_internal() :
 * Compresses one block : builds the seqStore, then entropy-compresses it
 * into `dst`.
 * When the sequence collector is enabled, sequences are copied out instead,
 * nothing is written into `dst`, and 0 is returned.
 * When `frame` is non-zero, a block which is not the first one, whose
 * compressed size is below rleMaxLength and whose content is a single
 * repeated byte, is reported as size 1, with that byte stored in dst[0].
 * Repcodes and entropy tables are confirmed only when the result is > 1.
 * @return : compressed size written into `dst`,
 *           or 1 for the RLE case described above,
 *           or 0 when the block was not compressed,
 *           or an error code. */
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize, U32 frame)
{
    /* This is the upper bound for the length of an rle block.
     * This isn't the actual upper bound. Finding the real threshold
     * needs further investigation.
     */
    const U32 rleMaxLength = 25;
    size_t cSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    }

    /* Sequence collection mode : export the sequences and stop here.
     * This is the only place where collection is handled, so the entropy
     * stage below is never reached in that mode. */
    if (zc->seqCollector.collectSequences) {
        ZSTD_copyBlockSequences(zc);
        ZSTD_confirmRepcodesAndEntropyTables(zc);
        return 0;
    }

    /* encode sequences and literals */
    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
            &zc->appliedParams,
            dst, dstCapacity,
            srcSize,
            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
            zc->bmi2);

    if (frame &&
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        !zc->isFirstBlock &&
        cSize < rleMaxLength &&
        ZSTD_isRLE(ip, srcSize))
    {
        cSize = 1;
        op[0] = ip[0];
    }

out:
    /* cSize <= 1 (not compressed, or RLE) does not use the new tables : keep the previous ones */
    if (!ZSTD_isError(cSize) && cSize > 1) {
        ZSTD_confirmRepcodesAndEntropyTables(zc);
    }
    /* We check that dictionaries have offset codes available for the first
     * block. After the first block, the offcode table might not have large
     * enough codes to represent the offsets in the data.
     */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
2731 
/* ZSTD_compressBlock_targetCBlockSize_body() :
 * Emit one block for `src`, given `bss`, the outcome of ZSTD_buildSeqStore() :
 *  - an RLE block when the input qualifies (never for the first block),
 *  - the superblock output when it is non-empty and small enough,
 *  - a raw (uncompressed) block in every other case.
 * @return : size written into `dst`, or an error code
 */
static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const size_t bss, U32 lastBlock)
{
    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
    if (bss == ZSTDbss_compress) {
        /* The first block is never emitted as RLE, even if it qualifies : doing so
         * would cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        int const emitRLE = !zc->isFirstBlock
                         && ZSTD_maybeRLE(&zc->seqStore)
                         && ZSTD_isRLE((BYTE const*)src, srcSize);
        if (emitRLE) {
            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
        }

        /* Attempt superblock compression.
         *
         * The compressed size of ZSTD_compressSuperBlock() is not bound by the
         * standard ZSTD_compressBound(). This is a problem : even when there is
         * room right now, taking one extra byte could make us run out of space
         * later on, and violate ZSTD_compressBound().
         *
         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
         *
         * To respect ZSTD_compressBound(), a raw uncompressed block is attempted
         * in the following cases :
         *   * superSize == 0 : return code for an uncompressed block.
         *   * superSize == dstSize_tooSmall : we may have expanded beyond
         *     blockBound(srcSize). ZSTD_noCompressBlock() will return
         *     dstSize_tooSmall if output space is really exhausted.
         *   * superSize >= blockBound(srcSize) : the block expanded too much,
         *     so an uncompressed block is emitted instead.
         */
        {   size_t const superSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
            if (superSize != ERROR(dstSize_tooSmall)) {
                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
                FORWARD_IF_ERROR(superSize, "ZSTD_compressSuperBlock failed");
                if ((superSize != 0) && (superSize < maxCSize + ZSTD_blockHeaderSize)) {
                    ZSTD_confirmRepcodesAndEntropyTables(zc);
                    return superSize;
                }
            }
        }
    }

    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
    /* Superblock compression was not used : attempt a single raw block.
     * The decoder will be able to stream this block since it is uncompressed.
     */
    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
}
2786 
/* ZSTD_compressBlock_targetCBlockSize() :
 * Build the sequence store for `src`, then emit the block through
 * ZSTD_compressBlock_targetCBlockSize_body().
 * On success, an offcode table still flagged FSE_repeat_valid in the previous
 * block state is downgraded to FSE_repeat_check.
 * @return : size written into `dst`, or an error code
 */
static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               U32 lastBlock)
{
    size_t const bss = ZSTD_buildSeqStore(zc, src, srcSize);
    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");

    {   size_t const written = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
        FORWARD_IF_ERROR(written, "ZSTD_compressBlock_targetCBlockSize_body failed");

        {   ZSTD_compressedBlockState_t* const prevBlock = zc->blockState.prevCBlock;
            if (prevBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) {
                prevBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
            }
        }

        return written;
    }
}
2806 
/* ZSTD_overflowCorrectIfNeeded() :
 * When ZSTD_window_needOverflowCorrection() requests it for [.., iend),
 * apply ZSTD_window_correctOverflow() on the window, reduce the table indexes
 * and `nextToUpdate` by the returned correction, and drop dictionary references.
 * Does nothing otherwise.
 */
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         void const* ip,
                                         void const* iend)
{
    if (!ZSTD_window_needOverflowCorrection(ms->window, iend)) return;

    {   U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
        U32 const maxDist = (U32)1 << params->cParams.windowLog;
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);

        /* tables are flagged dirty for the duration of the index reduction */
        ZSTD_cwksp_mark_tables_dirty(ws);
        ZSTD_reduceIndex(ms, params, correction);
        ZSTD_cwksp_mark_tables_clean(ws);

        /* shift nextToUpdate by the same amount, saturating at 0 */
        ms->nextToUpdate = (ms->nextToUpdate < correction) ? 0 : (ms->nextToUpdate - correction);

        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}
2830 
2831 /*! ZSTD_compress_frameChunk() :
2832 *   Compress a chunk of data into one or multiple blocks.
2833 *   All blocks will be terminated, all input will be consumed.
2834 *   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
2835 *   Frame is supposed already started (header already produced)
2836 *   @return : compressed size, or an error code
2837 */
ZSTD_compress_frameChunk(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastFrameChunk)2838 static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
2839                                      void* dst, size_t dstCapacity,
2840                                const void* src, size_t srcSize,
2841                                      U32 lastFrameChunk)
2842 {
2843     size_t blockSize = cctx->blockSize;
2844     size_t remaining = srcSize;
2845     const BYTE* ip = (const BYTE*)src;
2846     BYTE* const ostart = (BYTE*)dst;
2847     BYTE* op = ostart;
2848     U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
2849 
2850     assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
2851 
2852     DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
2853     if (cctx->appliedParams.fParams.checksumFlag && srcSize)
2854         XXH64_update(&cctx->xxhState, src, srcSize);
2855 
2856     while (remaining) {
2857         ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
2858         U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
2859 
2860         RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
2861                         dstSize_tooSmall,
2862                         "not enough space to store compressed block");
2863         if (remaining < blockSize) blockSize = remaining;
2864 
2865         ZSTD_overflowCorrectIfNeeded(
2866             ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
2867         ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
2868 
2869         /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
2870         if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
2871 
2872         {   size_t cSize;
2873             if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
2874                 cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
2875                 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
2876                 assert(cSize > 0);
2877                 assert(cSize <= blockSize + ZSTD_blockHeaderSize);
2878             } else {
2879                 cSize = ZSTD_compressBlock_internal(cctx,
2880                                         op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
2881                                         ip, blockSize, 1 /* frame */);
2882                 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
2883 
2884                 if (cSize == 0) {  /* block is not compressible */
2885                     cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
2886                     FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
2887                 } else {
2888                     U32 const cBlockHeader = cSize == 1 ?
2889                         lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
2890                         lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
2891                     MEM_writeLE24(op, cBlockHeader);
2892                     cSize += ZSTD_blockHeaderSize;
2893                 }
2894             }
2895 
2896 
2897             ip += blockSize;
2898             assert(remaining >= blockSize);
2899             remaining -= blockSize;
2900             op += cSize;
2901             assert(dstCapacity >= cSize);
2902             dstCapacity -= cSize;
2903             cctx->isFirstBlock = 0;
2904             DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
2905                         (unsigned)cSize);
2906     }   }
2907 
2908     if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
2909     return (size_t)(op-ostart);
2910 }
2911 
2912 
/* ZSTD_writeFrameHeader() :
 * Serialize into `dst` the frame header described by `params`, `pledgedSrcSize` and `dictID` :
 * magic number (ZSTD_f_zstd1 format only), descriptor byte, window byte
 * (omitted in single-segment mode), dictID field, then frame content size field.
 * `dstCapacity` must be at least ZSTD_FRAMEHEADERSIZE_MAX, whatever the actual header size.
 * @return : nb of bytes written into `dst`, or an error code (dstSize_tooSmall)
 */
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{
    BYTE* const op = (BYTE*)dst;
    size_t pos = 0;
    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
    U32   const checksumFlag = params->fParams.checksumFlag>0;
    U32   const windowSize = (U32)1 << params->cParams.windowLog;
    /* single segment : content size is declared and the whole content fits in the window */
    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32   const fcsCode = params->fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );

    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
                    "dst buf is too small to fit worst-case frame header size.");
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);

    /* magic number */
    if (params->format == ZSTD_f_zstd1) {
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        pos = 4;
    }

    /* descriptor, then window byte unless single-segment */
    op[pos] = frameHeaderDescriptionByte;
    pos += 1;
    if (!singleSegment) {
        op[pos] = windowLogByte;
        pos += 1;
    }

    /* dictID field : 0, 1, 2 or 4 bytes */
    switch(dictIDSizeCode)
    {
        default:
            assert(0); /* impossible */
            /* fall-through */
        case 0 :
            break;
        case 1 :
            op[pos] = (BYTE)(dictID);
            pos += 1;
            break;
        case 2 :
            MEM_writeLE16(op+pos, (U16)dictID);
            pos += 2;
            break;
        case 3 :
            MEM_writeLE32(op+pos, dictID);
            pos += 4;
            break;
    }

    /* frame content size field : 0/1, 2, 4 or 8 bytes */
    switch(fcsCode)
    {
        default:
            assert(0); /* impossible */
            /* fall-through */
        case 0 :
            /* 1 byte, present only in single-segment mode */
            if (singleSegment) {
                op[pos] = (BYTE)(pledgedSrcSize);
                pos += 1;
            }
            break;
        case 1 :
            /* 2 bytes, stored with a 256 offset */
            MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256));
            pos += 2;
            break;
        case 2 :
            MEM_writeLE32(op+pos, (U32)(pledgedSrcSize));
            pos += 4;
            break;
        case 3 :
            MEM_writeLE64(op+pos, (U64)(pledgedSrcSize));
            pos += 8;
            break;
    }
    return pos;
}
2956 
2957 /* ZSTD_writeLastEmptyBlock() :
2958  * output an empty Block with end-of-frame mark to complete a frame
2959  * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
2960  *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
2961  */
ZSTD_writeLastEmptyBlock(void * dst,size_t dstCapacity)2962 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
2963 {
2964     RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
2965                     "dst buf is too small to write frame trailer empty block.");
2966     {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
2967         MEM_writeLE24(dst, cBlockHeader24);
2968         return ZSTD_blockHeaderSize;
2969     }
2970 }
2971 
/* ZSTD_referenceExternalSequences() :
 * Make `cctx` reference the `nbSeq` sequences stored in `seq` as its external
 * sequence store, with the read position reset to the start.
 * Only accepted while `cctx` is in ZSTDcs_init stage and LDM is disabled.
 * The array is referenced, not copied.
 * @return : 0, or an error code (stage_wrong, parameter_unsupported)
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
                    "wrong cctx stage");
    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
                    parameter_unsupported,
                    "incompatible with ldm");

    /* storage : the store is exactly full */
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.size = nbSeq;

    /* cursor : start from the first sequence */
    cctx->externSeqStore.posInSequence = 0;
    cctx->externSeqStore.pos = 0;

    return 0;
}
2986 
2987 
/* ZSTD_compressContinue_internal() :
 * Shared implementation of frame mode (`frame` != 0) and block mode (`frame` == 0).
 * In frame mode, the frame header is written first when the context is still in
 * ZSTDcs_init stage; the input is then compressed via ZSTD_compress_frameChunk().
 * In block mode, the input is compressed as a single block body.
 * Consumed / produced counters are updated, and the consumed total is checked
 * against the pledged source size when one was declared.
 * @return : nb of bytes written into `dst` (frame header included), or an error code
 */
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
    size_t fhSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    /* frame mode, first call : emit the frame header */
    if (frame && (cctx->stage==ZSTDcs_init)) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        assert(fhSize <= dstCapacity);
        dst = (char*)dst + fhSize;
        dstCapacity -= fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (srcSize == 0) return fhSize;  /* do not generate an empty block if no input */

    /* a zero return from ZSTD_window_update() restarts table updates at dictLimit */
    if (ZSTD_window_update(&ms->window, src, srcSize) == 0) {
        ms->nextToUpdate = ms->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm) {
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams,
            src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t cSize;
        if (frame) {
            cSize = ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk);
        } else {
            cSize = ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
        }
        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");

        {   size_t const totalWritten = cSize + fhSize;
            cctx->consumedSrcSize += srcSize;
            cctx->producedCSize += totalWritten;
            assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
            if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
                ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
                RETURN_ERROR_IF(
                    cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
                    srcSize_wrong,
                    "error : pledgedSrcSize = %u, while realSrcSize >= %u",
                    (unsigned)cctx->pledgedSrcSizePlusOne-1,
                    (unsigned)cctx->consumedSrcSize);
            }
            return totalWritten;
        }
    }
}
3047 
/* ZSTD_compressContinue() :
 * Compress `src` into `dst` in frame mode, as a chunk which is not
 * flagged as the last one of the frame.
 * @return : nb of bytes written into `dst`, or an error code
 */
size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    U32 const frameMode = 1;
    U32 const lastChunk = 0;
    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, frameMode, lastChunk);
}
3055 
3056 
ZSTD_getBlockSize(const ZSTD_CCtx * cctx)3057 size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
3058 {
3059     ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
3060     assert(!ZSTD_checkCParams(cParams));
3061     return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
3062 }
3063 
/* ZSTD_compressBlock() :
 * Compress `src` as a single block in block mode (no frame header, no block header).
 * `srcSize` must not exceed ZSTD_getBlockSize(cctx).
 * @return : value returned by the block-mode path of ZSTD_compressContinue_internal(),
 *           or an error code (srcSize_wrong when the input is larger than a block)
 */
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);

    {   size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
        RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block");
    }

    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize,
                                          0 /* frame mode */, 0 /* last chunk */);
}
3072 
3073 /*! ZSTD_loadDictionaryContent() :
3074  *  @return : 0, or an error code
3075  */
ZSTD_loadDictionaryContent(ZSTD_matchState_t * ms,ldmState_t * ls,ZSTD_cwksp * ws,ZSTD_CCtx_params const * params,const void * src,size_t srcSize,ZSTD_dictTableLoadMethod_e dtlm)3076 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
3077                                          ldmState_t* ls,
3078                                          ZSTD_cwksp* ws,
3079                                          ZSTD_CCtx_params const* params,
3080                                          const void* src, size_t srcSize,
3081                                          ZSTD_dictTableLoadMethod_e dtlm)
3082 {
3083     const BYTE* ip = (const BYTE*) src;
3084     const BYTE* const iend = ip + srcSize;
3085 
3086     ZSTD_window_update(&ms->window, src, srcSize);
3087     ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
3088 
3089     if (params->ldmParams.enableLdm && ls != NULL) {
3090         ZSTD_window_update(&ls->window, src, srcSize);
3091         ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
3092     }
3093 
3094     /* Assert that we the ms params match the params we're being given */
3095     ZSTD_assertEqualCParams(params->cParams, ms->cParams);
3096 
3097     if (srcSize <= HASH_READ_SIZE) return 0;
3098 
3099     while (iend - ip > HASH_READ_SIZE) {
3100         size_t const remaining = (size_t)(iend - ip);
3101         size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
3102         const BYTE* const ichunk = ip + chunk;
3103 
3104         ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
3105 
3106         if (params->ldmParams.enableLdm && ls != NULL)
3107             ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
3108 
3109         switch(params->cParams.strategy)
3110         {
3111         case ZSTD_fast:
3112             ZSTD_fillHashTable(ms, ichunk, dtlm);
3113             break;
3114         case ZSTD_dfast:
3115             ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
3116             break;
3117 
3118         case ZSTD_greedy:
3119         case ZSTD_lazy:
3120         case ZSTD_lazy2:
3121             if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
3122                 assert(chunk == remaining); /* must load everything in one go */
3123                 ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
3124             } else if (chunk >= HASH_READ_SIZE) {
3125                 ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
3126             }
3127             break;
3128 
3129         case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
3130         case ZSTD_btopt:
3131         case ZSTD_btultra:
3132         case ZSTD_btultra2:
3133             if (chunk >= HASH_READ_SIZE)
3134                 ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
3135             break;
3136 
3137         default:
3138             assert(0);  /* not possible : not a valid strategy id */
3139         }
3140 
3141         ip = ichunk;
3142     }
3143 
3144     ms->nextToUpdate = (U32)(iend - ms->window.base);
3145     return 0;
3146 }
3147 
3148 
3149 /* Dictionaries that assign zero probability to symbols that show up causes problems
3150  * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
3151  * and only dictionaries with 100% valid symbols can be assumed valid.
3152  */
ZSTD_dictNCountRepeat(short * normalizedCounter,unsigned dictMaxSymbolValue,unsigned maxSymbolValue)3153 static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
3154 {
3155     U32 s;
3156     if (dictMaxSymbolValue < maxSymbolValue) {
3157         return FSE_repeat_check;
3158     }
3159     for (s = 0; s <= maxSymbolValue; ++s) {
3160         if (normalizedCounter[s] == 0) {
3161             return FSE_repeat_check;
3162         }
3163     }
3164     return FSE_repeat_valid;
3165 }
3166 
/*! ZSTD_loadCEntropy() :
 *  Parse the entropy section of a zstd dictionary into `bs` : Huffman table for
 *  literals, FSE tables for offset codes, match lengths and literal lengths,
 *  then the 3 starting repcodes.
 *  The first 8 bytes of `dict` (magic number and dictID) are skipped without
 *  being inspected, so `dictSize` is expected to be >= 8.
 *  `workspace` is scratch space handed to FSE_buildCTable_wksp(),
 *  declared as HUF_WORKSPACE_SIZE bytes.
 * @return : nb of bytes consumed from `dict` (offset where dictionary content starts),
 *           or an error code (dictionary_corrupted)
 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize)
{
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    dictPtr += 8;   /* skip magic num and dict ID */
    bs->entropy.huf.repeatMode = HUF_repeat_check;

    {   unsigned maxSymbolValue = 255;
        unsigned hasZeroWeights = 1;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
            (size_t)(dictEnd-dictPtr), &hasZeroWeights);

        /* We only set the loaded table as valid if it contains all non-zero
         * weights. Otherwise, we set it to check */
        if (!hasZeroWeights)
            bs->entropy.huf.repeatMode = HUF_repeat_valid;

        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
        dictPtr += hufHeaderSize;
    }

    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        dictPtr += offcodeHeaderSize;
    }

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
        dictPtr += matchlengthHeaderSize;
    }

    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.litlengthCTable,
                litlengthNCount, litlengthMaxValue, litlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
        dictPtr += litlengthHeaderSize;
    }

    /* The 3 repcodes take 12 bytes. Compare the remaining length rather than
     * forming `dictPtr+12` : on a truncated dictionary that pointer would lie
     * beyond one-past-the-end of the buffer, which is undefined behavior. */
    RETURN_ERROR_IF(dictEnd - dictPtr < 12, dictionary_corrupted, "");
    bs->rep[0] = MEM_readLE32(dictPtr+0);
    bs->rep[1] = MEM_readLE32(dictPtr+4);
    bs->rep[2] = MEM_readLE32(dictPtr+8);
    dictPtr += 12;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        U32 offcodeMax = MaxOff;
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
        }
        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));

        /* All repCodes must be <= dictContentSize and != 0 */
        {   U32 u;
            for (u=0; u<3; u++) {
                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
    }   }   }

    return (size_t)(dictPtr - (const BYTE*)dict);
}
3258 
3259 /* Dictionary format :
3260  * See :
3261  * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
3262  */
3263 /*! ZSTD_loadZstdDictionary() :
3264  * @return : dictID, or an error code
3265  *  assumptions : magic number supposed already checked
3266  *                dictSize supposed >= 8
3267  */
ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t * bs,ZSTD_matchState_t * ms,ZSTD_cwksp * ws,ZSTD_CCtx_params const * params,const void * dict,size_t dictSize,ZSTD_dictTableLoadMethod_e dtlm,void * workspace)3268 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
3269                                       ZSTD_matchState_t* ms,
3270                                       ZSTD_cwksp* ws,
3271                                       ZSTD_CCtx_params const* params,
3272                                       const void* dict, size_t dictSize,
3273                                       ZSTD_dictTableLoadMethod_e dtlm,
3274                                       void* workspace)
3275 {
3276     const BYTE* dictPtr = (const BYTE*)dict;
3277     const BYTE* const dictEnd = dictPtr + dictSize;
3278     size_t dictID;
3279     size_t eSize;
3280 
3281     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
3282     assert(dictSize >= 8);
3283     assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
3284 
3285     dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
3286     eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
3287     FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
3288     dictPtr += eSize;
3289 
3290     {
3291         size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
3292         FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
3293             ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
3294     }
3295     return dictID;
3296 }
3297 
3298 /** ZSTD_compress_insertDictionary() :
3299 *   @return : dictID, or an error code */
3300 static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t * bs,ZSTD_matchState_t * ms,ldmState_t * ls,ZSTD_cwksp * ws,const ZSTD_CCtx_params * params,const void * dict,size_t dictSize,ZSTD_dictContentType_e dictContentType,ZSTD_dictTableLoadMethod_e dtlm,void * workspace)3301 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
3302                                ZSTD_matchState_t* ms,
3303                                ldmState_t* ls,
3304                                ZSTD_cwksp* ws,
3305                          const ZSTD_CCtx_params* params,
3306                          const void* dict, size_t dictSize,
3307                                ZSTD_dictContentType_e dictContentType,
3308                                ZSTD_dictTableLoadMethod_e dtlm,
3309                                void* workspace)
3310 {
3311     DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
3312     if ((dict==NULL) || (dictSize<8)) {
3313         RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
3314         return 0;
3315     }
3316 
3317     ZSTD_reset_compressedBlockState(bs);
3318 
3319     /* dict restricted modes */
3320     if (dictContentType == ZSTD_dct_rawContent)
3321         return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
3322 
3323     if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
3324         if (dictContentType == ZSTD_dct_auto) {
3325             DEBUGLOG(4, "raw content dictionary detected");
3326             return ZSTD_loadDictionaryContent(
3327                 ms, ls, ws, params, dict, dictSize, dtlm);
3328         }
3329         RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
3330         assert(0);   /* impossible */
3331     }
3332 
3333     /* dict as full zstd dictionary */
3334     return ZSTD_loadZstdDictionary(
3335         bs, ms, ws, params, dict, dictSize, dtlm, workspace);
3336 }
3337 
3338 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
3339 #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
3340 
3341 /*! ZSTD_compressBegin_internal() :
3342  * @return : 0, or an error code */
ZSTD_compressBegin_internal(ZSTD_CCtx * cctx,const void * dict,size_t dictSize,ZSTD_dictContentType_e dictContentType,ZSTD_dictTableLoadMethod_e dtlm,const ZSTD_CDict * cdict,const ZSTD_CCtx_params * params,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)3343 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
3344                                     const void* dict, size_t dictSize,
3345                                     ZSTD_dictContentType_e dictContentType,
3346                                     ZSTD_dictTableLoadMethod_e dtlm,
3347                                     const ZSTD_CDict* cdict,
3348                                     const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
3349                                     ZSTD_buffered_policy_e zbuff)
3350 {
3351     DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
3352     /* params are supposed to be fully validated at this point */
3353     assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
3354     assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
3355     if ( (cdict)
3356       && (cdict->dictContentSize > 0)
3357       && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
3358         || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
3359         || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
3360         || cdict->compressionLevel == 0)
3361       && (params->attachDictPref != ZSTD_dictForceLoad) ) {
3362         return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
3363     }
3364 
3365     FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
3366                                      ZSTDcrp_makeClean, zbuff) , "");
3367     {   size_t const dictID = cdict ?
3368                 ZSTD_compress_insertDictionary(
3369                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
3370                         &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
3371                         cdict->dictContentSize, cdict->dictContentType, dtlm,
3372                         cctx->entropyWorkspace)
3373               : ZSTD_compress_insertDictionary(
3374                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
3375                         &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
3376                         dictContentType, dtlm, cctx->entropyWorkspace);
3377         FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
3378         assert(dictID <= UINT_MAX);
3379         cctx->dictID = (U32)dictID;
3380     }
3381     return 0;
3382 }
3383 
/* ZSTD_compressBegin_advanced_internal() :
 * Checks params->cParams, then starts a frame in non-buffered mode.
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);

    /* compression parameters verification and optimization */
    {   size_t const checkResult = ZSTD_checkCParams(params->cParams);
        FORWARD_IF_ERROR(checkResult, "");
    }

    return ZSTD_compressBegin_internal(cctx,
                dict, dictSize, dictContentType, dtlm,
                cdict,
                params, pledgedSrcSize,
                ZSTDb_not_buffered);
}
3401 
3402 /*! ZSTD_compressBegin_advanced() :
3403 *   @return : 0, or an error code */
ZSTD_compressBegin_advanced(ZSTD_CCtx * cctx,const void * dict,size_t dictSize,ZSTD_parameters params,unsigned long long pledgedSrcSize)3404 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
3405                              const void* dict, size_t dictSize,
3406                                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
3407 {
3408     ZSTD_CCtx_params const cctxParams =
3409             ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
3410     return ZSTD_compressBegin_advanced_internal(cctx,
3411                                             dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
3412                                             NULL /*cdict*/,
3413                                             &cctxParams, pledgedSrcSize);
3414 }
3415 
/* ZSTD_compressBegin_usingDict() :
 * Starts a frame at `compressionLevel` with an optional dictionary.
 * The source size is treated as unknown; the dictionary type is auto-detected. */
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_parameters const levelParams =
            ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN,
                                    dictSize, ZSTD_cpm_noAttachDict);
    ZSTD_CCtx_params const mergedParams =
            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &levelParams);

    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);

    return ZSTD_compressBegin_internal(cctx,
                dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                NULL,
                &mergedParams, ZSTD_CONTENTSIZE_UNKNOWN,
                ZSTDb_not_buffered);
}
3425 
ZSTD_compressBegin(ZSTD_CCtx * cctx,int compressionLevel)3426 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
3427 {
3428     return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
3429 }
3430 
3431 
3432 /*! ZSTD_writeEpilogue() :
3433 *   Ends a frame.
3434 *   @return : nb of bytes written into dst (or an error code) */
ZSTD_writeEpilogue(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity)3435 static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
3436 {
3437     BYTE* const ostart = (BYTE*)dst;
3438     BYTE* op = ostart;
3439     size_t fhSize = 0;
3440 
3441     DEBUGLOG(4, "ZSTD_writeEpilogue");
3442     RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
3443 
3444     /* special case : empty frame */
3445     if (cctx->stage == ZSTDcs_init) {
3446         fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
3447         FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
3448         dstCapacity -= fhSize;
3449         op += fhSize;
3450         cctx->stage = ZSTDcs_ongoing;
3451     }
3452 
3453     if (cctx->stage != ZSTDcs_ending) {
3454         /* write one last empty block, make it the "last" block */
3455         U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
3456         RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
3457         MEM_writeLE32(op, cBlockHeader24);
3458         op += ZSTD_blockHeaderSize;
3459         dstCapacity -= ZSTD_blockHeaderSize;
3460     }
3461 
3462     if (cctx->appliedParams.fParams.checksumFlag) {
3463         U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
3464         RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
3465         DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
3466         MEM_writeLE32(op, checksum);
3467         op += 4;
3468     }
3469 
3470     cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
3471     return op-ostart;
3472 }
3473 
/* ZSTD_compressEnd() :
 * Compresses `src` as the last chunk of the current frame, then writes the epilogue.
 * When a source size was pledged, it is checked against the amount actually consumed.
 * @return : total nb of bytes written into dst (or an error code) */
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");

    {   size_t const epilogueSize = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
        FORWARD_IF_ERROR(epilogueSize, "ZSTD_writeEpilogue failed");

        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            DEBUGLOG(4, "end of frame : controlling src size");
            RETURN_ERROR_IF(
                cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize = %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }
        return cSize + epilogueSize;
    }
}
3498 
/* ZSTD_compress_internal() :
 * Merges `params` with the context's requested parameters,
 * then compresses `src` into a single frame. */
static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const void* dict,size_t dictSize,
                                const ZSTD_parameters* params)
{
    ZSTD_CCtx_params const mergedParams =
            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);

    DEBUGLOG(4, "ZSTD_compress_internal");

    return ZSTD_compress_advanced_internal(
                cctx,
                dst, dstCapacity,
                src, srcSize,
                dict, dictSize,
                &mergedParams);
}
3514 
/* ZSTD_compress_advanced() :
 * One-shot compression with explicit parameters and an optional dictionary.
 * params.cParams is validated before any work is done. */
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced");

    {   size_t const checkResult = ZSTD_checkCParams(params.cParams);
        FORWARD_IF_ERROR(checkResult, "");
    }

    return ZSTD_compress_internal(
                cctx,
                dst, dstCapacity,
                src, srcSize,
                dict, dictSize,
                &params);
}
3529 
3530 /* Internal */
ZSTD_compress_advanced_internal(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const void * src,size_t srcSize,const void * dict,size_t dictSize,const ZSTD_CCtx_params * params)3531 size_t ZSTD_compress_advanced_internal(
3532         ZSTD_CCtx* cctx,
3533         void* dst, size_t dstCapacity,
3534         const void* src, size_t srcSize,
3535         const void* dict,size_t dictSize,
3536         const ZSTD_CCtx_params* params)
3537 {
3538     DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
3539     FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
3540                          dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
3541                          params, srcSize, ZSTDb_not_buffered) , "");
3542     return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
3543 }
3544 
/* ZSTD_compress_usingDict() :
 * One-shot compression at `compressionLevel` with an optional dictionary.
 * `dictSize` is ignored for parameter selection when `dict` is NULL. */
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               int compressionLevel)
{
    ZSTD_parameters const levelParams =
            ZSTD_getParams_internal(compressionLevel, srcSize,
                                    dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
    ZSTD_CCtx_params mergedParams =
            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &levelParams);

    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
    assert(levelParams.fParams.contentSizeFlag == 1);

    return ZSTD_compress_advanced_internal(cctx,
                dst, dstCapacity,
                src, srcSize,
                dict, dictSize,
                &mergedParams);
}
3557 
/* ZSTD_compressCCtx() :
 * One-shot compression with an explicit context and no dictionary.
 * `cctx` must not be NULL. */
size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize,
                         int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
    assert(cctx != NULL);
    return ZSTD_compress_usingDict(cctx,
                dst, dstCapacity,
                src, srcSize,
                NULL, 0,   /* no dictionary */
                compressionLevel);
}
3567 
/* ZSTD_compress() :
 * One-shot compression with a temporary context.
 * The context is heap-allocated when ZSTD_COMPRESS_HEAPMODE is set,
 * and lives on the stack otherwise. */
size_t ZSTD_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize,
                     int compressionLevel)
{
    size_t cSize;
#if ZSTD_COMPRESS_HEAPMODE
    ZSTD_CCtx* heapCCtx = ZSTD_createCCtx();
    RETURN_ERROR_IF(!heapCCtx, memory_allocation, "ZSTD_createCCtx failed");
    cSize = ZSTD_compressCCtx(heapCCtx, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtx(heapCCtx);
#else
    ZSTD_CCtx stackCCtx;
    ZSTD_initCCtx(&stackCCtx, ZSTD_defaultCMem);
    cSize = ZSTD_compressCCtx(&stackCCtx, dst, dstCapacity, src, srcSize, compressionLevel);
    /* the struct itself is on stack : release only what it owns */
    ZSTD_freeCCtxContent(&stackCCtx);
#endif
    return cSize;
}
3586 
3587 
3588 /* =====  Dictionary API  ===== */
3589 
3590 /*! ZSTD_estimateCDictSize_advanced() :
3591  *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
ZSTD_estimateCDictSize_advanced(size_t dictSize,ZSTD_compressionParameters cParams,ZSTD_dictLoadMethod_e dictLoadMethod)3592 size_t ZSTD_estimateCDictSize_advanced(
3593         size_t dictSize, ZSTD_compressionParameters cParams,
3594         ZSTD_dictLoadMethod_e dictLoadMethod)
3595 {
3596     DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
3597     return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
3598          + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
3599          + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
3600          + (dictLoadMethod == ZSTD_dlm_byRef ? 0
3601             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
3602 }
3603 
/* ZSTD_estimateCDictSize() :
 * Estimate for a dictionary copied into the CDict (ZSTD_dlm_byCopy),
 * using the compression parameters selected for `compressionLevel`. */
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters const levelCParams =
            ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN,
                                     dictSize, ZSTD_cpm_createCDict);
    return ZSTD_estimateCDictSize_advanced(dictSize, levelCParams, ZSTD_dlm_byCopy);
}
3609 
ZSTD_sizeof_CDict(const ZSTD_CDict * cdict)3610 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
3611 {
3612     if (cdict==NULL) return 0;   /* support sizeof on NULL */
3613     DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
3614     /* cdict may be in the workspace */
3615     return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
3616         + ZSTD_cwksp_sizeof(&cdict->workspace);
3617 }
3618 
/* ZSTD_initCDict_internal() :
 * Fills an already-allocated `cdict` : records the compression parameters,
 * references or copies the dictionary, reserves the entropy workspace,
 * resets block state and match state, then loads the dictionary.
 * Workspace reservations are performed in a fixed order : dictionary copy,
 * entropy workspace, then match state.
 * @return : 0, or an error code */
static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_CCtx_params params)
{
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(params.cParams));
    cdict->matchState.cParams = params.cParams;
    /* dedicated dict search is switched off for dictionaries above ZSTD_CHUNKSIZE_MAX */
    cdict->matchState.dedicatedDictSearch =
            (dictSize > ZSTD_CHUNKSIZE_MAX) ? 0 : params.enableDedicatedDictSearch;

    /* reference the caller's buffer, or copy it into the workspace */
    {   int const mustCopy = (dictLoadMethod != ZSTD_dlm_byRef) && dictBuffer && dictSize;
        if (mustCopy) {
            void* const dictCopy = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
            RETURN_ERROR_IF(!dictCopy, memory_allocation, "NULL pointer!");
            cdict->dictContent = dictCopy;
            ZSTD_memcpy(dictCopy, dictBuffer, dictSize);
        } else {
            cdict->dictContent = dictBuffer;
        }
    }
    cdict->dictContentSize = dictSize;
    cdict->dictContentType = dictContentType;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);

    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    {   size_t const resetResult = ZSTD_reset_matchState(
                &cdict->matchState,
                &cdict->workspace,
                &params.cParams,
                ZSTDcrp_makeClean,
                ZSTDirp_reset,
                ZSTD_resetTarget_CDict);
        FORWARD_IF_ERROR(resetResult, "");
    }

    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is < 8 bytes.
     */
    params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
    params.fParams.contentSizeFlag = 1;
    {   size_t const dictID = ZSTD_compress_insertDictionary(
                &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
                &params, cdict->dictContent, cdict->dictContentSize,
                dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
        assert(dictID <= (size_t)(U32)-1);
        cdict->dictID = (U32)dictID;
    }

    return 0;
}
3673 
ZSTD_createCDict_advanced_internal(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_compressionParameters cParams,ZSTD_customMem customMem)3674 static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
3675                                       ZSTD_dictLoadMethod_e dictLoadMethod,
3676                                       ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
3677 {
3678     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
3679 
3680     {   size_t const workspaceSize =
3681             ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
3682             ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
3683             ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
3684             (dictLoadMethod == ZSTD_dlm_byRef ? 0
3685              : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
3686         void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
3687         ZSTD_cwksp ws;
3688         ZSTD_CDict* cdict;
3689 
3690         if (!workspace) {
3691             ZSTD_customFree(workspace, customMem);
3692             return NULL;
3693         }
3694 
3695         ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);
3696 
3697         cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
3698         assert(cdict != NULL);
3699         ZSTD_cwksp_move(&cdict->workspace, &ws);
3700         cdict->customMem = customMem;
3701         cdict->compressionLevel = 0; /* signals advanced API usage */
3702 
3703         return cdict;
3704     }
3705 }
3706 
ZSTD_createCDict_advanced(const void * dictBuffer,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_compressionParameters cParams,ZSTD_customMem customMem)3707 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
3708                                       ZSTD_dictLoadMethod_e dictLoadMethod,
3709                                       ZSTD_dictContentType_e dictContentType,
3710                                       ZSTD_compressionParameters cParams,
3711                                       ZSTD_customMem customMem)
3712 {
3713     ZSTD_CCtx_params cctxParams;
3714     ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
3715     ZSTD_CCtxParams_init(&cctxParams, 0);
3716     cctxParams.cParams = cParams;
3717     cctxParams.customMem = customMem;
3718     return ZSTD_createCDict_advanced2(
3719         dictBuffer, dictSize,
3720         dictLoadMethod, dictContentType,
3721         &cctxParams, customMem);
3722 }
3723 
ZSTD_createCDict_advanced2(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,const ZSTD_CCtx_params * originalCctxParams,ZSTD_customMem customMem)3724 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
3725         const void* dict, size_t dictSize,
3726         ZSTD_dictLoadMethod_e dictLoadMethod,
3727         ZSTD_dictContentType_e dictContentType,
3728         const ZSTD_CCtx_params* originalCctxParams,
3729         ZSTD_customMem customMem)
3730 {
3731     ZSTD_CCtx_params cctxParams = *originalCctxParams;
3732     ZSTD_compressionParameters cParams;
3733     ZSTD_CDict* cdict;
3734 
3735     DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
3736     if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
3737 
3738     if (cctxParams.enableDedicatedDictSearch) {
3739         cParams = ZSTD_dedicatedDictSearch_getCParams(
3740             cctxParams.compressionLevel, dictSize);
3741         ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
3742     } else {
3743         cParams = ZSTD_getCParamsFromCCtxParams(
3744             &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
3745     }
3746 
3747     if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
3748         /* Fall back to non-DDSS params */
3749         cctxParams.enableDedicatedDictSearch = 0;
3750         cParams = ZSTD_getCParamsFromCCtxParams(
3751             &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
3752     }
3753 
3754     cctxParams.cParams = cParams;
3755 
3756     cdict = ZSTD_createCDict_advanced_internal(dictSize,
3757                         dictLoadMethod, cctxParams.cParams,
3758                         customMem);
3759 
3760     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3761                                     dict, dictSize,
3762                                     dictLoadMethod, dictContentType,
3763                                     cctxParams) )) {
3764         ZSTD_freeCDict(cdict);
3765         return NULL;
3766     }
3767 
3768     return cdict;
3769 }
3770 
ZSTD_createCDict(const void * dict,size_t dictSize,int compressionLevel)3771 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
3772 {
3773     ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
3774     ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
3775                                                   ZSTD_dlm_byCopy, ZSTD_dct_auto,
3776                                                   cParams, ZSTD_defaultCMem);
3777     if (cdict)
3778         cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
3779     return cdict;
3780 }
3781 
ZSTD_createCDict_byReference(const void * dict,size_t dictSize,int compressionLevel)3782 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
3783 {
3784     ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
3785     ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
3786                                      ZSTD_dlm_byRef, ZSTD_dct_auto,
3787                                      cParams, ZSTD_defaultCMem);
3788     if (cdict)
3789         cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
3790     return cdict;
3791 }
3792 
ZSTD_freeCDict(ZSTD_CDict * cdict)3793 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
3794 {
3795     if (cdict==NULL) return 0;   /* support free on NULL */
3796     {   ZSTD_customMem const cMem = cdict->customMem;
3797         int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
3798         ZSTD_cwksp_free(&cdict->workspace, cMem);
3799         if (!cdictInWorkspace) {
3800             ZSTD_customFree(cdict, cMem);
3801         }
3802         return 0;
3803     }
3804 }
3805 
3806 /*! ZSTD_initStaticCDict_advanced() :
3807  *  Generate a digested dictionary in provided memory area.
3808  *  workspace: The memory area to emplace the dictionary into.
3809  *             Provided pointer must 8-bytes aligned.
3810  *             It must outlive dictionary usage.
3811  *  workspaceSize: Use ZSTD_estimateCDictSize()
3812  *                 to determine how large workspace must be.
3813  *  cParams : use ZSTD_getCParams() to transform a compression level
3814  *            into its relevants cParams.
3815  * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
3816  *  Note : there is no corresponding "free" function.
3817  *         Since workspace was allocated externally, it must be freed externally.
3818  */
ZSTD_initStaticCDict(void * workspace,size_t workspaceSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_compressionParameters cParams)3819 const ZSTD_CDict* ZSTD_initStaticCDict(
3820                                  void* workspace, size_t workspaceSize,
3821                            const void* dict, size_t dictSize,
3822                                  ZSTD_dictLoadMethod_e dictLoadMethod,
3823                                  ZSTD_dictContentType_e dictContentType,
3824                                  ZSTD_compressionParameters cParams)
3825 {
3826     size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
3827     size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
3828                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0
3829                                : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
3830                             + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
3831                             + matchStateSize;
3832     ZSTD_CDict* cdict;
3833     ZSTD_CCtx_params params;
3834 
3835     if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
3836 
3837     {
3838         ZSTD_cwksp ws;
3839         ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
3840         cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
3841         if (cdict == NULL) return NULL;
3842         ZSTD_cwksp_move(&cdict->workspace, &ws);
3843     }
3844 
3845     DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
3846         (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
3847     if (workspaceSize < neededSize) return NULL;
3848 
3849     ZSTD_CCtxParams_init(&params, 0);
3850     params.cParams = cParams;
3851 
3852     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3853                                               dict, dictSize,
3854                                               dictLoadMethod, dictContentType,
3855                                               params) ))
3856         return NULL;
3857 
3858     return cdict;
3859 }
3860 
ZSTD_getCParamsFromCDict(const ZSTD_CDict * cdict)3861 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
3862 {
3863     assert(cdict != NULL);
3864     return cdict->matchState.cParams;
3865 }
3866 
3867 /*! ZSTD_getDictID_fromCDict() :
3868  *  Provides the dictID of the dictionary loaded into `cdict`.
3869  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
3870  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromCDict(const ZSTD_CDict * cdict)3871 unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
3872 {
3873     if (cdict==NULL) return 0;
3874     return cdict->dictID;
3875 }
3876 
3877 
3878 /* ZSTD_compressBegin_usingCDict_advanced() :
3879  * cdict must be != NULL */
ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx * const cctx,const ZSTD_CDict * const cdict,ZSTD_frameParameters const fParams,unsigned long long const pledgedSrcSize)3880 size_t ZSTD_compressBegin_usingCDict_advanced(
3881     ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
3882     ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
3883 {
3884     DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
3885     RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
3886     {   ZSTD_CCtx_params params = cctx->requestedParams;
3887         params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
3888                         || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
3889                         || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
3890                         || cdict->compressionLevel == 0 )
3891                       && (params.attachDictPref != ZSTD_dictForceLoad) ?
3892                 ZSTD_getCParamsFromCDict(cdict)
3893               : ZSTD_getCParams(cdict->compressionLevel,
3894                                 pledgedSrcSize,
3895                                 cdict->dictContentSize);
3896         /* Increase window log to fit the entire dictionary and source if the
3897          * source size is known. Limit the increase to 19, which is the
3898          * window log for compression level 1 with the largest source size.
3899          */
3900         if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
3901             U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
3902             U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
3903             params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
3904         }
3905         params.fParams = fParams;
3906         return ZSTD_compressBegin_internal(cctx,
3907                                            NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
3908                                            cdict,
3909                                            &params, pledgedSrcSize,
3910                                            ZSTDb_not_buffered);
3911     }
3912 }
3913 
3914 /* ZSTD_compressBegin_usingCDict() :
3915  * pledgedSrcSize=0 means "unknown"
3916  * if pledgedSrcSize>0, it will enable contentSizeFlag */
ZSTD_compressBegin_usingCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict)3917 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
3918 {
3919     ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
3920     DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
3921     return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
3922 }
3923 
/* ZSTD_compress_usingCDict_advanced() :
 * One-shot compression of `src` into `dst` using `cdict` and explicit frame parameters.
 * The frame is started with `srcSize` as pledged source size, then completed
 * with ZSTD_compressEnd().
 * @return : the result of ZSTD_compressEnd(), or an error code from the begin stage */
size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
    /* the begin stage will check if cdict != NULL */
    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");
    {   size_t const endResult = ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
        return endResult;
    }
}
3932 
3933 /*! ZSTD_compress_usingCDict() :
3934  *  Compression using a digested Dictionary.
3935  *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
3936  *  Note that compression parameters are decided at CDict creation time
3937  *  while frame parameters are hardcoded */
ZSTD_compress_usingCDict(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const void * src,size_t srcSize,const ZSTD_CDict * cdict)3938 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
3939                                 void* dst, size_t dstCapacity,
3940                                 const void* src, size_t srcSize,
3941                                 const ZSTD_CDict* cdict)
3942 {
3943     ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
3944     return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
3945 }
3946 
3947 
3948 
3949 /* ******************************************************************
3950 *  Streaming
3951 ********************************************************************/
3952 
ZSTD_createCStream(void)3953 ZSTD_CStream* ZSTD_createCStream(void)
3954 {
3955     DEBUGLOG(3, "ZSTD_createCStream");
3956     return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
3957 }
3958 
ZSTD_initStaticCStream(void * workspace,size_t workspaceSize)3959 ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
3960 {
3961     return ZSTD_initStaticCCtx(workspace, workspaceSize);
3962 }
3963 
ZSTD_createCStream_advanced(ZSTD_customMem customMem)3964 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
3965 {   /* CStream and CCtx are now same object */
3966     return ZSTD_createCCtx_advanced(customMem);
3967 }
3968 
/* ZSTD_freeCStream() :
 * A CStream is the same object as a CCtx : release goes through ZSTD_freeCCtx().
 * @return : whatever ZSTD_freeCCtx() returns */
size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
{
    size_t const freeResult = ZSTD_freeCCtx(zcs);
    return freeResult;
}
3973 
3974 
3975 
3976 /*======   Initialization   ======*/
3977 
ZSTD_CStreamInSize(void)3978 size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
3979 
ZSTD_CStreamOutSize(void)3980 size_t ZSTD_CStreamOutSize(void)
3981 {
3982     return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
3983 }
3984 
ZSTD_getCParamMode(ZSTD_CDict const * cdict,ZSTD_CCtx_params const * params,U64 pledgedSrcSize)3985 static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
3986 {
3987     if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
3988         return ZSTD_cpm_attachDict;
3989     else
3990         return ZSTD_cpm_noAttachDict;
3991 }
3992 
3993 /* ZSTD_resetCStream():
3994  * pledgedSrcSize == 0 means "unknown" */
ZSTD_resetCStream(ZSTD_CStream * zcs,unsigned long long pss)3995 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
3996 {
3997     /* temporary : 0 interpreted as "unknown" during transition period.
3998      * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
3999      * 0 will be interpreted as "empty" in the future.
4000      */
4001     U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
4002     DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
4003     FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
4004     FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
4005     return 0;
4006 }
4007 
4008 /*! ZSTD_initCStream_internal() :
4009  *  Note : for lib/compress only. Used by zstdmt_compress.c.
4010  *  Assumption 1 : params are valid
4011  *  Assumption 2 : either dict, or cdict, is defined, not both */
ZSTD_initCStream_internal(ZSTD_CStream * zcs,const void * dict,size_t dictSize,const ZSTD_CDict * cdict,const ZSTD_CCtx_params * params,unsigned long long pledgedSrcSize)4012 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
4013                     const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
4014                     const ZSTD_CCtx_params* params,
4015                     unsigned long long pledgedSrcSize)
4016 {
4017     DEBUGLOG(4, "ZSTD_initCStream_internal");
4018     FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
4019     FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
4020     assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
4021     zcs->requestedParams = *params;
4022     assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
4023     if (dict) {
4024         FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
4025     } else {
4026         /* Dictionary is cleared if !cdict */
4027         FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
4028     }
4029     return 0;
4030 }
4031 
4032 /* ZSTD_initCStream_usingCDict_advanced() :
4033  * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream * zcs,const ZSTD_CDict * cdict,ZSTD_frameParameters fParams,unsigned long long pledgedSrcSize)4034 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
4035                                             const ZSTD_CDict* cdict,
4036                                             ZSTD_frameParameters fParams,
4037                                             unsigned long long pledgedSrcSize)
4038 {
4039     DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
4040     FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
4041     FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
4042     zcs->requestedParams.fParams = fParams;
4043     FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
4044     return 0;
4045 }
4046 
4047 /* note : cdict must outlive compression session */
ZSTD_initCStream_usingCDict(ZSTD_CStream * zcs,const ZSTD_CDict * cdict)4048 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
4049 {
4050     DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
4051     FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
4052     FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
4053     return 0;
4054 }
4055 
4056 
4057 /* ZSTD_initCStream_advanced() :
4058  * pledgedSrcSize must be exact.
4059  * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
4060  * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
ZSTD_initCStream_advanced(ZSTD_CStream * zcs,const void * dict,size_t dictSize,ZSTD_parameters params,unsigned long long pss)4061 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
4062                                  const void* dict, size_t dictSize,
4063                                  ZSTD_parameters params, unsigned long long pss)
4064 {
4065     /* for compatibility with older programs relying on this behavior.
4066      * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
4067      * This line will be removed in the future.
4068      */
4069     U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
4070     DEBUGLOG(4, "ZSTD_initCStream_advanced");
4071     FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
4072     FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
4073     FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
4074     zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, &params);
4075     FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
4076     return 0;
4077 }
4078 
/* ZSTD_initCStream_usingDict() :
 * Resets the session of `zcs`, sets the compression level,
 * then loads `dict` through ZSTD_CCtx_loadDictionary().
 * @return : 0, or an error code */
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingDict");

    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");

    return 0;
}
4087 
/* ZSTD_initCStream_srcSize() :
 * Resets the session of `zcs`, drops any referenced cdict, sets the
 * compression level, then registers the pledged source size.
 * pss == 0 means "unknown".
 * @return : 0, or an error code */
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 pledgedSrcSize = pss;
    if (pss == 0) {
        pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
    }
    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    return 0;
}
4102 
/* ZSTD_initCStream() :
 * Resets the session of `zcs`, drops any referenced cdict,
 * then sets the compression level.
 * @return : 0, or an error code */
size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream");

    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");

    return 0;
}
4111 
4112 /*======   Compression   ======*/
4113 
ZSTD_nextInputSizeHint(const ZSTD_CCtx * cctx)4114 static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
4115 {
4116     size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
4117     if (hintInSize==0) hintInSize = cctx->blockSize;
4118     return hintInSize;
4119 }
4120 
4121 /** ZSTD_compressStream_generic():
4122  *  internal function for all *compressStream*() variants
4123  *  non-static, because can be called from zstdmt_compress.c
4124  * @return : hint size for next input */
ZSTD_compressStream_generic(ZSTD_CStream * zcs,ZSTD_outBuffer * output,ZSTD_inBuffer * input,ZSTD_EndDirective const flushMode)4125 static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
4126                                           ZSTD_outBuffer* output,
4127                                           ZSTD_inBuffer* input,
4128                                           ZSTD_EndDirective const flushMode)
4129 {
4130     const char* const istart = (const char*)input->src;
4131     const char* const iend = input->size != 0 ? istart + input->size : istart;
4132     const char* ip = input->pos != 0 ? istart + input->pos : istart;
4133     char* const ostart = (char*)output->dst;
4134     char* const oend = output->size != 0 ? ostart + output->size : ostart;
4135     char* op = output->pos != 0 ? ostart + output->pos : ostart;
4136     U32 someMoreWork = 1;
4137 
4138     /* check expectations */
4139     DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
4140     if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
4141         assert(zcs->inBuff != NULL);
4142         assert(zcs->inBuffSize > 0);
4143     }
4144     if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
4145         assert(zcs->outBuff !=  NULL);
4146         assert(zcs->outBuffSize > 0);
4147     }
4148     assert(output->pos <= output->size);
4149     assert(input->pos <= input->size);
4150     assert((U32)flushMode <= (U32)ZSTD_e_end);
4151 
4152     while (someMoreWork) {
4153         switch(zcs->streamStage)
4154         {
4155         case zcss_init:
4156             RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
4157 
4158         case zcss_load:
4159             if ( (flushMode == ZSTD_e_end)
4160               && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip)     /* Enough output space */
4161                 || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
4162               && (zcs->inBuffPos == 0) ) {
4163                 /* shortcut to compression pass directly into output buffer */
4164                 size_t const cSize = ZSTD_compressEnd(zcs,
4165                                                 op, oend-op, ip, iend-ip);
4166                 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
4167                 FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
4168                 ip = iend;
4169                 op += cSize;
4170                 zcs->frameEnded = 1;
4171                 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4172                 someMoreWork = 0; break;
4173             }
4174             /* complete loading into inBuffer in buffered mode */
4175             if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
4176                 size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
4177                 size_t const loaded = ZSTD_limitCopy(
4178                                         zcs->inBuff + zcs->inBuffPos, toLoad,
4179                                         ip, iend-ip);
4180                 zcs->inBuffPos += loaded;
4181                 if (loaded != 0)
4182                     ip += loaded;
4183                 if ( (flushMode == ZSTD_e_continue)
4184                   && (zcs->inBuffPos < zcs->inBuffTarget) ) {
4185                     /* not enough input to fill full block : stop here */
4186                     someMoreWork = 0; break;
4187                 }
4188                 if ( (flushMode == ZSTD_e_flush)
4189                   && (zcs->inBuffPos == zcs->inToCompress) ) {
4190                     /* empty */
4191                     someMoreWork = 0; break;
4192                 }
4193             }
4194             /* compress current block (note : this stage cannot be stopped in the middle) */
4195             DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
4196             {   int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
4197                 void* cDst;
4198                 size_t cSize;
4199                 size_t oSize = oend-op;
4200                 size_t const iSize = inputBuffered
4201                     ? zcs->inBuffPos - zcs->inToCompress
4202                     : MIN((size_t)(iend - ip), zcs->blockSize);
4203                 if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
4204                     cDst = op;   /* compress into output buffer, to skip flush stage */
4205                 else
4206                     cDst = zcs->outBuff, oSize = zcs->outBuffSize;
4207                 if (inputBuffered) {
4208                     unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
4209                     cSize = lastBlock ?
4210                             ZSTD_compressEnd(zcs, cDst, oSize,
4211                                         zcs->inBuff + zcs->inToCompress, iSize) :
4212                             ZSTD_compressContinue(zcs, cDst, oSize,
4213                                         zcs->inBuff + zcs->inToCompress, iSize);
4214                     FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
4215                     zcs->frameEnded = lastBlock;
4216                     /* prepare next block */
4217                     zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
4218                     if (zcs->inBuffTarget > zcs->inBuffSize)
4219                         zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
4220                     DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
4221                             (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
4222                     if (!lastBlock)
4223                         assert(zcs->inBuffTarget <= zcs->inBuffSize);
4224                     zcs->inToCompress = zcs->inBuffPos;
4225                 } else {
4226                     unsigned const lastBlock = (ip + iSize == iend);
4227                     assert(flushMode == ZSTD_e_end /* Already validated */);
4228                     cSize = lastBlock ?
4229                             ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
4230                             ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
4231                     /* Consume the input prior to error checking to mirror buffered mode. */
4232                     if (iSize > 0)
4233                         ip += iSize;
4234                     FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
4235                     zcs->frameEnded = lastBlock;
4236                     if (lastBlock)
4237                         assert(ip == iend);
4238                 }
4239                 if (cDst == op) {  /* no need to flush */
4240                     op += cSize;
4241                     if (zcs->frameEnded) {
4242                         DEBUGLOG(5, "Frame completed directly in outBuffer");
4243                         someMoreWork = 0;
4244                         ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4245                     }
4246                     break;
4247                 }
4248                 zcs->outBuffContentSize = cSize;
4249                 zcs->outBuffFlushedSize = 0;
4250                 zcs->streamStage = zcss_flush; /* pass-through to flush stage */
4251             }
4252 	    /* fall-through */
4253         case zcss_flush:
4254             DEBUGLOG(5, "flush stage");
4255             assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
4256             {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
4257                 size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
4258                             zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
4259                 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
4260                             (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
4261                 if (flushed)
4262                     op += flushed;
4263                 zcs->outBuffFlushedSize += flushed;
4264                 if (toFlush!=flushed) {
4265                     /* flush not fully completed, presumably because dst is too small */
4266                     assert(op==oend);
4267                     someMoreWork = 0;
4268                     break;
4269                 }
4270                 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
4271                 if (zcs->frameEnded) {
4272                     DEBUGLOG(5, "Frame completed on flush");
4273                     someMoreWork = 0;
4274                     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4275                     break;
4276                 }
4277                 zcs->streamStage = zcss_load;
4278                 break;
4279             }
4280 
4281         default: /* impossible */
4282             assert(0);
4283         }
4284     }
4285 
4286     input->pos = ip - istart;
4287     output->pos = op - ostart;
4288     if (zcs->frameEnded) return 0;
4289     return ZSTD_nextInputSizeHint(zcs);
4290 }
4291 
/* ZSTD_nextInputSizeHint_MTorST() :
 * Input size hint, for both multi-threaded and single-threaded contexts.
 * When ZSTD_MULTITHREAD is defined and at least one worker is applied,
 * the hint comes from ZSTDMT_nextInputSizeHint();
 * otherwise it comes from ZSTD_nextInputSizeHint(). */
static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    int const hasWorkers = (cctx->appliedParams.nbWorkers >= 1);
    if (hasWorkers) {
        assert(cctx->mtctx != NULL);
        return ZSTDMT_nextInputSizeHint(cctx->mtctx);
    }
#endif
    return ZSTD_nextInputSizeHint(cctx);
}
4303 
/* ZSTD_compressStream() :
 * Runs ZSTD_compressStream2() with directive ZSTD_e_continue.
 * @return : a hint size for next input (see ZSTD_nextInputSizeHint_MTorST()),
 *           or an error code */
size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
    {   size_t const nextHint = ZSTD_nextInputSizeHint_MTorST(zcs);
        return nextHint;
    }
}
4309 
4310 /* After a compression call set the expected input/output buffer.
4311  * This is validated at the start of the next compression call.
4312  */
ZSTD_setBufferExpectations(ZSTD_CCtx * cctx,ZSTD_outBuffer const * output,ZSTD_inBuffer const * input)4313 static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
4314 {
4315     if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
4316         cctx->expectedInBuffer = *input;
4317     }
4318     if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
4319         cctx->expectedOutBufferSize = output->size - output->pos;
4320     }
4321 }
4322 
4323 /* Validate that the input/output buffers match the expectations set by
4324  * ZSTD_setBufferExpectations.
4325  */
ZSTD_checkBufferStability(ZSTD_CCtx const * cctx,ZSTD_outBuffer const * output,ZSTD_inBuffer const * input,ZSTD_EndDirective endOp)4326 static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
4327                                         ZSTD_outBuffer const* output,
4328                                         ZSTD_inBuffer const* input,
4329                                         ZSTD_EndDirective endOp)
4330 {
4331     if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
4332         ZSTD_inBuffer const expect = cctx->expectedInBuffer;
4333         if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
4334             RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
4335         if (endOp != ZSTD_e_end)
4336             RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
4337     }
4338     if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
4339         size_t const outBufferSize = output->size - output->pos;
4340         if (cctx->expectedOutBufferSize != outBufferSize)
4341             RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
4342     }
4343     return 0;
4344 }
4345 
/* ZSTD_CCtx_init_compressStream2() :
 * Transparent initialization stage of ZSTD_compressStream2().
 * Derives the parameters to apply from cctx->requestedParams, the prefix
 * dictionary (consumed here : single usage) and the cdict, then starts
 * either a multi-threaded or a single-threaded compression session.
 * With endOp == ZSTD_e_end, the pledged source size is set to `inSize`.
 * On success, cctx->streamStage is zcss_load.
 * @return : 0, or an error code */
static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
                                             ZSTD_EndDirective endOp,
                                             size_t inSize)
{
    ZSTD_CCtx_params params = cctx->requestedParams;
    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
    assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
    if (cctx->cdict != NULL) {
        /* let cdict take priority in terms of compression level */
        params.compressionLevel = cctx->cdict->compressionLevel;
    }
    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
    if (endOp == ZSTD_e_end) {
        /* auto-fix pledgedSrcSize */
        cctx->pledgedSrcSizePlusOne = inSize + 1;
    }

    /* select compression parameters */
    {   ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
        size_t dictSize = 0;
        if (prefixDict.dict != NULL) {
            dictSize = prefixDict.dictSize;
        } else if (cctx->cdict != NULL) {
            dictSize = cctx->cdict->dictContentSize;
        }
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                &params, cctx->pledgedSrcSizePlusOne-1,
                dictSize, mode);
    }

    if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) {
        /* Enable LDM by default for optimal parser and window size >= 128MB */
        DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
        params.ldmParams.enableLdm = 1;
    }

#ifdef ZSTD_MULTITHREAD
    if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
        params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
    }
    if (params.nbWorkers > 0) {
        /* mt context creation */
        if (cctx->mtctx == NULL) {
            DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
                        params.nbWorkers);
            cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);
            RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
        }
        /* mt compression */
        DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
        FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
                    cctx->mtctx,
                    prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
                    cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
        cctx->streamStage = zcss_load;
        cctx->appliedParams = params;
        return 0;
    }
#endif

    /* single-thread session */
    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
                cctx->cdict,
                &params, pledgedSrcSize,
                ZSTDb_buffered) , "");
        assert(cctx->appliedParams.nbWorkers == 0);
        cctx->inToCompress = 0;
        cctx->inBuffPos = 0;
        if (cctx->appliedParams.inBufferMode != ZSTD_bm_buffered) {
            cctx->inBuffTarget = 0;
        } else {
            /* for small input: avoid automatic flush on reaching end of block, since
             * it would require to add a 3-bytes null block to end frame
             */
            cctx->inBuffTarget = cctx->blockSize;
            if (cctx->blockSize == pledgedSrcSize) {
                cctx->inBuffTarget += 1;
            }
        }
        cctx->outBuffFlushedSize = 0;
        cctx->outBuffContentSize = 0;
        cctx->streamStage = zcss_load;
        cctx->frameEnded = 0;
    }
    return 0;
}
4420 
/* ZSTD_compressStream2() :
 * Streaming compression entry point, driven by `endOp`.
 * Validates buffer positions and `endOp`, runs the transparent
 * initialization when the stream is still in zcss_init stage, checks buffer
 * stability, then compresses through the multi-threaded engine (when workers
 * are applied) or through ZSTD_compressStream_generic().
 * @return : in single-thread mode, the amount of data remaining to flush ;
 *           in multi-thread mode, the value reported by ZSTDMT_compressStream_generic() ;
 *           or an error code */
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                             ZSTD_outBuffer* output,
                             ZSTD_inBuffer* input,
                             ZSTD_EndDirective endOp)
{
    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);

    /* check conditions */
    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
    RETURN_ERROR_IF(input->pos  > input->size, srcSize_wrong, "invalid input buffer");
    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
    assert(cctx != NULL);

    /* transparent initialization stage */
    if (cctx->streamStage == zcss_init) {
        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
        /* Set initial buffer expectations now that we've initialized */
        ZSTD_setBufferExpectations(cctx, output, input);
    }
    /* end of transparent initialization stage */

    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");

    /* compression stage */
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        size_t flushMin;
        int done = 0;
        if (cctx->cParamsChanged) {
            ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
            cctx->cParamsChanged = 0;
        }
        do {
            size_t const ipos = input->pos;
            size_t const opos = output->pos;
            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
            if ( ZSTD_isError(flushMin)
              || (endOp == ZSTD_e_end && flushMin == 0) ) {
                /* error, or compression completed : the session is over */
                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
            }
            FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");

            if (endOp == ZSTD_e_continue) {
                /* We only require some progress with ZSTD_e_continue, not maximal progress.
                 * We're done if we've consumed or produced any bytes, or either buffer is
                 * full.
                 */
                done = (input->pos != ipos)
                    || (output->pos != opos)
                    || (input->pos == input->size)
                    || (output->pos == output->size);
            } else {
                assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
                /* We require maximal progress. We're done when the flush is complete or the
                 * output buffer is full.
                 */
                done = (flushMin == 0) || (output->pos == output->size);
            }
        } while (!done);
        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
        /* Either we don't require maximum forward progress, we've finished the
         * flush, or we are out of output space.
         */
        assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size);
        ZSTD_setBufferExpectations(cctx, output, input);
        return flushMin;
    }
#endif
    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
    DEBUGLOG(5, "completed ZSTD_compressStream2");
    ZSTD_setBufferExpectations(cctx, output, input);
    {   size_t const remainingToFlush = cctx->outBuffContentSize - cctx->outBuffFlushedSize;
        return remainingToFlush;
    }
}
4489 
/* ZSTD_compressStream2_simpleArgs() :
 * Same as ZSTD_compressStream2(), but the buffers are described with plain
 * pointer/size/position arguments instead of ZSTD_inBuffer / ZSTD_outBuffer.
 * On return, *dstPos and *srcPos receive the updated positions,
 * whether the call succeeded or not.
 * @return : the value returned by ZSTD_compressStream2().
 */
size_t ZSTD_compressStream2_simpleArgs (
                            ZSTD_CCtx* cctx,
                            void* dst, size_t dstCapacity, size_t* dstPos,
                      const void* src, size_t srcSize, size_t* srcPos,
                            ZSTD_EndDirective endOp)
{
    ZSTD_outBuffer out;
    ZSTD_inBuffer  in;
    size_t status;

    out.dst  = dst;
    out.size = dstCapacity;
    out.pos  = *dstPos;

    in.src  = src;
    in.size = srcSize;
    in.pos  = *srcPos;

    /* validity of the positions is checked by ZSTD_compressStream2() itself */
    status = ZSTD_compressStream2(cctx, &out, &in, endOp);

    /* report progress back to the caller */
    *dstPos = out.pos;
    *srcPos = in.pos;
    return status;
}
4504 
/* ZSTD_compress2() :
 * One-shot compression of `src` into `dst`, using the parameters already requested in `cctx`.
 * The session is reset first, and both buffer modes are forced to ZSTD_bm_stable
 * for the duration of the call; the caller's requested buffer modes are restored afterwards.
 * @return : number of bytes written into `dst`,
 *           or an error code (dstSize_tooSmall when `dst` cannot hold the whole frame).
 */
size_t ZSTD_compress2(ZSTD_CCtx* cctx,
                      void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
{
    ZSTD_bufferMode_e const savedInMode  = cctx->requestedParams.inBufferMode;
    ZSTD_bufferMode_e const savedOutMode = cctx->requestedParams.outBufferMode;
    size_t written  = 0;
    size_t consumed = 0;
    size_t toFlush;

    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);

    /* both buffers are stable for the single call below */
    cctx->requestedParams.inBufferMode  = ZSTD_bm_stable;
    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;

    toFlush = ZSTD_compressStream2_simpleArgs(cctx,
                                    dst, dstCapacity, &written,
                                    src, srcSize, &consumed,
                                    ZSTD_e_end);

    /* put back the caller's settings before any early return */
    cctx->requestedParams.inBufferMode  = savedInMode;
    cctx->requestedParams.outBufferMode = savedOutMode;

    FORWARD_IF_ERROR(toFlush, "ZSTD_compressStream2_simpleArgs failed");
    if (toFlush != 0) {
        /* frame not finished : output space ran out */
        assert(written == dstCapacity);
        RETURN_ERROR(dstSize_tooSmall, "");
    }
    assert(consumed == srcSize);   /* all input is expected consumed */
    return written;
}
4534 
4535 typedef struct {
4536     U32 idx;             /* Index in array of ZSTD_Sequence */
4537     U32 posInSequence;   /* Position within sequence at idx */
4538     size_t posInSrc;        /* Number of bytes given by sequences provided so far */
4539 } ZSTD_sequencePosition;
4540 
4541 /* Returns a ZSTD error code if sequence is not valid */
ZSTD_validateSequence(U32 offCode,U32 matchLength,size_t posInSrc,U32 windowLog,size_t dictSize,U32 minMatch)4542 static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
4543                                     size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
4544     size_t offsetBound;
4545     U32 windowSize = 1 << windowLog;
4546     /* posInSrc represents the amount of data the the decoder would decode up to this point.
4547      * As long as the amount of data decoded is less than or equal to window size, offsets may be
4548      * larger than the total length of output decoded in order to reference the dict, even larger than
4549      * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
4550      */
4551     offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
4552     RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
4553     RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
4554     return 0;
4555 }
4556 
4557 /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
ZSTD_finalizeOffCode(U32 rawOffset,const U32 rep[ZSTD_REP_NUM],U32 ll0)4558 static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) {
4559     U32 offCode = rawOffset + ZSTD_REP_MOVE;
4560     U32 repCode = 0;
4561 
4562     if (!ll0 && rawOffset == rep[0]) {
4563         repCode = 1;
4564     } else if (rawOffset == rep[1]) {
4565         repCode = 2 - ll0;
4566     } else if (rawOffset == rep[2]) {
4567         repCode = 3 - ll0;
4568     } else if (ll0 && rawOffset == rep[0] - 1) {
4569         repCode = 3;
4570     }
4571     if (repCode) {
4572         /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
4573         offCode = repCode - 1;
4574     }
4575     return offCode;
4576 }
4577 
4578 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
4579  * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
4580  */
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx * cctx,ZSTD_sequencePosition * seqPos,const ZSTD_Sequence * const inSeqs,size_t inSeqsSize,const void * src,size_t blockSize)4581 static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
4582                                                              const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
4583                                                              const void* src, size_t blockSize) {
4584     U32 idx = seqPos->idx;
4585     BYTE const* ip = (BYTE const*)(src);
4586     const BYTE* const iend = ip + blockSize;
4587     repcodes_t updatedRepcodes;
4588     U32 dictSize;
4589     U32 litLength;
4590     U32 matchLength;
4591     U32 ll0;
4592     U32 offCode;
4593 
4594     if (cctx->cdict) {
4595         dictSize = (U32)cctx->cdict->dictContentSize;
4596     } else if (cctx->prefixDict.dict) {
4597         dictSize = (U32)cctx->prefixDict.dictSize;
4598     } else {
4599         dictSize = 0;
4600     }
4601     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
4602     for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
4603         litLength = inSeqs[idx].litLength;
4604         matchLength = inSeqs[idx].matchLength;
4605         ll0 = litLength == 0;
4606         offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
4607         updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
4608 
4609         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
4610         if (cctx->appliedParams.validateSequences) {
4611             seqPos->posInSrc += litLength + matchLength;
4612             FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
4613                                                 cctx->appliedParams.cParams.windowLog, dictSize,
4614                                                 cctx->appliedParams.cParams.minMatch),
4615                                                 "Sequence validation failed");
4616         }
4617         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
4618                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
4619         ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
4620         ip += matchLength + litLength;
4621     }
4622     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
4623 
4624     if (inSeqs[idx].litLength) {
4625         DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
4626         ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
4627         ip += inSeqs[idx].litLength;
4628         seqPos->posInSrc += inSeqs[idx].litLength;
4629     }
4630     RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
4631     seqPos->idx = idx+1;
4632     return 0;
4633 }
4634 
4635 /* Returns the number of bytes to move the current read position back by. Only non-zero
4636  * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
4637  * went wrong.
4638  *
4639  * This function will attempt to scan through blockSize bytes represented by the sequences
4640  * in inSeqs, storing any (partial) sequences.
4641  *
4642  * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
4643  * avoid splitting a match, or to avoid splitting a match such that it would produce a match
4644  * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
4645  */
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx * cctx,ZSTD_sequencePosition * seqPos,const ZSTD_Sequence * const inSeqs,size_t inSeqsSize,const void * src,size_t blockSize)4646 static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
4647                                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
4648                                                        const void* src, size_t blockSize) {
4649     U32 idx = seqPos->idx;
4650     U32 startPosInSequence = seqPos->posInSequence;
4651     U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
4652     size_t dictSize;
4653     BYTE const* ip = (BYTE const*)(src);
4654     BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
4655     repcodes_t updatedRepcodes;
4656     U32 bytesAdjustment = 0;
4657     U32 finalMatchSplit = 0;
4658     U32 litLength;
4659     U32 matchLength;
4660     U32 rawOffset;
4661     U32 offCode;
4662 
4663     if (cctx->cdict) {
4664         dictSize = cctx->cdict->dictContentSize;
4665     } else if (cctx->prefixDict.dict) {
4666         dictSize = cctx->prefixDict.dictSize;
4667     } else {
4668         dictSize = 0;
4669     }
4670     DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
4671     DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
4672     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
4673     while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
4674         const ZSTD_Sequence currSeq = inSeqs[idx];
4675         litLength = currSeq.litLength;
4676         matchLength = currSeq.matchLength;
4677         rawOffset = currSeq.offset;
4678 
4679         /* Modify the sequence depending on where endPosInSequence lies */
4680         if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
4681             if (startPosInSequence >= litLength) {
4682                 startPosInSequence -= litLength;
4683                 litLength = 0;
4684                 matchLength -= startPosInSequence;
4685             } else {
4686                 litLength -= startPosInSequence;
4687             }
4688             /* Move to the next sequence */
4689             endPosInSequence -= currSeq.litLength + currSeq.matchLength;
4690             startPosInSequence = 0;
4691             idx++;
4692         } else {
4693             /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
4694                does not reach the end of the match. So, we have to split the sequence */
4695             DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
4696                      currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
4697             if (endPosInSequence > litLength) {
4698                 U32 firstHalfMatchLength;
4699                 litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
4700                 firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
4701                 if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
4702                     /* Only ever split the match if it is larger than the block size */
4703                     U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
4704                     if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
4705                         /* Move the endPosInSequence backward so that it creates match of minMatch length */
4706                         endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
4707                         bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
4708                         firstHalfMatchLength -= bytesAdjustment;
4709                     }
4710                     matchLength = firstHalfMatchLength;
4711                     /* Flag that we split the last match - after storing the sequence, exit the loop,
4712                        but keep the value of endPosInSequence */
4713                     finalMatchSplit = 1;
4714                 } else {
4715                     /* Move the position in sequence backwards so that we don't split match, and break to store
4716                      * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
4717                      * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
4718                      * would cause the first half of the match to be too small
4719                      */
4720                     bytesAdjustment = endPosInSequence - currSeq.litLength;
4721                     endPosInSequence = currSeq.litLength;
4722                     break;
4723                 }
4724             } else {
4725                 /* This sequence ends inside the literals, break to store the last literals */
4726                 break;
4727             }
4728         }
4729         /* Check if this offset can be represented with a repcode */
4730         {   U32 ll0 = (litLength == 0);
4731             offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
4732             updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
4733         }
4734 
4735         if (cctx->appliedParams.validateSequences) {
4736             seqPos->posInSrc += litLength + matchLength;
4737             FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
4738                                                    cctx->appliedParams.cParams.windowLog, dictSize,
4739                                                    cctx->appliedParams.cParams.minMatch),
4740                                                    "Sequence validation failed");
4741         }
4742         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
4743         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
4744                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
4745         ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
4746         ip += matchLength + litLength;
4747     }
4748     DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
4749     assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
4750     seqPos->idx = idx;
4751     seqPos->posInSequence = endPosInSequence;
4752     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
4753 
4754     iend -= bytesAdjustment;
4755     if (ip != iend) {
4756         /* Store any last literals */
4757         U32 lastLLSize = (U32)(iend - ip);
4758         assert(ip <= iend);
4759         DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
4760         ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
4761         seqPos->posInSrc += lastLLSize;
4762     }
4763 
4764     return bytesAdjustment;
4765 }
4766 
4767 typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
4768                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
4769                                        const void* src, size_t blockSize);
ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)4770 static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
4771     ZSTD_sequenceCopier sequenceCopier = NULL;
4772     assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
4773     if (mode == ZSTD_sf_explicitBlockDelimiters) {
4774         return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
4775     } else if (mode == ZSTD_sf_noBlockDelimiters) {
4776         return ZSTD_copySequencesToSeqStoreNoBlockDelim;
4777     }
4778     assert(sequenceCopier != NULL);
4779     return sequenceCopier;
4780 }
4781 
4782 /* Compress, block-by-block, all of the sequences given.
4783  *
4784  * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
4785  */
ZSTD_compressSequences_internal(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const ZSTD_Sequence * inSeqs,size_t inSeqsSize,const void * src,size_t srcSize)4786 static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
4787                                               void* dst, size_t dstCapacity,
4788                                               const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
4789                                               const void* src, size_t srcSize) {
4790     size_t cSize = 0;
4791     U32 lastBlock;
4792     size_t blockSize;
4793     size_t compressedSeqsSize;
4794     size_t remaining = srcSize;
4795     ZSTD_sequencePosition seqPos = {0, 0, 0};
4796 
4797     BYTE const* ip = (BYTE const*)src;
4798     BYTE* op = (BYTE*)dst;
4799     ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
4800 
4801     DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
4802     /* Special case: empty frame */
4803     if (remaining == 0) {
4804         U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
4805         RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
4806         MEM_writeLE32(op, cBlockHeader24);
4807         op += ZSTD_blockHeaderSize;
4808         dstCapacity -= ZSTD_blockHeaderSize;
4809         cSize += ZSTD_blockHeaderSize;
4810     }
4811 
4812     while (remaining) {
4813         size_t cBlockSize;
4814         size_t additionalByteAdjustment;
4815         lastBlock = remaining <= cctx->blockSize;
4816         blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
4817         ZSTD_resetSeqStore(&cctx->seqStore);
4818         DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
4819 
4820         additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
4821         FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
4822         blockSize -= additionalByteAdjustment;
4823 
4824         /* If blocks are too small, emit as a nocompress block */
4825         if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
4826             cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
4827             FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
4828             DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
4829             cSize += cBlockSize;
4830             ip += blockSize;
4831             op += cBlockSize;
4832             remaining -= blockSize;
4833             dstCapacity -= cBlockSize;
4834             continue;
4835         }
4836 
4837         compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
4838                                 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
4839                                 &cctx->appliedParams,
4840                                 op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
4841                                 blockSize,
4842                                 cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
4843                                 cctx->bmi2);
4844         FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
4845         DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
4846 
4847         if (!cctx->isFirstBlock &&
4848             ZSTD_maybeRLE(&cctx->seqStore) &&
4849             ZSTD_isRLE((BYTE const*)src, srcSize)) {
4850             /* We don't want to emit our first block as a RLE even if it qualifies because
4851             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
4852             * This is only an issue for zstd <= v1.4.3
4853             */
4854             compressedSeqsSize = 1;
4855         }
4856 
4857         if (compressedSeqsSize == 0) {
4858             /* ZSTD_noCompressBlock writes the block header as well */
4859             cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
4860             FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
4861             DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
4862         } else if (compressedSeqsSize == 1) {
4863             cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
4864             FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
4865             DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
4866         } else {
4867             U32 cBlockHeader;
4868             /* Error checking and repcodes update */
4869             ZSTD_confirmRepcodesAndEntropyTables(cctx);
4870             if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
4871                 cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
4872 
4873             /* Write block header into beginning of block*/
4874             cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
4875             MEM_writeLE24(op, cBlockHeader);
4876             cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
4877             DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
4878         }
4879 
4880         cSize += cBlockSize;
4881         DEBUGLOG(4, "cSize running total: %zu", cSize);
4882 
4883         if (lastBlock) {
4884             break;
4885         } else {
4886             ip += blockSize;
4887             op += cBlockSize;
4888             remaining -= blockSize;
4889             dstCapacity -= cBlockSize;
4890             cctx->isFirstBlock = 0;
4891         }
4892     }
4893 
4894     return cSize;
4895 }
4896 
/* ZSTD_compressSequences() :
 * Compresses `src` into a complete frame, using the caller-provided sequences
 * instead of searching for matches.
 * Writes the frame header, then all blocks, then a 4-byte checksum if the checksum flag is set.
 * @dst, dstCapacity : destination buffer
 * @inSeqs, inSeqsSize : sequences describing src
 * @src, srcSize : content to compress
 * @return : final compressed size, or a ZSTD error code.
 */
size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                              const void* src, size_t srcSize) {
    BYTE* op = (BYTE*)dst;
    size_t cSize = 0;
    size_t compressedBlocksSize = 0;
    size_t frameHeaderSize = 0;

    /* Transparent initialization stage, same as compressStream2() */
    DEBUGLOG(3, "ZSTD_compressSequences()");
    assert(cctx != NULL);
    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
    /* Begin writing output, starting with frame header */
    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
    /* An error code must not be mistaken for a size in the pointer and capacity updates below */
    FORWARD_IF_ERROR(frameHeaderSize, "Writing frame header failed");
    op += frameHeaderSize;
    dstCapacity -= frameHeaderSize;
    cSize += frameHeaderSize;
    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
        XXH64_update(&cctx->xxhState, src, srcSize);
    }
    /* cSize includes block header size and compressed sequences size */
    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
                                                           op, dstCapacity,
                                                           inSeqs, inSeqsSize,
                                                           src, srcSize);
    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
    cSize += compressedBlocksSize;
    dstCapacity -= compressedBlocksSize;

    if (cctx->appliedParams.fParams.checksumFlag) {
        /* only the low 32 bits of the XXH64 digest are written */
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32((char*)dst + cSize, checksum);
        cSize += 4;
    }

    DEBUGLOG(3, "Final compressed size: %zu", cSize);
    return cSize;
}
4937 
4938 /*======   Finalize   ======*/
4939 
4940 /*! ZSTD_flushStream() :
4941  * @return : amount of data remaining to flush */
ZSTD_flushStream(ZSTD_CStream * zcs,ZSTD_outBuffer * output)4942 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
4943 {
4944     ZSTD_inBuffer input = { NULL, 0, 0 };
4945     return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
4946 }
4947 
4948 
/*! ZSTD_endStream() :
 *  Runs ZSTD_compressStream2() with an empty input and the ZSTD_e_end directive.
 * @return : an estimation of the amount of data remaining to flush, or an error code.
 *           With workers, this is the value reported by ZSTD_compressStream2().
 *           Without workers, and while the frame is not ended, the size of the last block header
 *           and of the checksum (when enabled) are added on top of it.
 */
size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer noInput;
    size_t pending;

    noInput.src  = NULL;
    noInput.size = 0;
    noInput.pos  = 0;

    pending = ZSTD_compressStream2(zcs, output, &noInput, ZSTD_e_end);
    FORWARD_IF_ERROR( pending , "ZSTD_compressStream2 failed");
    if (zcs->appliedParams.nbWorkers > 0) return pending;   /* minimal estimation */

    /* single thread mode : attempt to calculate remaining to flush more precisely */
    if (!zcs->frameEnded) {
        pending += ZSTD_BLOCKHEADERSIZE;                                   /* last block header */
        pending += (size_t)(zcs->appliedParams.fParams.checksumFlag * 4);  /* checksum, if any */
    }
    DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)pending);
    return pending;
}
4963 
4964 
4965 /*-=====  Pre-defined compression levels  =====-*/
4966 
#define ZSTD_MAX_CLEVEL     22

/*! ZSTD_maxCLevel() :
 * @return : the highest pre-defined compression level (ZSTD_MAX_CLEVEL) */
int ZSTD_maxCLevel(void)
{
    return ZSTD_MAX_CLEVEL;
}
ZSTD_minCLevel(void)4969 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
4970 
4971 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
4972 {   /* "default" - for any srcSize > 256 KB */
4973     /* W,  C,  H,  S,  L, TL, strat */
4974     { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
4975     { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
4976     { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
4977     { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
4978     { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
4979     { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
4980     { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
4981     { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
4982     { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
4983     { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
4984     { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
4985     { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
4986     { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
4987     { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
4988     { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
4989     { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
4990     { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
4991     { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
4992     { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
4993     { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
4994     { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
4995     { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
4996     { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
4997 },
4998 {   /* for srcSize <= 256 KB */
4999     /* W,  C,  H,  S,  L,  T, strat */
5000     { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
5001     { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
5002     { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
5003     { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
5004     { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
5005     { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
5006     { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
5007     { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
5008     { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
5009     { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
5010     { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
5011     { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
5012     { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
5013     { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
5014     { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
5015     { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
5016     { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
5017     { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
5018     { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
5019     { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
5020     { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
5021     { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
5022     { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
5023 },
5024 {   /* for srcSize <= 128 KB */
5025     /* W,  C,  H,  S,  L,  T, strat */
5026     { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
5027     { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
5028     { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
5029     { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
5030     { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
5031     { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
5032     { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
5033     { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
5034     { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
5035     { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
5036     { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
5037     { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
5038     { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
5039     { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
5040     { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
5041     { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
5042     { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
5043     { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
5044     { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
5045     { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
5046     { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
5047     { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
5048     { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
5049 },
5050 {   /* for srcSize <= 16 KB */
5051     /* W,  C,  H,  S,  L,  T, strat */
5052     { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
5053     { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
5054     { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
5055     { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
5056     { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
5057     { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
5058     { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
5059     { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
5060     { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
5061     { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
5062     { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
5063     { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
5064     { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
5065     { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
5066     { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
5067     { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
5068     { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
5069     { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
5070     { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
5071     { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
5072     { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
5073     { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
5074     { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
5075 },
5076 };
5077 
ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel,size_t const dictSize)5078 static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
5079 {
5080     ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
5081     switch (cParams.strategy) {
5082         case ZSTD_fast:
5083         case ZSTD_dfast:
5084             break;
5085         case ZSTD_greedy:
5086         case ZSTD_lazy:
5087         case ZSTD_lazy2:
5088             cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
5089             break;
5090         case ZSTD_btlazy2:
5091         case ZSTD_btopt:
5092         case ZSTD_btultra:
5093         case ZSTD_btultra2:
5094             break;
5095     }
5096     return cParams;
5097 }
5098 
/* ZSTD_dedicatedDictSearch_isSupported() :
 * @return 1 when cParams->strategy lies within [ZSTD_greedy, ZSTD_lazy2]
 *         (both bounds included), 0 otherwise. */
static int ZSTD_dedicatedDictSearch_isSupported(
        ZSTD_compressionParameters const* cParams)
{
    int const belowRange = (cParams->strategy < ZSTD_greedy);
    int const aboveRange = (cParams->strategy > ZSTD_lazy2);
    return !(belowRange || aboveRange);
}
5104 
5105 /**
5106  * Reverses the adjustment applied to cparams when enabling dedicated dict
5107  * search. This is used to recover the params set to be used in the working
5108  * context. (Otherwise, those tables would also grow.)
5109  */
ZSTD_dedicatedDictSearch_revertCParams(ZSTD_compressionParameters * cParams)5110 static void ZSTD_dedicatedDictSearch_revertCParams(
5111         ZSTD_compressionParameters* cParams) {
5112     switch (cParams->strategy) {
5113         case ZSTD_fast:
5114         case ZSTD_dfast:
5115             break;
5116         case ZSTD_greedy:
5117         case ZSTD_lazy:
5118         case ZSTD_lazy2:
5119             cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
5120             break;
5121         case ZSTD_btlazy2:
5122         case ZSTD_btopt:
5123         case ZSTD_btultra:
5124         case ZSTD_btultra2:
5125             break;
5126     }
5127 }
5128 
/* ZSTD_getCParamRowSize() :
 * Combines srcSizeHint and dictSize into the single size value that
 * ZSTD_getCParams_internal() compares against its table thresholds.
 *  - in ZSTD_cpm_attachDict mode, dictSize is ignored (treated as 0) ;
 *  - when srcSizeHint is known, the result is srcSizeHint + dictSize ;
 *  - when srcSizeHint is ZSTD_CONTENTSIZE_UNKNOWN and no dictionary size
 *    remains, ZSTD_CONTENTSIZE_UNKNOWN is returned ;
 *  - when srcSizeHint is ZSTD_CONTENTSIZE_UNKNOWN and a dictionary size
 *    remains, srcSizeHint + dictSize + 500 is returned
 *    (NOTE(review): this presumably relies on unsigned wrap-around of the
 *     UNKNOWN sentinel - confirm against its definition).
 * An unrecognized mode triggers assert(0). */
static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    switch (mode) {
    case ZSTD_cpm_attachDict:
        /* dictionary size does not contribute in this mode */
        dictSize = 0;
        break;
    case ZSTD_cpm_unknown:
    case ZSTD_cpm_noAttachDict:
    case ZSTD_cpm_createCDict:
        break;
    default:
        assert(0);
        break;
    }

    if (srcSizeHint != ZSTD_CONTENTSIZE_UNKNOWN) {
        return srcSizeHint + dictSize;
    }
    if (dictSize == 0) {
        return ZSTD_CONTENTSIZE_UNKNOWN;
    }
    {   size_t const unknownSrcMargin = 500;
        return srcSizeHint + dictSize + unknownSrcMargin;
    }
}
5148 
5149 /*! ZSTD_getCParams_internal() :
5150  * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
5151  *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
5152  *        Use dictSize == 0 for unknown or unused.
5153  *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
ZSTD_getCParams_internal(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize,ZSTD_cParamMode_e mode)5154 static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
5155 {
5156     U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
5157     U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
5158     int row;
5159     DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
5160 
5161     /* row */
5162     if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
5163     else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
5164     else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
5165     else row = compressionLevel;
5166 
5167     {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
5168         /* acceleration factor */
5169         if (compressionLevel < 0) {
5170             int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
5171             cp.targetLength = (unsigned)(-clampedCompressionLevel);
5172         }
5173         /* refine parameters based on srcSize & dictSize */
5174         return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
5175     }
5176 }
5177 
5178 /*! ZSTD_getCParams() :
5179  * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
5180  *  Size values are optional, provide 0 if not known or unused */
ZSTD_getCParams(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize)5181 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
5182 {
5183     if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
5184     return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
5185 }
5186 
5187 /*! ZSTD_getParams() :
5188  *  same idea as ZSTD_getCParams()
5189  * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
5190  *  Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_getParams_internal(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize,ZSTD_cParamMode_e mode)5191 static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
5192     ZSTD_parameters params;
5193     ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
5194     DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
5195     ZSTD_memset(&params, 0, sizeof(params));
5196     params.cParams = cParams;
5197     params.fParams.contentSizeFlag = 1;
5198     return params;
5199 }
5200 
5201 /*! ZSTD_getParams() :
5202  *  same idea as ZSTD_getCParams()
5203  * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
5204  *  Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_getParams(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize)5205 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
5206     if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
5207     return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
5208 }
5209