1 /*
2 * Copyright (c) 2017-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* *********************************************************
12 * Turn on Large Files support (>4GB) for 32-bit Linux/Unix
13 ***********************************************************/
14 #if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */
15 # if !defined(_FILE_OFFSET_BITS)
16 # define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */
17 # endif
18 # if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */
19 # define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */
20 # endif
21 # if defined(_AIX) || defined(__hpux)
22 # define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */
23 # endif
24 #endif
25
26 /* ************************************************************
27 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
28 ***************************************************************/
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define LONG_SEEK _fseeki64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
# define LONG_SEEK fseeko
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
# define LONG_SEEK fseeko64
#elif defined(_WIN32) && !defined(__DJGPP__)
# include <windows.h>
/* 64-bit seek fallback for Windows toolchains that provide neither
 * _fseeki64 nor fseeko64: map the stdio origin constants to their Win32
 * equivalents and move the underlying OS file pointer with
 * SetFilePointerEx.
 * @return 0 on success, -1 on failure (same contract as fseek).
 * NOTE(review): this seeks the OS handle directly, bypassing stdio's
 * buffer — presumably safe because callers only read after seeking;
 * confirm against ZSTD_seekable_seek_FILE usage. */
static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
    LARGE_INTEGER off;
    DWORD method;
    off.QuadPart = offset;
    if (origin == SEEK_END)
        method = FILE_END;
    else if (origin == SEEK_CUR)
        method = FILE_CURRENT;
    else
        method = FILE_BEGIN;

    if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
        return 0;
    else
        return -1;
}
#else
# define LONG_SEEK fseek
#endif
56
57 #include <stdlib.h> /* malloc, free */
58 #include <stdio.h> /* FILE* */
#include <limits.h> /* UINT_MAX */
60 #include <assert.h>
61
62 #define XXH_STATIC_LINKING_ONLY
63 #define XXH_NAMESPACE ZSTD_
64 #include "xxhash.h"
65
66 #define ZSTD_STATIC_LINKING_ONLY
67 #include "zstd.h"
68 #include "zstd_errors.h"
69 #include "mem.h"
70 #include "zstd_seekable.h"
71
72 #undef ERROR
73 #define ERROR(name) ((size_t)-ZSTD_error_##name)
74
75 #define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); }
76
77 #undef MIN
78 #undef MAX
79 #define MIN(a, b) ((a) < (b) ? (a) : (b))
80 #define MAX(a, b) ((a) > (b) ? (a) : (b))
81
82 #define ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX 16
83
84 /* Special-case callbacks for FILE* and in-memory modes, so that we can treat
85 * them the same way as the advanced API */
/* FILE*-backed read callback: reads exactly `n` bytes into `buffer`.
 * @return 0 on success, -1 if the full amount could not be read. */
static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
{
    FILE* const f = (FILE*)opaque;
    return (fread(buffer, 1, n, f) == n) ? 0 : -1;
}
94
/* FILE*-backed seek callback: performs a 64-bit-safe seek, then flushes
 * the stream so its buffered state is consistent with the new position.
 * @return 0 on success, nonzero on failure. */
static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
{
    FILE* const f = (FILE*)opaque;
    int const seekStatus = LONG_SEEK(f, offset, origin);
    if (seekStatus != 0) return seekStatus;
    return fflush(f);
}
101
/* Wrapper exposing an in-memory buffer through the customFile callbacks. */
typedef struct {
    const void *ptr;   /* start of the source buffer (not owned) */
    size_t size;       /* total buffer size in bytes */
    size_t pos;        /* current read position, 0 <= pos <= size */
} buffWrapper_t;

/* In-memory read callback: copies exactly `n` bytes from the wrapped buffer.
 * @return 0 on success, -1 if fewer than `n` bytes remain.
 * Fixed: the bounds check was `pos + n > size`, which can wrap around for
 * huge `n`; `n > size - pos` is overflow-safe since pos <= size. */
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
{
    buffWrapper_t* const buff = (buffWrapper_t*)opaque;
    assert(buff->pos <= buff->size);
    if (n > buff->size - buff->pos) return -1;
    memcpy(buffer, (const unsigned char*)buff->ptr + buff->pos, n);
    buff->pos += n;
    return 0;
}

/* In-memory seek callback mirroring fseek() semantics over the wrapped
 * buffer.  @return 0 on success, -1 on an out-of-range target or origin. */
static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
{
    buffWrapper_t* const buff = (buffWrapper_t*)opaque;
    unsigned long long newOffset;
    switch (origin) {
    case SEEK_SET:
        newOffset = (unsigned long long)offset;
        break;
    case SEEK_CUR:
        newOffset = (unsigned long long)buff->pos + (unsigned long long)offset;
        break;
    case SEEK_END:
        newOffset = (unsigned long long)buff->size + (unsigned long long)offset;
        break;
    default:
        assert(0); /* not possible */
        /* Fixed: previously fell through with `newOffset` uninitialized,
         * which is undefined behavior in NDEBUG builds. */
        return -1;
    }
    /* negative targets wrap to huge unsigned values and are rejected here */
    if (newOffset > buff->size) {
        return -1;
    }
    buff->pos = (size_t)newOffset;
    return 0;
}
140
/* One entry per frame: cumulative offsets of the frame's first byte. */
typedef struct {
    U64 cOffset;   /* compressed offset of the frame within the source */
    U64 dOffset;   /* decompressed offset of the frame's first byte */
    U32 checksum;  /* low 32 bits of XXH64 of the frame's decompressed data;
                      only meaningful when the table's checksumFlag is set */
} seekEntry_t;

/* Parsed seek table.  `entries` holds tableLen+1 elements: the extra
 * sentinel at index tableLen carries the total sizes, so per-frame size
 * computations on the last frame need no special case. */
typedef struct {
    seekEntry_t* entries;
    size_t tableLen;

    int checksumFlag;
} seekTable_t;
153
#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX

/* Decompression context for the seekable format. */
struct ZSTD_seekable_s {
    ZSTD_DStream* dstream;        /* streaming decompression state, reset per frame */
    seekTable_t seekTable;        /* frame index parsed at init time */
    ZSTD_seekable_customFile src; /* user-supplied read/seek callbacks */

    U64 decompressedOffset;       /* absolute decompressed position of the next byte to produce */
    U32 curFrame;                 /* index of the frame currently being decompressed */

    BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
    BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
                                         starts of chunks before we get to the
                                         desired section */
    ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
    buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */

    XXH64_state_t xxhState; /* running checksum of the current frame's output */
};
173
ZSTD_seekable_create(void)174 ZSTD_seekable* ZSTD_seekable_create(void)
175 {
176 ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
177
178 if (zs == NULL) return NULL;
179
180 /* also initializes stage to zsds_init */
181 memset(zs, 0, sizeof(*zs));
182
183 zs->dstream = ZSTD_createDStream();
184 if (zs->dstream == NULL) {
185 free(zs);
186 return NULL;
187 }
188
189 return zs;
190 }
191
/* Release a seekable object and every resource it owns.
 * Safe to call with NULL.  Always returns 0. */
size_t ZSTD_seekable_free(ZSTD_seekable* zs)
{
    if (zs != NULL) {   /* support free on null */
        ZSTD_freeDStream(zs->dstream);
        free(zs->seekTable.entries);
        free(zs);
    }
    return 0;
}
201
202 /** ZSTD_seekable_offsetToFrameIndex() :
203 * Performs a binary search to find the last frame with a decompressed offset
204 * <= pos
205 * @return : the frame's index */
ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable * const zs,unsigned long long pos)206 unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos)
207 {
208 U32 lo = 0;
209 U32 hi = (U32)zs->seekTable.tableLen;
210 assert(zs->seekTable.tableLen <= UINT_MAX);
211
212 if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
213 return (U32)zs->seekTable.tableLen;
214 }
215
216 while (lo + 1 < hi) {
217 U32 const mid = lo + ((hi - lo) >> 1);
218 if (zs->seekTable.entries[mid].dOffset <= pos) {
219 lo = mid;
220 } else {
221 hi = mid;
222 }
223 }
224 return lo;
225 }
226
/* @return the number of frames recorded in the seek table */
unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
{
    size_t const numFrames = zs->seekTable.tableLen;
    assert(numFrames <= UINT_MAX);
    return (unsigned)numFrames;
}
232
/* @return the compressed-domain offset where frame `frameIndex` begins,
 * or ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE if the index is out of range */
unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
{
    if (frameIndex < zs->seekTable.tableLen)
        return zs->seekTable.entries[frameIndex].cOffset;
    return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
}
238
/* @return the decompressed-domain offset where frame `frameIndex` begins,
 * or ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE if the index is out of range */
unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
{
    if (frameIndex < zs->seekTable.tableLen)
        return zs->seekTable.entries[frameIndex].dOffset;
    return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
}
244
/* @return the compressed size of frame `frameIndex`, computed as the
 * difference of consecutive table entries (the sentinel entry makes this
 * valid for the last frame), or an error code if the index is out of range */
size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
{
    seekEntry_t const* const entries = zs->seekTable.entries;
    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
    return entries[frameIndex + 1].cOffset - entries[frameIndex].cOffset;
}
251
/* @return the decompressed size of frame `frameIndex`, or an error code if
 * the index is out of range.
 * Fixed: the bound check used `>` instead of `>=`, so frameIndex == tableLen
 * was accepted and entries[tableLen + 1] was read — one element past the
 * allocated sentinel.  Now consistent with the compressed-size variant. */
size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
{
    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
    return zs->seekTable.entries[frameIndex + 1].dOffset -
           zs->seekTable.entries[frameIndex].dOffset;
}
258
/* Read and parse the seek-table skippable frame at the end of the source.
 * On success fills zs->seekTable (an array of numFrames+1 entries, the
 * sentinel holding the cumulative totals) and returns 0; otherwise returns
 * a ZSTD error code.  Uses zs->inBuff as scratch space.
 * Fixes: the reserved-bits check previously tested `checksumFlag >> 2`
 * (always 0), so corrupt descriptors were silently accepted; `entries`
 * leaked when a mid-table read failed. */
static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
{
    int checksumFlag;
    ZSTD_seekable_customFile src = zs->src;
    /* read the footer, fixed size */
    CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
    CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));

    if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
        return ERROR(prefix_unknown);
    }

    {   BYTE const sfd = zs->inBuff[4];   /* Seek_Table_Descriptor byte */
        checksumFlag = sfd >> 7;

        /* bits 6-2 of the descriptor are reserved and must be zero */
        if ((sfd >> 2) & 0x1f) {
            return ERROR(corruption_detected);
        }
    }

    {   U32 const numFrames = MEM_readLE32(zs->inBuff);
        U32 const sizePerEntry = 8 + (checksumFlag?4:0);
        U32 const tableSize = sizePerEntry * numFrames;
        U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE;

        U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
        {   /* seek to the start of the seek-table frame and prime the buffer */
            U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);

            CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
            CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));

            remaining -= toRead;
        }

        /* the seek table lives inside a skippable frame */
        if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
            return ERROR(prefix_unknown);
        }
        /* the skippable frame's declared content size must match the table */
        if (MEM_readLE32(zs->inBuff+4) + ZSTD_SKIPPABLEHEADERSIZE != frameSize) {
            return ERROR(prefix_unknown);
        }

        {   /* Allocate an extra entry at the end so that we can do size
             * computations on the last element without special case */
            seekEntry_t* const entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));

            U32 idx = 0;
            U32 pos = 8;   /* skip the 8-byte skippable-frame header */

            U64 cOffset = 0;
            U64 dOffset = 0;

            if (entries == NULL) {
                return ERROR(memory_allocation);
            }

            /* compute cumulative positions */
            for (; idx < numFrames; idx++) {
                if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
                    /* next entry straddles the buffer end: refill, keeping
                     * the unread tail at the front */
                    U32 const offset = SEEKABLE_BUFF_SIZE - pos;
                    U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset);
                    memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
                    if (src.read(src.opaque, zs->inBuff+offset, toRead) < 0) {
                        free(entries);   /* avoid leaking on I/O failure */
                        return ERROR(seekableIO);
                    }
                    remaining -= toRead;
                    pos = 0;
                }
                entries[idx].cOffset = cOffset;
                entries[idx].dOffset = dOffset;

                cOffset += MEM_readLE32(zs->inBuff + pos);   /* per-frame compressed size */
                pos += 4;
                dOffset += MEM_readLE32(zs->inBuff + pos);   /* per-frame decompressed size */
                pos += 4;
                if (checksumFlag) {
                    entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
                    pos += 4;
                }
            }
            /* sentinel entry: totals, so entry[i+1]-entry[i] works for the last frame */
            entries[numFrames].cOffset = cOffset;
            entries[numFrames].dOffset = dOffset;

            zs->seekTable.entries = entries;
            zs->seekTable.tableLen = numFrames;
            zs->seekTable.checksumFlag = checksumFlag;
            return 0;
        }
    }
}
350
/* Initialize a seekable object over an in-memory compressed buffer.
 * The buffer is not copied and must outlive the seekable object. */
size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
{
    buffWrapper_t const wrapper = { src, srcSize, 0 };
    zs->buffWrapper = wrapper;
    {   ZSTD_seekable_customFile const srcFile =
            { &zs->buffWrapper, &ZSTD_seekable_read_buff, &ZSTD_seekable_seek_buff };
        return ZSTD_seekable_initAdvanced(zs, srcFile);
    }
}
359
/* Initialize a seekable object reading from an already-open FILE*. */
size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
{
    ZSTD_seekable_customFile const srcFile =
        { src, &ZSTD_seekable_read_FILE, &ZSTD_seekable_seek_FILE };
    return ZSTD_seekable_initAdvanced(zs, srcFile);
}
366
/* Shared initialization path: load the seek table through the user-supplied
 * callbacks, then reset the decompression state.
 * @return 0 on success, or a ZSTD error code. */
size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
{
    zs->src = src;

    {   size_t const seekTableInit = ZSTD_seekable_loadSeekTable(zs);
        if (ZSTD_isError(seekTableInit)) return seekTableInit;
    }

    /* sentinel values force the first decompress call to seek from scratch */
    zs->decompressedOffset = (U64)-1;
    zs->curFrame = (U32)-1;

    {   size_t const dstreamInit = ZSTD_initDStream(zs->dstream);
        if (ZSTD_isError(dstreamInit)) return dstreamInit;
    }
    return 0;
}
381
/* Decompress `len` bytes of the original data, starting at decompressed
 * offset `offset`, into dst.  Resumes from the previous call's position
 * when possible; otherwise seeks to the containing frame and decompresses
 * (discarding output) until reaching `offset`.  Verifies per-frame
 * checksums when the seek table carries them.
 * @return `len` on success, or a ZSTD error code. */
size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
{
    U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
    U32 noOutputProgressCount = 0;
    do {
        /* check if we can continue from a previous decompress job */
        if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
            /* cold start: jump to the frame's compressed start and reset
             * input buffer, checksum, and stream state */
            zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
            zs->curFrame = targetFrame;

            CHECK_IO(zs->src.seek(zs->src.opaque,
                                  zs->seekTable.entries[targetFrame].cOffset,
                                  SEEK_SET));
            zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
            XXH64_reset(&zs->xxhState, 0);
            ZSTD_resetDStream(zs->dstream);
        }

        while (zs->decompressedOffset < offset + len) {
            size_t toRead;
            ZSTD_outBuffer outTmp;
            size_t prevOutPos;
            size_t forwardProgress;
            if (zs->decompressedOffset < offset) {
                /* dummy decompressions until we get to the target offset */
                outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0};
            } else {
                /* within the requested range: decompress straight into dst */
                outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset};
            }

            prevOutPos = outTmp.pos;
            toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
            if (ZSTD_isError(toRead)) {
                return toRead;
            }

            if (zs->seekTable.checksumFlag) {
                /* hash only the bytes produced by this call */
                XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos,
                             outTmp.pos - prevOutPos);
            }
            forwardProgress = outTmp.pos - prevOutPos;
            if (forwardProgress == 0) {
                /* guard against a stalled stream looping forever */
                if (noOutputProgressCount++ > ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX) {
                    return ERROR(seekableIO);
                }
            } else {
                noOutputProgressCount = 0;
            }
            zs->decompressedOffset += forwardProgress;

            if (toRead == 0) {
                /* frame complete */

                /* verify checksum */
                if (zs->seekTable.checksumFlag &&
                    (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
                        zs->seekTable.entries[targetFrame].checksum) {
                    return ERROR(corruption_detected);
                }

                if (zs->decompressedOffset < offset + len) {
                    /* go back to the start and force a reset of the stream */
                    targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
                }
                break;
            }

            /* read in more data if we're done with this buffer */
            if (zs->in.pos == zs->in.size) {
                toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
                CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
                zs->in.size = toRead;
                zs->in.pos = 0;
            }
        }
    } while (zs->decompressedOffset != offset + len);

    return len;
}
461
/* Decompress the entirety of frame `frameIndex` into dst.
 * @return the number of bytes written, or an error code if the index is out
 * of range or dst is too small for the frame's decompressed size. */
size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex)
{
    if (frameIndex >= zs->seekTable.tableLen) {
        return ERROR(frameIndex_tooLarge);
    }

    {   seekEntry_t const* const entries = zs->seekTable.entries;
        U64 const frameStart = entries[frameIndex].dOffset;
        size_t const decompressedSize =
            (size_t)(entries[frameIndex + 1].dOffset - frameStart);
        if (dstSize < decompressedSize) {
            return ERROR(dstSize_tooSmall);
        }
        return ZSTD_seekable_decompress(zs, dst, decompressedSize, frameStart);
    }
}
480