1 /*
2  * Copyright (c) 2017-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* *********************************************************
12 *  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
13 ***********************************************************/
14 #if !defined(__64BIT__) || defined(__MINGW32__)       /* No point defining Large file for 64 bit but MinGW-w64 requires it */
15 #  if !defined(_FILE_OFFSET_BITS)
16 #    define _FILE_OFFSET_BITS 64                      /* turn off_t into a 64-bit type for ftello, fseeko */
17 #  endif
18 #  if !defined(_LARGEFILE_SOURCE)                     /* obsolete macro, replaced with _FILE_OFFSET_BITS */
19 #    define _LARGEFILE_SOURCE 1                       /* Large File Support extension (LFS) - fseeko, ftello */
20 #  endif
21 #  if defined(_AIX) || defined(__hpux)
22 #    define _LARGE_FILES                              /* Large file support on 32-bits AIX and HP-UX */
23 #  endif
24 #endif
25 
26 /* ************************************************************
27 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
28 ***************************************************************/
29 #if defined(_MSC_VER) && _MSC_VER >= 1400
30 #   define LONG_SEEK _fseeki64
31 #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
32 #  define LONG_SEEK fseeko
33 #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
34 #   define LONG_SEEK fseeko64
35 #elif defined(_WIN32) && !defined(__DJGPP__)
36 #   include <windows.h>
LONG_SEEK(FILE * file,__int64 offset,int origin)37     static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
38         LARGE_INTEGER off;
39         DWORD method;
40         off.QuadPart = offset;
41         if (origin == SEEK_END)
42             method = FILE_END;
43         else if (origin == SEEK_CUR)
44             method = FILE_CURRENT;
45         else
46             method = FILE_BEGIN;
47 
48         if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
49             return 0;
50         else
51             return -1;
52     }
53 #else
54 #   define LONG_SEEK fseek
55 #endif
56 
57 #include <stdlib.h>  /* malloc, free */
58 #include <stdio.h>   /* FILE* */
59 #include <limits.h>  /* UNIT_MAX */
60 #include <assert.h>
61 
62 #define XXH_STATIC_LINKING_ONLY
63 #define XXH_NAMESPACE ZSTD_
64 #include "xxhash.h"
65 
66 #define ZSTD_STATIC_LINKING_ONLY
67 #include "zstd.h"
68 #include "zstd_errors.h"
69 #include "mem.h"
70 #include "zstd_seekable.h"
71 
72 #undef ERROR
73 #define ERROR(name) ((size_t)-ZSTD_error_##name)
74 
75 #define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); }
76 
77 #undef MIN
78 #undef MAX
79 #define MIN(a, b) ((a) < (b) ? (a) : (b))
80 #define MAX(a, b) ((a) > (b) ? (a) : (b))
81 
82 #define ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX 16
83 
84 /* Special-case callbacks for FILE* and in-memory modes, so that we can treat
85  * them the same way as the advanced API */
ZSTD_seekable_read_FILE(void * opaque,void * buffer,size_t n)86 static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
87 {
88     size_t const result = fread(buffer, 1, n, (FILE*)opaque);
89     if (result != n) {
90         return -1;
91     }
92     return 0;
93 }
94 
ZSTD_seekable_seek_FILE(void * opaque,long long offset,int origin)95 static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
96 {
97     int const ret = LONG_SEEK((FILE*)opaque, offset, origin);
98     if (ret) return ret;
99     return fflush((FILE*)opaque);
100 }
101 
102 typedef struct {
103     const void *ptr;
104     size_t size;
105     size_t pos;
106 } buffWrapper_t;
107 
ZSTD_seekable_read_buff(void * opaque,void * buffer,size_t n)108 static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
109 {
110     buffWrapper_t* buff = (buffWrapper_t*) opaque;
111     if (buff->pos + n > buff->size) return -1;
112     memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n);
113     buff->pos += n;
114     return 0;
115 }
116 
ZSTD_seekable_seek_buff(void * opaque,long long offset,int origin)117 static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
118 {
119     buffWrapper_t* const buff = (buffWrapper_t*) opaque;
120     unsigned long long newOffset;
121     switch (origin) {
122     case SEEK_SET:
123         newOffset = offset;
124         break;
125     case SEEK_CUR:
126         newOffset = (unsigned long long)buff->pos + offset;
127         break;
128     case SEEK_END:
129         newOffset = (unsigned long long)buff->size + offset;
130         break;
131     default:
132         assert(0);  /* not possible */
133     }
134     if (newOffset > buff->size) {
135         return -1;
136     }
137     buff->pos = newOffset;
138     return 0;
139 }
140 
141 typedef struct {
142     U64 cOffset;
143     U64 dOffset;
144     U32 checksum;
145 } seekEntry_t;
146 
147 typedef struct {
148     seekEntry_t* entries;
149     size_t tableLen;
150 
151     int checksumFlag;
152 } seekTable_t;
153 
154 #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX
155 
156 struct ZSTD_seekable_s {
157     ZSTD_DStream* dstream;
158     seekTable_t seekTable;
159     ZSTD_seekable_customFile src;
160 
161     U64 decompressedOffset;
162     U32 curFrame;
163 
164     BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
165     BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
166                                          starts of chunks before we get to the
167                                          desired section */
168     ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
169     buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */
170 
171     XXH64_state_t xxhState;
172 };
173 
ZSTD_seekable_create(void)174 ZSTD_seekable* ZSTD_seekable_create(void)
175 {
176     ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
177 
178     if (zs == NULL) return NULL;
179 
180     /* also initializes stage to zsds_init */
181     memset(zs, 0, sizeof(*zs));
182 
183     zs->dstream = ZSTD_createDStream();
184     if (zs->dstream == NULL) {
185         free(zs);
186         return NULL;
187     }
188 
189     return zs;
190 }
191 
ZSTD_seekable_free(ZSTD_seekable * zs)192 size_t ZSTD_seekable_free(ZSTD_seekable* zs)
193 {
194     if (zs == NULL) return 0; /* support free on null */
195     ZSTD_freeDStream(zs->dstream);
196     free(zs->seekTable.entries);
197     free(zs);
198 
199     return 0;
200 }
201 
202 /** ZSTD_seekable_offsetToFrameIndex() :
203  *  Performs a binary search to find the last frame with a decompressed offset
204  *  <= pos
205  *  @return : the frame's index */
ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable * const zs,unsigned long long pos)206 unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos)
207 {
208     U32 lo = 0;
209     U32 hi = (U32)zs->seekTable.tableLen;
210     assert(zs->seekTable.tableLen <= UINT_MAX);
211 
212     if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
213         return (U32)zs->seekTable.tableLen;
214     }
215 
216     while (lo + 1 < hi) {
217         U32 const mid = lo + ((hi - lo) >> 1);
218         if (zs->seekTable.entries[mid].dOffset <= pos) {
219             lo = mid;
220         } else {
221             hi = mid;
222         }
223     }
224     return lo;
225 }
226 
ZSTD_seekable_getNumFrames(ZSTD_seekable * const zs)227 unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
228 {
229     assert(zs->seekTable.tableLen <= UINT_MAX);
230     return (unsigned)zs->seekTable.tableLen;
231 }
232 
ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable * const zs,unsigned frameIndex)233 unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
234 {
235     if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
236     return zs->seekTable.entries[frameIndex].cOffset;
237 }
238 
ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable * const zs,unsigned frameIndex)239 unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
240 {
241     if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
242     return zs->seekTable.entries[frameIndex].dOffset;
243 }
244 
ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable * const zs,unsigned frameIndex)245 size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
246 {
247     if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
248     return zs->seekTable.entries[frameIndex + 1].cOffset -
249            zs->seekTable.entries[frameIndex].cOffset;
250 }
251 
ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable * const zs,unsigned frameIndex)252 size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
253 {
254     if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
255     return zs->seekTable.entries[frameIndex + 1].dOffset -
256            zs->seekTable.entries[frameIndex].dOffset;
257 }
258 
ZSTD_seekable_loadSeekTable(ZSTD_seekable * zs)259 static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
260 {
261     int checksumFlag;
262     ZSTD_seekable_customFile src = zs->src;
263     /* read the footer, fixed size */
264     CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
265     CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));
266 
267     if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
268         return ERROR(prefix_unknown);
269     }
270 
271     {   BYTE const sfd = zs->inBuff[4];
272         checksumFlag = sfd >> 7;
273 
274         /* check reserved bits */
275         if ((checksumFlag >> 2) & 0x1f) {
276             return ERROR(corruption_detected);
277         }
278     }
279 
280     {   U32 const numFrames = MEM_readLE32(zs->inBuff);
281         U32 const sizePerEntry = 8 + (checksumFlag?4:0);
282         U32 const tableSize = sizePerEntry * numFrames;
283         U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE;
284 
285         U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
286         {
287             U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
288 
289             CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
290             CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
291 
292             remaining -= toRead;
293         }
294 
295         if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
296             return ERROR(prefix_unknown);
297         }
298         if (MEM_readLE32(zs->inBuff+4) + ZSTD_SKIPPABLEHEADERSIZE != frameSize) {
299             return ERROR(prefix_unknown);
300         }
301 
302         {   /* Allocate an extra entry at the end so that we can do size
303              * computations on the last element without special case */
304             seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
305 
306             U32 idx = 0;
307             U32 pos = 8;
308 
309 
310             U64 cOffset = 0;
311             U64 dOffset = 0;
312 
313             if (!entries) {
314                 free(entries);
315                 return ERROR(memory_allocation);
316             }
317 
318             /* compute cumulative positions */
319             for (; idx < numFrames; idx++) {
320                 if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
321                     U32 const offset = SEEKABLE_BUFF_SIZE - pos;
322                     U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset);
323                     memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
324                     CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead));
325                     remaining -= toRead;
326                     pos = 0;
327                 }
328                 entries[idx].cOffset = cOffset;
329                 entries[idx].dOffset = dOffset;
330 
331                 cOffset += MEM_readLE32(zs->inBuff + pos);
332                 pos += 4;
333                 dOffset += MEM_readLE32(zs->inBuff + pos);
334                 pos += 4;
335                 if (checksumFlag) {
336                     entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
337                     pos += 4;
338                 }
339             }
340             entries[numFrames].cOffset = cOffset;
341             entries[numFrames].dOffset = dOffset;
342 
343             zs->seekTable.entries = entries;
344             zs->seekTable.tableLen = numFrames;
345             zs->seekTable.checksumFlag = checksumFlag;
346             return 0;
347         }
348     }
349 }
350 
ZSTD_seekable_initBuff(ZSTD_seekable * zs,const void * src,size_t srcSize)351 size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
352 {
353     zs->buffWrapper = (buffWrapper_t){src, srcSize, 0};
354     {   ZSTD_seekable_customFile srcFile = {&zs->buffWrapper,
355                                             &ZSTD_seekable_read_buff,
356                                             &ZSTD_seekable_seek_buff};
357         return ZSTD_seekable_initAdvanced(zs, srcFile); }
358 }
359 
ZSTD_seekable_initFile(ZSTD_seekable * zs,FILE * src)360 size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
361 {
362     ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE,
363                                         &ZSTD_seekable_seek_FILE};
364     return ZSTD_seekable_initAdvanced(zs, srcFile);
365 }
366 
ZSTD_seekable_initAdvanced(ZSTD_seekable * zs,ZSTD_seekable_customFile src)367 size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
368 {
369     zs->src = src;
370 
371     {   const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs);
372         if (ZSTD_isError(seekTableInit)) return seekTableInit; }
373 
374     zs->decompressedOffset = (U64)-1;
375     zs->curFrame = (U32)-1;
376 
377     {   const size_t dstreamInit = ZSTD_initDStream(zs->dstream);
378         if (ZSTD_isError(dstreamInit)) return dstreamInit; }
379     return 0;
380 }
381 
ZSTD_seekable_decompress(ZSTD_seekable * zs,void * dst,size_t len,unsigned long long offset)382 size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
383 {
384     U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
385     U32 noOutputProgressCount = 0;
386     do {
387         /* check if we can continue from a previous decompress job */
388         if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
389             zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
390             zs->curFrame = targetFrame;
391 
392             CHECK_IO(zs->src.seek(zs->src.opaque,
393                                   zs->seekTable.entries[targetFrame].cOffset,
394                                   SEEK_SET));
395             zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
396             XXH64_reset(&zs->xxhState, 0);
397             ZSTD_resetDStream(zs->dstream);
398         }
399 
400         while (zs->decompressedOffset < offset + len) {
401             size_t toRead;
402             ZSTD_outBuffer outTmp;
403             size_t prevOutPos;
404             size_t forwardProgress;
405             if (zs->decompressedOffset < offset) {
406                 /* dummy decompressions until we get to the target offset */
407                 outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0};
408             } else {
409                 outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset};
410             }
411 
412             prevOutPos = outTmp.pos;
413             toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
414             if (ZSTD_isError(toRead)) {
415                 return toRead;
416             }
417 
418             if (zs->seekTable.checksumFlag) {
419                 XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos,
420                              outTmp.pos - prevOutPos);
421             }
422             forwardProgress = outTmp.pos - prevOutPos;
423             if (forwardProgress == 0) {
424                 if (noOutputProgressCount++ > ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX) {
425                     return ERROR(seekableIO);
426                 }
427             } else {
428                 noOutputProgressCount = 0;
429             }
430             zs->decompressedOffset += forwardProgress;
431 
432             if (toRead == 0) {
433                 /* frame complete */
434 
435                 /* verify checksum */
436                 if (zs->seekTable.checksumFlag &&
437                     (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
438                             zs->seekTable.entries[targetFrame].checksum) {
439                     return ERROR(corruption_detected);
440                 }
441 
442                 if (zs->decompressedOffset < offset + len) {
443                     /* go back to the start and force a reset of the stream */
444                     targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
445                 }
446                 break;
447             }
448 
449             /* read in more data if we're done with this buffer */
450             if (zs->in.pos == zs->in.size) {
451                 toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
452                 CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
453                 zs->in.size = toRead;
454                 zs->in.pos = 0;
455             }
456         }
457     } while (zs->decompressedOffset != offset + len);
458 
459     return len;
460 }
461 
ZSTD_seekable_decompressFrame(ZSTD_seekable * zs,void * dst,size_t dstSize,unsigned frameIndex)462 size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex)
463 {
464     if (frameIndex >= zs->seekTable.tableLen) {
465         return ERROR(frameIndex_tooLarge);
466     }
467 
468     {
469         size_t const decompressedSize =
470                 zs->seekTable.entries[frameIndex + 1].dOffset -
471                 zs->seekTable.entries[frameIndex].dOffset;
472         if (dstSize < decompressedSize) {
473             return ERROR(dstSize_tooSmall);
474         }
475         return ZSTD_seekable_decompress(
476                 zs, dst, decompressedSize,
477                 zs->seekTable.entries[frameIndex].dOffset);
478     }
479 }
480