1 /*
2  * Simple XZ decoder command line tool
3  *
4  * Author: Lasse Collin <lasse.collin@tukaani.org>
5  *
6  * This file has been put into the public domain.
7  * You can do whatever you want with this file.
8  * Modified for toybox by Isaac Dunham
9 USE_XZCAT(NEWTOY(xzcat, NULL, TOYFLAG_USR|TOYFLAG_BIN))
10 
11 config XZCAT
12   bool "xzcat"
13   default n
14   help
15     usage: xzcat [filename...]
16 
17     Decompress listed files to stdout. Use stdin if no files listed.
18 
19 */
20 #define FOR_xzcat
21 #include "toys.h"
22 
23 // BEGIN xz.h
24 
25 /**
26  * enum xz_ret - Return codes
27  * @XZ_OK:                  Everything is OK so far. More input or more
28  *                          output space is required to continue.
29  * @XZ_STREAM_END:          Operation finished successfully.
30  * @XZ_UNSUPPORTED_CHECK:   Integrity check type is not supported. Decoding
31  *                          is still possible in multi-call mode by simply
32  *                          calling xz_dec_run() again.
33  *                          Note that this return value is used only if
34  *                          XZ_DEC_ANY_CHECK was defined at build time,
35  *                          which is not used in the kernel. Unsupported
36  *                          check types return XZ_OPTIONS_ERROR if
37  *                          XZ_DEC_ANY_CHECK was not defined at build time.
38  * @XZ_MEM_ERROR:           Allocating memory failed. The amount of memory
39  *                          that was tried to be allocated was no more than the
40  *                          dict_max argument given to xz_dec_init().
41  * @XZ_MEMLIMIT_ERROR:      A bigger LZMA2 dictionary would be needed than
42  *                          allowed by the dict_max argument given to
43  *                          xz_dec_init().
44  * @XZ_FORMAT_ERROR:        File format was not recognized (wrong magic
45  *                          bytes).
46  * @XZ_OPTIONS_ERROR:       This implementation doesn't support the requested
47  *                          compression options. In the decoder this means
48  *                          that the header CRC32 matches, but the header
49  *                          itself specifies something that we don't support.
50  * @XZ_DATA_ERROR:          Compressed data is corrupt.
51  * @XZ_BUF_ERROR:           Cannot make any progress. Details are slightly
52  *                          different between multi-call and single-call
53  *                          mode; more information below.
54  *
55  * XZ_BUF_ERROR is returned when two consecutive calls to XZ code cannot
56  * consume any input and cannot produce any new output. This happens when
57  * there is no new input available, or the output buffer is full while at
58  * least one output byte is still pending. Assuming your code is not buggy,
59  * you can get this error only when decoding a compressed stream that is
60  * truncated or otherwise corrupt.
61  */
62 enum xz_ret {
63   XZ_OK,
64   XZ_STREAM_END,
65   XZ_UNSUPPORTED_CHECK,
66   XZ_MEM_ERROR,
67   XZ_MEMLIMIT_ERROR,
68   XZ_FORMAT_ERROR,
69   XZ_OPTIONS_ERROR,
70   XZ_DATA_ERROR,
71   XZ_BUF_ERROR
72 };
73 
74 /**
75  * struct xz_buf - Passing input and output buffers to XZ code
76  * @in:         Beginning of the input buffer. This may be NULL if and only
77  *              if in_pos is equal to in_size.
78  * @in_pos:     Current position in the input buffer. This must not exceed
79  *              in_size.
80  * @in_size:    Size of the input buffer
81  * @out:        Beginning of the output buffer. This may be NULL if and only
82  *              if out_pos is equal to out_size.
83  * @out_pos:    Current position in the output buffer. This must not exceed
84  *              out_size.
85  * @out_size:   Size of the output buffer
86  *
87  * Only the contents of the output buffer from out[out_pos] onward, and
88  * the variables in_pos and out_pos are modified by the XZ code.
89  */
90 struct xz_buf {
91   const uint8_t *in;
92   size_t in_pos;
93   size_t in_size;
94 
95   uint8_t *out;
96   size_t out_pos;
97   size_t out_size;
98 };
99 
100 /**
101  * struct xz_dec - Opaque type to hold the XZ decoder state
102  */
103 struct xz_dec;
104 
105 /**
106  * xz_dec_init() - Allocate and initialize a XZ decoder state
107  * @mode:       Operation mode
108  * @dict_max:   Maximum size of the LZMA2 dictionary (history buffer) for
109  *              multi-call decoding. LZMA2 dictionary is always 2^n bytes
110  *              or 2^n + 2^(n-1) bytes (the latter sizes are less common
111  *              in practice), so other values for dict_max don't make sense.
112  *              In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
113  *              512 KiB, and 1 MiB are probably the only reasonable values,
114  *              except for kernel and initramfs images where a bigger
115  *              dictionary can be fine and useful.
116  *
117  * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
118  * may allocate once it has parsed the dictionary size from the stream
119  * headers. This way excessive allocations can be avoided while still
120  * limiting the maximum memory usage to a sane value to prevent running the
121  * system out of memory when decompressing streams from untrusted sources.
122  *
123  * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
124  * ready to be used with xz_dec_run(). If memory allocation fails,
125  * xz_dec_init() returns NULL.
126  */
127 struct xz_dec *xz_dec_init(uint32_t dict_max);
128 
129 /**
130  * xz_dec_run() - Run the XZ decoder
131  * @s:          Decoder state allocated using xz_dec_init()
132  * @b:          Input and output buffers
133  *
134  * The possible return values depend on build options and operation mode.
135  * See enum xz_ret for details.
136  *
137  * Note that if an error occurs in single-call mode (return value is not
138  * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
139  * contents of the output buffer from b->out[b->out_pos] onward are
140  * undefined. This is true even after XZ_BUF_ERROR, because with some filter
141  * chains, there may be a second pass over the output buffer, and this pass
142  * cannot be properly done if the output buffer is truncated. Thus, you
143  * cannot give the single-call decoder a too small buffer and then expect to
144  * get that amount valid data from the beginning of the stream. You must use
145  * the multi-call decoder if you don't want to uncompress the whole stream.
146  */
147 enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
148 
149 /**
150  * xz_dec_reset() - Reset an already allocated decoder state
151  * @s:          Decoder state allocated using xz_dec_init()
152  *
153  * This function can be used to reset the multi-call decoder state without
154  * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
155  *
156  * In single-call mode, xz_dec_reset() is always called in the beginning of
157  * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
158  * multi-call mode.
159  */
160 void xz_dec_reset(struct xz_dec *s);
161 
162 /**
163  * xz_dec_end() - Free the memory allocated for the decoder state
164  * @s:          Decoder state allocated using xz_dec_init(). If s is NULL,
165  *              this function does nothing.
166  */
167 void xz_dec_end(struct xz_dec *s);
168 
169 /*
170  * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
171  * calculation, the third argument must be zero. To continue the calculation,
172  * the previously returned value is passed as the third argument.
173  */
174 static uint32_t xz_crc32_table[256];
175 
176 uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
177 {
178   crc = ~crc;
179 
180   while (size != 0) {
181     crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
182     --size;
183   }
184 
185   return ~crc;
186 }
187 
188 static uint64_t xz_crc64_table[256];
189 
190 
191 // END xz.h
192 
193 static uint8_t in[BUFSIZ];
194 static uint8_t out[BUFSIZ];
195 
196 void do_xzcat(int fd, char *name)
197 {
198   struct xz_buf b;
199   struct xz_dec *s;
200   enum xz_ret ret;
201   const char *msg;
202 
203   crc_init(xz_crc32_table, 1);
204   const uint64_t poly = 0xC96C5795D7870F42ULL;
205   uint32_t i;
206   uint32_t j;
207   uint64_t r;
208 
209   /* initialize CRC64 table*/
210   for (i = 0; i < 256; ++i) {
211     r = i;
212     for (j = 0; j < 8; ++j)
213       r = (r >> 1) ^ (poly & ~((r & 1) - 1));
214 
215     xz_crc64_table[i] = r;
216   }
217 
218   /*
219    * Support up to 64 MiB dictionary. The actually needed memory
220    * is allocated once the headers have been parsed.
221    */
222   s = xz_dec_init(1 << 26);
223   if (s == NULL) {
224     msg = "Memory allocation failed\n";
225     goto error;
226   }
227 
228   b.in = in;
229   b.in_pos = 0;
230   b.in_size = 0;
231   b.out = out;
232   b.out_pos = 0;
233   b.out_size = BUFSIZ;
234 
235   for (;;) {
236     if (b.in_pos == b.in_size) {
237       b.in_size = read(fd, in, sizeof(in));
238       b.in_pos = 0;
239     }
240 
241     ret = xz_dec_run(s, &b);
242 
243     if (b.out_pos == sizeof(out)) {
244       if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
245         msg = "Write error\n";
246         goto error;
247       }
248 
249       b.out_pos = 0;
250     }
251 
252     if (ret == XZ_OK)
253       continue;
254 
255     if (ret == XZ_UNSUPPORTED_CHECK)
256       continue;
257 
258     if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
259       msg = "Write error\n";
260       goto error;
261     }
262 
263     switch (ret) {
264     case XZ_STREAM_END:
265       xz_dec_end(s);
266       return;
267 
268     case XZ_MEM_ERROR:
269       msg = "Memory allocation failed\n";
270       goto error;
271 
272     case XZ_MEMLIMIT_ERROR:
273       msg = "Memory usage limit reached\n";
274       goto error;
275 
276     case XZ_FORMAT_ERROR:
277       msg = "Not a .xz file\n";
278       goto error;
279 
280     case XZ_OPTIONS_ERROR:
281       msg = "Unsupported options in the .xz headers\n";
282       goto error;
283 
284     case XZ_DATA_ERROR:
285     case XZ_BUF_ERROR:
286       msg = "File is corrupt\n";
287       goto error;
288 
289     default:
290       msg = "Bug!\n";
291       goto error;
292     }
293   }
294 
295 error:
296   xz_dec_end(s);
297   error_exit("%s", msg);
298 }
299 
300 void xzcat_main(void)
301 {
302   loopfiles(toys.optargs, do_xzcat);
303 }
304 
305 // BEGIN xz_private.h
306 
307 
308 /* Uncomment as needed to enable BCJ filter decoders.
309  * These cost about 2.5 k when all are enabled; SPARC and IA64 make 0.7 k
310  * */
311 
312 #define XZ_DEC_X86
313 #define XZ_DEC_POWERPC
314 #define XZ_DEC_IA64
315 #define XZ_DEC_ARM
316 #define XZ_DEC_ARMTHUMB
317 #define XZ_DEC_SPARC
318 
319 
320 #define memeq(a, b, size) (memcmp(a, b, size) == 0)
321 
322 /* Inline functions to access unaligned unsigned 32-bit integers */
323 #ifndef get_unaligned_le32
324 static inline uint32_t get_unaligned_le32(const uint8_t *buf)
325 {
326   return (uint32_t)buf[0]
327       | ((uint32_t)buf[1] << 8)
328       | ((uint32_t)buf[2] << 16)
329       | ((uint32_t)buf[3] << 24);
330 }
331 #endif
332 
333 #ifndef get_unaligned_be32
334 static inline uint32_t get_unaligned_be32(const uint8_t *buf)
335 {
336   return (uint32_t)(buf[0] << 24)
337       | ((uint32_t)buf[1] << 16)
338       | ((uint32_t)buf[2] << 8)
339       | (uint32_t)buf[3];
340 }
341 #endif
342 
343 #ifndef put_unaligned_le32
344 static inline void put_unaligned_le32(uint32_t val, uint8_t *buf)
345 {
346   buf[0] = (uint8_t)val;
347   buf[1] = (uint8_t)(val >> 8);
348   buf[2] = (uint8_t)(val >> 16);
349   buf[3] = (uint8_t)(val >> 24);
350 }
351 #endif
352 
353 #ifndef put_unaligned_be32
354 static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
355 {
356   buf[0] = (uint8_t)(val >> 24);
357   buf[1] = (uint8_t)(val >> 16);
358   buf[2] = (uint8_t)(val >> 8);
359   buf[3] = (uint8_t)val;
360 }
361 #endif
362 
363 /*
364  * Use get_unaligned_le32() also for aligned access for simplicity. On
365  * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
366  * could save a few bytes in code size.
367  */
368 #ifndef get_le32
369 #	define get_le32 get_unaligned_le32
370 #endif
371 
372 /*
373  * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
374  * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
375  */
376 #ifndef XZ_DEC_BCJ
377 #	if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
378       || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
379       || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \
380       || defined(XZ_DEC_SPARC)
381 #		define XZ_DEC_BCJ
382 #	endif
383 #endif
384 
385 /*
386  * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
387  * before calling xz_dec_lzma2_run().
388  */
389 struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max);
390 
391 /*
392  * Decode the LZMA2 properties (one byte) and reset the decoder. Return
393  * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
394  * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
395  * decoder doesn't support.
396  */
397 enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
398            uint8_t props);
399 
400 /* Decode raw LZMA2 stream from b->in to b->out. */
401 enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
402                struct xz_buf *b);
403 
404 // END "xz_private.h"
405 
406 
407 
408 
409 /*
410  * Branch/Call/Jump (BCJ) filter decoders
411  * The rest of the code is inside this ifdef. It makes things a little more
412  * convenient when building without support for any BCJ filters.
413  */
414 #ifdef XZ_DEC_BCJ
415 
416 struct xz_dec_bcj {
417   /* Type of the BCJ filter being used */
418   enum {
419     BCJ_X86 = 4,        /* x86 or x86-64 */
420     BCJ_POWERPC = 5,    /* Big endian only */
421     BCJ_IA64 = 6,       /* Big or little endian */
422     BCJ_ARM = 7,        /* Little endian only */
423     BCJ_ARMTHUMB = 8,   /* Little endian only */
424     BCJ_SPARC = 9       /* Big or little endian */
425   } type;
426 
427   /*
428    * Return value of the next filter in the chain. We need to preserve
429    * this information across calls, because we must not call the next
430    * filter anymore once it has returned XZ_STREAM_END.
431    */
432   enum xz_ret ret;
433 
434   /*
435    * Absolute position relative to the beginning of the uncompressed
436    * data (in a single .xz Block). We care only about the lowest 32
437    * bits so this doesn't need to be uint64_t even with big files.
438    */
439   uint32_t pos;
440 
441   /* x86 filter state */
442   uint32_t x86_prev_mask;
443 
444   /* Temporary space to hold the variables from struct xz_buf */
445   uint8_t *out;
446   size_t out_pos;
447   size_t out_size;
448 
449   struct {
450     /* Amount of already filtered data in the beginning of buf */
451     size_t filtered;
452 
453     /* Total amount of data currently stored in buf  */
454     size_t size;
455 
456     /*
457      * Buffer to hold a mix of filtered and unfiltered data. This
458      * needs to be big enough to hold Alignment + 2 * Look-ahead:
459      *
460      * Type         Alignment   Look-ahead
461      * x86              1           4
462      * PowerPC          4           0
463      * IA-64           16           0
464      * ARM              4           0
465      * ARM-Thumb        2           2
466      * SPARC            4           0
467      */
468     uint8_t buf[16];
469   } temp;
470 };
471 
472 /*
473  * Decode the Filter ID of a BCJ filter. This implementation doesn't
474  * support custom start offsets, so no decoding of Filter Properties
475  * is needed. Returns XZ_OK if the given Filter ID is supported.
476  * Otherwise XZ_OPTIONS_ERROR is returned.
477  */
478 enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
479 
480 /*
481  * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
482  * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
483  * must be called directly.
484  */
485 enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
486              struct xz_dec_lzma2 *lzma2,
487              struct xz_buf *b);
488 
489 #ifdef XZ_DEC_X86
490 /*
491  * This is used to test the most significant byte of a memory address
492  * in an x86 instruction.
493  */
494 static inline int bcj_x86_test_msbyte(uint8_t b)
495 {
496   return b == 0x00 || b == 0xFF;
497 }
498 
499 static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
500 {
501   static const int mask_to_allowed_status[8]
502     = { 1,1,1,0,1,0,0,0 };
503 
504   static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
505 
506   size_t i;
507   size_t prev_pos = (size_t)-1;
508   uint32_t prev_mask = s->x86_prev_mask;
509   uint32_t src;
510   uint32_t dest;
511   uint32_t j;
512   uint8_t b;
513 
514   if (size <= 4)
515     return 0;
516 
517   size -= 4;
518   for (i = 0; i < size; ++i) {
519     if ((buf[i] & 0xFE) != 0xE8)
520       continue;
521 
522     prev_pos = i - prev_pos;
523     if (prev_pos > 3) {
524       prev_mask = 0;
525     } else {
526       prev_mask = (prev_mask << (prev_pos - 1)) & 7;
527       if (prev_mask != 0) {
528         b = buf[i + 4 - mask_to_bit_num[prev_mask]];
529         if (!mask_to_allowed_status[prev_mask]
530             || bcj_x86_test_msbyte(b)) {
531           prev_pos = i;
532           prev_mask = (prev_mask << 1) | 1;
533           continue;
534         }
535       }
536     }
537 
538     prev_pos = i;
539 
540     if (bcj_x86_test_msbyte(buf[i + 4])) {
541       src = get_unaligned_le32(buf + i + 1);
542       for (;;) {
543         dest = src - (s->pos + (uint32_t)i + 5);
544         if (prev_mask == 0)
545           break;
546 
547         j = mask_to_bit_num[prev_mask] * 8;
548         b = (uint8_t)(dest >> (24 - j));
549         if (!bcj_x86_test_msbyte(b))
550           break;
551 
552         src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
553       }
554 
555       dest &= 0x01FFFFFF;
556       dest |= (uint32_t)0 - (dest & 0x01000000);
557       put_unaligned_le32(dest, buf + i + 1);
558       i += 4;
559     } else {
560       prev_mask = (prev_mask << 1) | 1;
561     }
562   }
563 
564   prev_pos = i - prev_pos;
565   s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
566   return i;
567 }
568 #endif
569 
570 #ifdef XZ_DEC_POWERPC
571 static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
572 {
573   size_t i;
574   uint32_t instr;
575 
576   for (i = 0; i + 4 <= size; i += 4) {
577     instr = get_unaligned_be32(buf + i);
578     if ((instr & 0xFC000003) == 0x48000001) {
579       instr &= 0x03FFFFFC;
580       instr -= s->pos + (uint32_t)i;
581       instr &= 0x03FFFFFC;
582       instr |= 0x48000001;
583       put_unaligned_be32(instr, buf + i);
584     }
585   }
586 
587   return i;
588 }
589 #endif
590 
591 #ifdef XZ_DEC_IA64
592 static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
593 {
594   static const uint8_t branch_table[32] = {
595     0, 0, 0, 0, 0, 0, 0, 0,
596     0, 0, 0, 0, 0, 0, 0, 0,
597     4, 4, 6, 6, 0, 0, 7, 7,
598     4, 4, 0, 0, 4, 4, 0, 0
599   };
600 
601   /*
602    * The local variables take a little bit stack space, but it's less
603    * than what LZMA2 decoder takes, so it doesn't make sense to reduce
604    * stack usage here without doing that for the LZMA2 decoder too.
605    */
606 
607   /* Loop counters */
608   size_t i;
609   size_t j;
610 
611   /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
612   uint32_t slot;
613 
614   /* Bitwise offset of the instruction indicated by slot */
615   uint32_t bit_pos;
616 
617   /* bit_pos split into byte and bit parts */
618   uint32_t byte_pos;
619   uint32_t bit_res;
620 
621   /* Address part of an instruction */
622   uint32_t addr;
623 
624   /* Mask used to detect which instructions to convert */
625   uint32_t mask;
626 
627   /* 41-bit instruction stored somewhere in the lowest 48 bits */
628   uint64_t instr;
629 
630   /* Instruction normalized with bit_res for easier manipulation */
631   uint64_t norm;
632 
633   for (i = 0; i + 16 <= size; i += 16) {
634     mask = branch_table[buf[i] & 0x1F];
635     for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
636       if (((mask >> slot) & 1) == 0)
637         continue;
638 
639       byte_pos = bit_pos >> 3;
640       bit_res = bit_pos & 7;
641       instr = 0;
642       for (j = 0; j < 6; ++j)
643         instr |= (uint64_t)(buf[i + j + byte_pos])
644             << (8 * j);
645 
646       norm = instr >> bit_res;
647 
648       if (((norm >> 37) & 0x0F) == 0x05
649           && ((norm >> 9) & 0x07) == 0) {
650         addr = (norm >> 13) & 0x0FFFFF;
651         addr |= ((uint32_t)(norm >> 36) & 1) << 20;
652         addr <<= 4;
653         addr -= s->pos + (uint32_t)i;
654         addr >>= 4;
655 
656         norm &= ~((uint64_t)0x8FFFFF << 13);
657         norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
658         norm |= (uint64_t)(addr & 0x100000)
659             << (36 - 20);
660 
661         instr &= (1 << bit_res) - 1;
662         instr |= norm << bit_res;
663 
664         for (j = 0; j < 6; j++)
665           buf[i + j + byte_pos]
666             = (uint8_t)(instr >> (8 * j));
667       }
668     }
669   }
670 
671   return i;
672 }
673 #endif
674 
675 #ifdef XZ_DEC_ARM
676 static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
677 {
678   size_t i;
679   uint32_t addr;
680 
681   for (i = 0; i + 4 <= size; i += 4) {
682     if (buf[i + 3] == 0xEB) {
683       addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
684           | ((uint32_t)buf[i + 2] << 16);
685       addr <<= 2;
686       addr -= s->pos + (uint32_t)i + 8;
687       addr >>= 2;
688       buf[i] = (uint8_t)addr;
689       buf[i + 1] = (uint8_t)(addr >> 8);
690       buf[i + 2] = (uint8_t)(addr >> 16);
691     }
692   }
693 
694   return i;
695 }
696 #endif
697 
698 #ifdef XZ_DEC_ARMTHUMB
699 static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
700 {
701   size_t i;
702   uint32_t addr;
703 
704   for (i = 0; i + 4 <= size; i += 2) {
705     if ((buf[i + 1] & 0xF8) == 0xF0
706         && (buf[i + 3] & 0xF8) == 0xF8) {
707       addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
708           | ((uint32_t)buf[i] << 11)
709           | (((uint32_t)buf[i + 3] & 0x07) << 8)
710           | (uint32_t)buf[i + 2];
711       addr <<= 1;
712       addr -= s->pos + (uint32_t)i + 4;
713       addr >>= 1;
714       buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
715       buf[i] = (uint8_t)(addr >> 11);
716       buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
717       buf[i + 2] = (uint8_t)addr;
718       i += 2;
719     }
720   }
721 
722   return i;
723 }
724 #endif
725 
726 #ifdef XZ_DEC_SPARC
727 static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
728 {
729   size_t i;
730   uint32_t instr;
731 
732   for (i = 0; i + 4 <= size; i += 4) {
733     instr = get_unaligned_be32(buf + i);
734     if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
735       instr <<= 2;
736       instr -= s->pos + (uint32_t)i;
737       instr >>= 2;
738       instr = ((uint32_t)0x40000000 - (instr & 0x400000))
739           | 0x40000000 | (instr & 0x3FFFFF);
740       put_unaligned_be32(instr, buf + i);
741     }
742   }
743 
744   return i;
745 }
746 #endif
747 
748 /*
749  * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
750  * of data that got filtered.
751  *
752  * NOTE: This is implemented as a switch statement to avoid using function
753  * pointers, which could be problematic in the kernel boot code, which must
754  * avoid pointers to static data (at least on x86).
755  */
756 static void bcj_apply(struct xz_dec_bcj *s,
757           uint8_t *buf, size_t *pos, size_t size)
758 {
759   size_t filtered;
760 
761   buf += *pos;
762   size -= *pos;
763 
764   switch (s->type) {
765 #ifdef XZ_DEC_X86
766   case BCJ_X86:
767     filtered = bcj_x86(s, buf, size);
768     break;
769 #endif
770 #ifdef XZ_DEC_POWERPC
771   case BCJ_POWERPC:
772     filtered = bcj_powerpc(s, buf, size);
773     break;
774 #endif
775 #ifdef XZ_DEC_IA64
776   case BCJ_IA64:
777     filtered = bcj_ia64(s, buf, size);
778     break;
779 #endif
780 #ifdef XZ_DEC_ARM
781   case BCJ_ARM:
782     filtered = bcj_arm(s, buf, size);
783     break;
784 #endif
785 #ifdef XZ_DEC_ARMTHUMB
786   case BCJ_ARMTHUMB:
787     filtered = bcj_armthumb(s, buf, size);
788     break;
789 #endif
790 #ifdef XZ_DEC_SPARC
791   case BCJ_SPARC:
792     filtered = bcj_sparc(s, buf, size);
793     break;
794 #endif
795   default:
796     /* Never reached but silence compiler warnings. */
797     filtered = 0;
798     break;
799   }
800 
801   *pos += filtered;
802   s->pos += filtered;
803 }
804 
805 /*
806  * Flush pending filtered data from temp to the output buffer.
807  * Move the remaining mixture of possibly filtered and unfiltered
808  * data to the beginning of temp.
809  */
810 static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
811 {
812   size_t copy_size;
813 
814   copy_size = minof(s->temp.filtered, b->out_size - b->out_pos);
815   memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
816   b->out_pos += copy_size;
817 
818   s->temp.filtered -= copy_size;
819   s->temp.size -= copy_size;
820   memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
821 }
822 
823 /*
824  * The BCJ filter functions are primitive in sense that they process the
825  * data in chunks of 1-16 bytes. To hide this issue, this function does
826  * some buffering.
827  */
828 enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
829              struct xz_dec_lzma2 *lzma2,
830              struct xz_buf *b)
831 {
832   size_t out_start;
833 
834   /*
835    * Flush pending already filtered data to the output buffer. Return
836    * immediatelly if we couldn't flush everything, or if the next
837    * filter in the chain had already returned XZ_STREAM_END.
838    */
839   if (s->temp.filtered > 0) {
840     bcj_flush(s, b);
841     if (s->temp.filtered > 0)
842       return XZ_OK;
843 
844     if (s->ret == XZ_STREAM_END)
845       return XZ_STREAM_END;
846   }
847 
848   /*
849    * If we have more output space than what is currently pending in
850    * temp, copy the unfiltered data from temp to the output buffer
851    * and try to fill the output buffer by decoding more data from the
852    * next filter in the chain. Apply the BCJ filter on the new data
853    * in the output buffer. If everything cannot be filtered, copy it
854    * to temp and rewind the output buffer position accordingly.
855    *
856    * This needs to be always run when temp.size == 0 to handle a special
857    * case where the output buffer is full and the next filter has no
858    * more output coming but hasn't returned XZ_STREAM_END yet.
859    */
860   if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
861     out_start = b->out_pos;
862     memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
863     b->out_pos += s->temp.size;
864 
865     s->ret = xz_dec_lzma2_run(lzma2, b);
866     if (s->ret != XZ_STREAM_END
867         && (s->ret != XZ_OK ))
868       return s->ret;
869 
870     bcj_apply(s, b->out, &out_start, b->out_pos);
871 
872     /*
873      * As an exception, if the next filter returned XZ_STREAM_END,
874      * we can do that too, since the last few bytes that remain
875      * unfiltered are meant to remain unfiltered.
876      */
877     if (s->ret == XZ_STREAM_END)
878       return XZ_STREAM_END;
879 
880     s->temp.size = b->out_pos - out_start;
881     b->out_pos -= s->temp.size;
882     memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
883 
884     /*
885      * If there wasn't enough input to the next filter to fill
886      * the output buffer with unfiltered data, there's no point
887      * to try decoding more data to temp.
888      */
889     if (b->out_pos + s->temp.size < b->out_size)
890       return XZ_OK;
891   }
892 
893   /*
894    * We have unfiltered data in temp. If the output buffer isn't full
895    * yet, try to fill the temp buffer by decoding more data from the
896    * next filter. Apply the BCJ filter on temp. Then we hopefully can
897    * fill the actual output buffer by copying filtered data from temp.
898    * A mix of filtered and unfiltered data may be left in temp; it will
899    * be taken care on the next call to this function.
900    */
901   if (b->out_pos < b->out_size) {
902     /* Make b->out{,_pos,_size} temporarily point to s->temp. */
903     s->out = b->out;
904     s->out_pos = b->out_pos;
905     s->out_size = b->out_size;
906     b->out = s->temp.buf;
907     b->out_pos = s->temp.size;
908     b->out_size = sizeof(s->temp.buf);
909 
910     s->ret = xz_dec_lzma2_run(lzma2, b);
911 
912     s->temp.size = b->out_pos;
913     b->out = s->out;
914     b->out_pos = s->out_pos;
915     b->out_size = s->out_size;
916 
917     if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
918       return s->ret;
919 
920     bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
921 
922     /*
923      * If the next filter returned XZ_STREAM_END, we mark that
924      * everything is filtered, since the last unfiltered bytes
925      * of the stream are meant to be left as is.
926      */
927     if (s->ret == XZ_STREAM_END)
928       s->temp.filtered = s->temp.size;
929 
930     bcj_flush(s, b);
931     if (s->temp.filtered > 0)
932       return XZ_OK;
933   }
934 
935   return s->ret;
936 }
937 
938 enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
939 {
940   switch (id) {
941 #ifdef XZ_DEC_X86
942   case BCJ_X86:
943 #endif
944 #ifdef XZ_DEC_POWERPC
945   case BCJ_POWERPC:
946 #endif
947 #ifdef XZ_DEC_IA64
948   case BCJ_IA64:
949 #endif
950 #ifdef XZ_DEC_ARM
951   case BCJ_ARM:
952 #endif
953 #ifdef XZ_DEC_ARMTHUMB
954   case BCJ_ARMTHUMB:
955 #endif
956 #ifdef XZ_DEC_SPARC
957   case BCJ_SPARC:
958 #endif
959     break;
960 
961   default:
962     /* Unsupported Filter ID */
963     return XZ_OPTIONS_ERROR;
964   }
965 
966   s->type = id;
967   s->ret = XZ_OK;
968   s->pos = 0;
969   s->x86_prev_mask = 0;
970   s->temp.filtered = 0;
971   s->temp.size = 0;
972 
973   return XZ_OK;
974 }
975 
976 #endif
977 /*
978  * LZMA2 decoder
979  */
980 
981 
982 // BEGIN xz_lzma2.h
983 /*
984  * LZMA2 definitions
985  *
986  */
987 
988 
989 /* Range coder constants */
990 #define RC_SHIFT_BITS 8
991 #define RC_TOP_BITS 24
992 #define RC_TOP_VALUE (1 << RC_TOP_BITS)
993 #define RC_BIT_MODEL_TOTAL_BITS 11
994 #define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
995 #define RC_MOVE_BITS 5
996 
997 /*
998  * Maximum number of position states. A position state is the lowest pb
999  * number of bits of the current uncompressed offset. In some places there
1000  * are different sets of probabilities for different position states.
1001  */
1002 #define POS_STATES_MAX (1 << 4)
1003 
1004 /*
1005  * This enum is used to track which LZMA symbols have occurred most recently
1006  * and in which order. This information is used to predict the next symbol.
1007  *
1008  * Symbols:
1009  *  - Literal: One 8-bit byte
1010  *  - Match: Repeat a chunk of data at some distance
1011  *  - Long repeat: Multi-byte match at a recently seen distance
1012  *  - Short repeat: One-byte repeat at a recently seen distance
1013  *
1014  * The symbol names are in from STATE_oldest_older_previous. REP means
1015  * either short or long repeated match, and NONLIT means any non-literal.
1016  */
1017 enum lzma_state {
1018   STATE_LIT_LIT,
1019   STATE_MATCH_LIT_LIT,
1020   STATE_REP_LIT_LIT,
1021   STATE_SHORTREP_LIT_LIT,
1022   STATE_MATCH_LIT,
1023   STATE_REP_LIT,
1024   STATE_SHORTREP_LIT,
1025   STATE_LIT_MATCH,
1026   STATE_LIT_LONGREP,
1027   STATE_LIT_SHORTREP,
1028   STATE_NONLIT_MATCH,
1029   STATE_NONLIT_REP
1030 };
1031 
1032 /* Total number of states */
1033 #define STATES 12
1034 
1035 /* The lowest 7 states indicate that the previous state was a literal. */
1036 #define LIT_STATES 7
1037 
1038 /* Indicate that the latest symbol was a literal. */
1039 static inline void lzma_state_literal(enum lzma_state *state)
1040 {
1041   if (*state <= STATE_SHORTREP_LIT_LIT)
1042     *state = STATE_LIT_LIT;
1043   else if (*state <= STATE_LIT_SHORTREP)
1044     *state -= 3;
1045   else
1046     *state -= 6;
1047 }
1048 
1049 /* Indicate that the latest symbol was a match. */
1050 static inline void lzma_state_match(enum lzma_state *state)
1051 {
1052   *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
1053 }
1054 
1055 /* Indicate that the latest state was a long repeated match. */
1056 static inline void lzma_state_long_rep(enum lzma_state *state)
1057 {
1058   *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
1059 }
1060 
1061 /* Indicate that the latest symbol was a short match. */
1062 static inline void lzma_state_short_rep(enum lzma_state *state)
1063 {
1064   *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
1065 }
1066 
1067 /* Test if the previous symbol was a literal. */
1068 static inline int lzma_state_is_literal(enum lzma_state state)
1069 {
1070   return state < LIT_STATES;
1071 }
1072 
1073 /* Each literal coder is divided in three sections:
1074  *   - 0x001-0x0FF: Without match byte
1075  *   - 0x101-0x1FF: With match byte; match bit is 0
1076  *   - 0x201-0x2FF: With match byte; match bit is 1
1077  *
1078  * Match byte is used when the previous LZMA symbol was something else than
1079  * a literal (that is, it was some kind of match).
1080  */
1081 #define LITERAL_CODER_SIZE 0x300
1082 
1083 /* Maximum number of literal coders */
1084 #define LITERAL_CODERS_MAX (1 << 4)
1085 
1086 /* Minimum length of a match is two bytes. */
1087 #define MATCH_LEN_MIN 2
1088 
1089 /* Match length is encoded with 4, 5, or 10 bits.
1090  *
1091  * Length   Bits
1092  *  2-9      4 = Choice=0 + 3 bits
1093  * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
1094  * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
1095  */
1096 #define LEN_LOW_BITS 3
1097 #define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
1098 #define LEN_MID_BITS 3
1099 #define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
1100 #define LEN_HIGH_BITS 8
1101 #define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
1102 #define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
1103 
1104 /*
1105  * Maximum length of a match is 273 which is a result of the encoding
1106  * described above.
1107  */
1108 #define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
1109 
1110 /*
1111  * Different sets of probabilities are used for match distances that have
1112  * very short match length: Lengths of 2, 3, and 4 bytes have a separate
1113  * set of probabilities for each length. The matches with longer length
1114  * use a shared set of probabilities.
1115  */
1116 #define DIST_STATES 4
1117 
1118 /*
1119  * Get the index of the appropriate probability array for decoding
1120  * the distance slot.
1121  */
1122 static inline uint32_t lzma_get_dist_state(uint32_t len)
1123 {
1124   return len < DIST_STATES + MATCH_LEN_MIN
1125       ? len - MATCH_LEN_MIN : DIST_STATES - 1;
1126 }
1127 
1128 /*
1129  * The highest two bits of a 32-bit match distance are encoded using six bits.
1130  * This six-bit value is called a distance slot. This way encoding a 32-bit
1131  * value takes 6-36 bits, larger values taking more bits.
1132  */
1133 #define DIST_SLOT_BITS 6
1134 #define DIST_SLOTS (1 << DIST_SLOT_BITS)
1135 
1136 /* Match distances up to 127 are fully encoded using probabilities. Since
1137  * the highest two bits (distance slot) are always encoded using six bits,
1138  * the distances 0-3 don't need any additional bits to encode, since the
1139  * distance slot itself is the same as the actual distance. DIST_MODEL_START
1140  * indicates the first distance slot where at least one additional bit is
1141  * needed.
1142  */
1143 #define DIST_MODEL_START 4
1144 
1145 /*
1146  * Match distances greater than 127 are encoded in three pieces:
1147  *   - distance slot: the highest two bits
1148  *   - direct bits: 2-26 bits below the highest two bits
1149  *   - alignment bits: four lowest bits
1150  *
1151  * Direct bits don't use any probabilities.
1152  *
1153  * The distance slot value of 14 is for distances 128-191.
1154  */
1155 #define DIST_MODEL_END 14
1156 
1157 /* Distance slots that indicate a distance <= 127. */
1158 #define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
1159 #define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
1160 
1161 /*
1162  * For match distances greater than 127, only the highest two bits and the
1163  * lowest four bits (alignment) is encoded using probabilities.
1164  */
1165 #define ALIGN_BITS 4
1166 #define ALIGN_SIZE (1 << ALIGN_BITS)
1167 #define ALIGN_MASK (ALIGN_SIZE - 1)
1168 
1169 /* Total number of all probability variables */
1170 #define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
1171 
1172 /*
1173  * LZMA remembers the four most recent match distances. Reusing these
1174  * distances tends to take less space than re-encoding the actual
1175  * distance value.
1176  */
1177 #define REPS 4
1178 
1179 
1180 // END xz_lzma2.h
1181 
1182 /*
1183  * Range decoder initialization eats the first five bytes of each LZMA chunk.
1184  */
1185 #define RC_INIT_BYTES 5
1186 
1187 /*
1188  * Minimum number of usable input buffer to safely decode one LZMA symbol.
1189  * The worst case is that we decode 22 bits using probabilities and 26
1190  * direct bits. This may decode at maximum of 20 bytes of input. However,
1191  * lzma_main() does an extra normalization before returning, thus we
1192  * need to put 21 here.
1193  */
1194 #define LZMA_IN_REQUIRED 21
1195 
1196 /*
1197  * Dictionary (history buffer)
1198  *
1199  * These are always true:
1200  *    start <= pos <= full <= end
1201  *    pos <= limit <= end
1202  *    end == size
1203  *    size <= size_max
1204  *    allocated <= size
1205  *
1206  * Most of these variables are size_t as a relic of single-call mode,
1207  * in which the dictionary variables address the actual output
1208  * buffer directly.
1209  */
1210 struct dictionary {
1211   /* Beginning of the history buffer */
1212   uint8_t *buf;
1213 
1214   /* Old position in buf (before decoding more data) */
1215   size_t start;
1216 
1217   /* Position in buf */
1218   size_t pos;
1219 
1220   /*
1221    * How full dictionary is. This is used to detect corrupt input that
1222    * would read beyond the beginning of the uncompressed stream.
1223    */
1224   size_t full;
1225 
1226   /* Write limit; we don't write to buf[limit] or later bytes. */
1227   size_t limit;
1228 
1229   /* End of the dictionary buffer. This is the same as the dictionary size. */
1230   size_t end;
1231 
1232   /*
1233    * Size of the dictionary as specified in Block Header. This is used
1234    * together with "full" to detect corrupt input that would make us
1235    * read beyond the beginning of the uncompressed stream.
1236    */
1237   uint32_t size;
1238 
1239   /*
1240    * Maximum allowed dictionary size.
1241    */
1242   uint32_t size_max;
1243 
1244   /*
1245    * Amount of memory currently allocated for the dictionary.
1246    */
1247   uint32_t allocated;
1248 };
1249 
1250 /* Range decoder */
1251 struct rc_dec {
1252   uint32_t range;
1253   uint32_t code;
1254 
1255   /*
1256    * Number of initializing bytes remaining to be read
1257    * by rc_read_init().
1258    */
1259   uint32_t init_bytes_left;
1260 
1261   /*
1262    * Buffer from which we read our input. It can be either
1263    * temp.buf or the caller-provided input buffer.
1264    */
1265   const uint8_t *in;
1266   size_t in_pos;
1267   size_t in_limit;
1268 };
1269 
1270 /* Probabilities for a length decoder. */
1271 struct lzma_len_dec {
1272   /* Probability of match length being at least 10 */
1273   uint16_t choice;
1274 
1275   /* Probability of match length being at least 18 */
1276   uint16_t choice2;
1277 
1278   /* Probabilities for match lengths 2-9 */
1279   uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
1280 
1281   /* Probabilities for match lengths 10-17 */
1282   uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
1283 
1284   /* Probabilities for match lengths 18-273 */
1285   uint16_t high[LEN_HIGH_SYMBOLS];
1286 };
1287 
1288 struct lzma_dec {
1289   /* Distances of latest four matches */
1290   uint32_t rep0;
1291   uint32_t rep1;
1292   uint32_t rep2;
1293   uint32_t rep3;
1294 
1295   /* Types of the most recently seen LZMA symbols */
1296   enum lzma_state state;
1297 
1298   /*
1299    * Length of a match. This is updated so that dict_repeat can
1300    * be called again to finish repeating the whole match.
1301    */
1302   uint32_t len;
1303 
1304   /*
1305    * LZMA properties or related bit masks (number of literal
1306    * context bits, a mask dervied from the number of literal
1307    * position bits, and a mask dervied from the number
1308    * position bits)
1309    */
1310   uint32_t lc;
1311   uint32_t literal_pos_mask; /* (1 << lp) - 1 */
1312   uint32_t pos_mask;         /* (1 << pb) - 1 */
1313 
1314   /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
1315   uint16_t is_match[STATES][POS_STATES_MAX];
1316 
1317   /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
1318   uint16_t is_rep[STATES];
1319 
1320   /*
1321    * If 0, distance of a repeated match is rep0.
1322    * Otherwise check is_rep1.
1323    */
1324   uint16_t is_rep0[STATES];
1325 
1326   /*
1327    * If 0, distance of a repeated match is rep1.
1328    * Otherwise check is_rep2.
1329    */
1330   uint16_t is_rep1[STATES];
1331 
1332   /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
1333   uint16_t is_rep2[STATES];
1334 
1335   /*
1336    * If 1, the repeated match has length of one byte. Otherwise
1337    * the length is decoded from rep_len_decoder.
1338    */
1339   uint16_t is_rep0_long[STATES][POS_STATES_MAX];
1340 
1341   /*
1342    * Probability tree for the highest two bits of the match
1343    * distance. There is a separate probability tree for match
1344    * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
1345    */
1346   uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
1347 
1348   /*
1349    * Probility trees for additional bits for match distance
1350    * when the distance is in the range [4, 127].
1351    */
1352   uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
1353 
1354   /*
1355    * Probability tree for the lowest four bits of a match
1356    * distance that is equal to or greater than 128.
1357    */
1358   uint16_t dist_align[ALIGN_SIZE];
1359 
1360   /* Length of a normal match */
1361   struct lzma_len_dec match_len_dec;
1362 
1363   /* Length of a repeated match */
1364   struct lzma_len_dec rep_len_dec;
1365 
1366   /* Probabilities of literals */
1367   uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
1368 };
1369 
1370 struct lzma2_dec {
1371   /* Position in xz_dec_lzma2_run(). */
1372   enum lzma2_seq {
1373     SEQ_CONTROL,
1374     SEQ_UNCOMPRESSED_1,
1375     SEQ_UNCOMPRESSED_2,
1376     SEQ_COMPRESSED_0,
1377     SEQ_COMPRESSED_1,
1378     SEQ_PROPERTIES,
1379     SEQ_LZMA_PREPARE,
1380     SEQ_LZMA_RUN,
1381     SEQ_COPY
1382   } sequence;
1383 
1384   /* Next position after decoding the compressed size of the chunk. */
1385   enum lzma2_seq next_sequence;
1386 
1387   /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
1388   uint32_t uncompressed;
1389 
1390   /*
1391    * Compressed size of LZMA chunk or compressed/uncompressed
1392    * size of uncompressed chunk (64 KiB at maximum)
1393    */
1394   uint32_t compressed;
1395 
1396   /*
1397    * True if dictionary reset is needed. This is false before
1398    * the first chunk (LZMA or uncompressed).
1399    */
1400   int need_dict_reset;
1401 
1402   /*
1403    * True if new LZMA properties are needed. This is false
1404    * before the first LZMA chunk.
1405    */
1406   int need_props;
1407 };
1408 
1409 struct xz_dec_lzma2 {
1410   /*
1411    * The order below is important on x86 to reduce code size and
1412    * it shouldn't hurt on other platforms. Everything up to and
1413    * including lzma.pos_mask are in the first 128 bytes on x86-32,
1414    * which allows using smaller instructions to access those
1415    * variables. On x86-64, fewer variables fit into the first 128
1416    * bytes, but this is still the best order without sacrificing
1417    * the readability by splitting the structures.
1418    */
1419   struct rc_dec rc;
1420   struct dictionary dict;
1421   struct lzma2_dec lzma2;
1422   struct lzma_dec lzma;
1423 
1424   /*
1425    * Temporary buffer which holds small number of input bytes between
1426    * decoder calls. See lzma2_lzma() for details.
1427    */
1428   struct {
1429     uint32_t size;
1430     uint8_t buf[3 * LZMA_IN_REQUIRED];
1431   } temp;
1432 };
1433 
1434 /**************
1435  * Dictionary *
1436  **************/
1437 
1438 /* Reset the dictionary state. */
1439 static void dict_reset(struct dictionary *dict)
1440 {
1441   dict->start = 0;
1442   dict->pos = 0;
1443   dict->limit = 0;
1444   dict->full = 0;
1445 }
1446 
1447 /* Set dictionary write limit */
1448 static void dict_limit(struct dictionary *dict, size_t out_max)
1449 {
1450   if (dict->end - dict->pos <= out_max)
1451     dict->limit = dict->end;
1452   else
1453     dict->limit = dict->pos + out_max;
1454 }
1455 
1456 /* Return true if at least one byte can be written into the dictionary. */
1457 static inline int dict_has_space(const struct dictionary *dict)
1458 {
1459   return dict->pos < dict->limit;
1460 }
1461 
1462 /*
1463  * Get a byte from the dictionary at the given distance. The distance is
1464  * assumed to valid, or as a special case, zero when the dictionary is
1465  * still empty. This special case is needed for single-call decoding to
1466  * avoid writing a '\0' to the end of the destination buffer.
1467  */
1468 static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
1469 {
1470   size_t offset = dict->pos - dist - 1;
1471 
1472   if (dist >= dict->pos)
1473     offset += dict->end;
1474 
1475   return dict->full > 0 ? dict->buf[offset] : 0;
1476 }
1477 
1478 /*
1479  * Put one byte into the dictionary. It is assumed that there is space for it.
1480  */
1481 static inline void dict_put(struct dictionary *dict, uint8_t byte)
1482 {
1483   dict->buf[dict->pos++] = byte;
1484 
1485   if (dict->full < dict->pos)
1486     dict->full = dict->pos;
1487 }
1488 
1489 /*
1490  * Repeat given number of bytes from the given distance. If the distance is
1491  * invalid, false is returned. On success, true is returned and *len is
1492  * updated to indicate how many bytes were left to be repeated.
1493  */
1494 static int dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
1495 {
1496   size_t back;
1497   uint32_t left;
1498 
1499   if (dist >= dict->full || dist >= dict->size) return 0;
1500 
1501   left = minof(dict->limit - dict->pos, *len);
1502   *len -= left;
1503 
1504   back = dict->pos - dist - 1;
1505   if (dist >= dict->pos)
1506     back += dict->end;
1507 
1508   do {
1509     dict->buf[dict->pos++] = dict->buf[back++];
1510     if (back == dict->end)
1511       back = 0;
1512   } while (--left > 0);
1513 
1514   if (dict->full < dict->pos)
1515     dict->full = dict->pos;
1516 
1517   return 1;
1518 }
1519 
1520 /* Copy uncompressed data as is from input to dictionary and output buffers. */
1521 static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
1522             uint32_t *left)
1523 {
1524   size_t copy_size;
1525 
1526   while (*left > 0 && b->in_pos < b->in_size
1527       && b->out_pos < b->out_size) {
1528     copy_size = minof(b->in_size - b->in_pos,
1529         b->out_size - b->out_pos);
1530     if (copy_size > dict->end - dict->pos)
1531       copy_size = dict->end - dict->pos;
1532     if (copy_size > *left)
1533       copy_size = *left;
1534 
1535     *left -= copy_size;
1536 
1537     memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
1538     dict->pos += copy_size;
1539 
1540     if (dict->full < dict->pos)
1541       dict->full = dict->pos;
1542 
1543     if (dict->pos == dict->end)
1544       dict->pos = 0;
1545 
1546     memcpy(b->out + b->out_pos, b->in + b->in_pos,
1547         copy_size);
1548 
1549     dict->start = dict->pos;
1550 
1551     b->out_pos += copy_size;
1552     b->in_pos += copy_size;
1553   }
1554 }
1555 
1556 /*
1557  * Flush pending data from dictionary to b->out. It is assumed that there is
1558  * enough space in b->out. This is guaranteed because caller uses dict_limit()
1559  * before decoding data into the dictionary.
1560  */
1561 static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
1562 {
1563   size_t copy_size = dict->pos - dict->start;
1564 
1565   if (dict->pos == dict->end)
1566     dict->pos = 0;
1567 
1568   memcpy(b->out + b->out_pos, dict->buf + dict->start,
1569       copy_size);
1570 
1571   dict->start = dict->pos;
1572   b->out_pos += copy_size;
1573   return copy_size;
1574 }
1575 
1576 /*****************
1577  * Range decoder *
1578  *****************/
1579 
1580 /* Reset the range decoder. */
1581 static void rc_reset(struct rc_dec *rc)
1582 {
1583   rc->range = (uint32_t)-1;
1584   rc->code = 0;
1585   rc->init_bytes_left = RC_INIT_BYTES;
1586 }
1587 
1588 /*
1589  * Read the first five initial bytes into rc->code if they haven't been
1590  * read already. (Yes, the first byte gets completely ignored.)
1591  */
1592 static int rc_read_init(struct rc_dec *rc, struct xz_buf *b)
1593 {
1594   while (rc->init_bytes_left > 0) {
1595     if (b->in_pos == b->in_size) return 0;
1596 
1597     rc->code = (rc->code << 8) + b->in[b->in_pos++];
1598     --rc->init_bytes_left;
1599   }
1600 
1601   return 1;
1602 }
1603 
1604 /* Return true if there may not be enough input for the next decoding loop. */
1605 static inline int rc_limit_exceeded(const struct rc_dec *rc)
1606 {
1607   return rc->in_pos > rc->in_limit;
1608 }
1609 
1610 /*
1611  * Return true if it is possible (from point of view of range decoder) that
1612  * we have reached the end of the LZMA chunk.
1613  */
1614 static inline int rc_is_finished(const struct rc_dec *rc)
1615 {
1616   return rc->code == 0;
1617 }
1618 
1619 /* Read the next input byte if needed. */
1620 static inline void rc_normalize(struct rc_dec *rc)
1621 {
1622   if (rc->range < RC_TOP_VALUE) {
1623     rc->range <<= RC_SHIFT_BITS;
1624     rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
1625   }
1626 }
1627 
1628 /*
1629  * Decode one bit. In some versions, this function has been splitted in three
1630  * functions so that the compiler is supposed to be able to more easily avoid
1631  * an extra branch. In this particular version of the LZMA decoder, this
1632  * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
1633  * on x86). Using a non-splitted version results in nicer looking code too.
1634  *
1635  * NOTE: This must return an int. Do not make it return a bool or the speed
1636  * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
1637  * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
1638  */
1639 static inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
1640 {
1641   uint32_t bound;
1642   int bit;
1643 
1644   rc_normalize(rc);
1645   bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
1646   if (rc->code < bound) {
1647     rc->range = bound;
1648     *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
1649     bit = 0;
1650   } else {
1651     rc->range -= bound;
1652     rc->code -= bound;
1653     *prob -= *prob >> RC_MOVE_BITS;
1654     bit = 1;
1655   }
1656 
1657   return bit;
1658 }
1659 
1660 /* Decode a bittree starting from the most significant bit. */
1661 static inline uint32_t rc_bittree(struct rc_dec *rc,
1662              uint16_t *probs, uint32_t limit)
1663 {
1664   uint32_t symbol = 1;
1665 
1666   do {
1667     if (rc_bit(rc, &probs[symbol]))
1668       symbol = (symbol << 1) + 1;
1669     else
1670       symbol <<= 1;
1671   } while (symbol < limit);
1672 
1673   return symbol;
1674 }
1675 
1676 /* Decode a bittree starting from the least significant bit. */
1677 static inline void rc_bittree_reverse(struct rc_dec *rc,
1678                  uint16_t *probs,
1679                  uint32_t *dest, uint32_t limit)
1680 {
1681   uint32_t symbol = 1;
1682   uint32_t i = 0;
1683 
1684   do {
1685     if (rc_bit(rc, &probs[symbol])) {
1686       symbol = (symbol << 1) + 1;
1687       *dest += 1 << i;
1688     } else {
1689       symbol <<= 1;
1690     }
1691   } while (++i < limit);
1692 }
1693 
1694 /* Decode direct bits (fixed fifty-fifty probability) */
1695 static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
1696 {
1697   uint32_t mask;
1698 
1699   do {
1700     rc_normalize(rc);
1701     rc->range >>= 1;
1702     rc->code -= rc->range;
1703     mask = (uint32_t)0 - (rc->code >> 31);
1704     rc->code += rc->range & mask;
1705     *dest = (*dest << 1) + (mask + 1);
1706   } while (--limit > 0);
1707 }
1708 
1709 /********
1710  * LZMA *
1711  ********/
1712 
1713 /* Get pointer to literal coder probability array. */
1714 static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
1715 {
1716   uint32_t prev_byte = dict_get(&s->dict, 0);
1717   uint32_t low = prev_byte >> (8 - s->lzma.lc);
1718   uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
1719   return s->lzma.literal[low + high];
1720 }
1721 
1722 /* Decode a literal (one 8-bit byte) */
1723 static void lzma_literal(struct xz_dec_lzma2 *s)
1724 {
1725   uint16_t *probs;
1726   uint32_t symbol;
1727   uint32_t match_byte;
1728   uint32_t match_bit;
1729   uint32_t offset;
1730   uint32_t i;
1731 
1732   probs = lzma_literal_probs(s);
1733 
1734   if (lzma_state_is_literal(s->lzma.state)) {
1735     symbol = rc_bittree(&s->rc, probs, 0x100);
1736   } else {
1737     symbol = 1;
1738     match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
1739     offset = 0x100;
1740 
1741     do {
1742       match_bit = match_byte & offset;
1743       match_byte <<= 1;
1744       i = offset + match_bit + symbol;
1745 
1746       if (rc_bit(&s->rc, &probs[i])) {
1747         symbol = (symbol << 1) + 1;
1748         offset &= match_bit;
1749       } else {
1750         symbol <<= 1;
1751         offset &= ~match_bit;
1752       }
1753     } while (symbol < 0x100);
1754   }
1755 
1756   dict_put(&s->dict, (uint8_t)symbol);
1757   lzma_state_literal(&s->lzma.state);
1758 }
1759 
1760 /* Decode the length of the match into s->lzma.len. */
1761 static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
1762          uint32_t pos_state)
1763 {
1764   uint16_t *probs;
1765   uint32_t limit;
1766 
1767   if (!rc_bit(&s->rc, &l->choice)) {
1768     probs = l->low[pos_state];
1769     limit = LEN_LOW_SYMBOLS;
1770     s->lzma.len = MATCH_LEN_MIN;
1771   } else {
1772     if (!rc_bit(&s->rc, &l->choice2)) {
1773       probs = l->mid[pos_state];
1774       limit = LEN_MID_SYMBOLS;
1775       s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
1776     } else {
1777       probs = l->high;
1778       limit = LEN_HIGH_SYMBOLS;
1779       s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
1780           + LEN_MID_SYMBOLS;
1781     }
1782   }
1783 
1784   s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
1785 }
1786 
1787 /* Decode a match. The distance will be stored in s->lzma.rep0. */
1788 static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1789 {
1790   uint16_t *probs;
1791   uint32_t dist_slot;
1792   uint32_t limit;
1793 
1794   lzma_state_match(&s->lzma.state);
1795 
1796   s->lzma.rep3 = s->lzma.rep2;
1797   s->lzma.rep2 = s->lzma.rep1;
1798   s->lzma.rep1 = s->lzma.rep0;
1799 
1800   lzma_len(s, &s->lzma.match_len_dec, pos_state);
1801 
1802   probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
1803   dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
1804 
1805   if (dist_slot < DIST_MODEL_START) {
1806     s->lzma.rep0 = dist_slot;
1807   } else {
1808     limit = (dist_slot >> 1) - 1;
1809     s->lzma.rep0 = 2 + (dist_slot & 1);
1810 
1811     if (dist_slot < DIST_MODEL_END) {
1812       s->lzma.rep0 <<= limit;
1813       probs = s->lzma.dist_special + s->lzma.rep0
1814           - dist_slot - 1;
1815       rc_bittree_reverse(&s->rc, probs,
1816           &s->lzma.rep0, limit);
1817     } else {
1818       rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
1819       s->lzma.rep0 <<= ALIGN_BITS;
1820       rc_bittree_reverse(&s->rc, s->lzma.dist_align,
1821           &s->lzma.rep0, ALIGN_BITS);
1822     }
1823   }
1824 }
1825 
1826 /*
1827  * Decode a repeated match. The distance is one of the four most recently
1828  * seen matches. The distance will be stored in s->lzma.rep0.
1829  */
1830 static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1831 {
1832   uint32_t tmp;
1833 
1834   if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
1835     if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
1836         s->lzma.state][pos_state])) {
1837       lzma_state_short_rep(&s->lzma.state);
1838       s->lzma.len = 1;
1839       return;
1840     }
1841   } else {
1842     if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
1843       tmp = s->lzma.rep1;
1844     } else {
1845       if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
1846         tmp = s->lzma.rep2;
1847       } else {
1848         tmp = s->lzma.rep3;
1849         s->lzma.rep3 = s->lzma.rep2;
1850       }
1851 
1852       s->lzma.rep2 = s->lzma.rep1;
1853     }
1854 
1855     s->lzma.rep1 = s->lzma.rep0;
1856     s->lzma.rep0 = tmp;
1857   }
1858 
1859   lzma_state_long_rep(&s->lzma.state);
1860   lzma_len(s, &s->lzma.rep_len_dec, pos_state);
1861 }
1862 
1863 /* LZMA decoder core */
1864 static int lzma_main(struct xz_dec_lzma2 *s)
1865 {
1866   uint32_t pos_state;
1867 
1868   /*
1869    * If the dictionary was reached during the previous call, try to
1870    * finish the possibly pending repeat in the dictionary.
1871    */
1872   if (dict_has_space(&s->dict) && s->lzma.len > 0)
1873     dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
1874 
1875   /*
1876    * Decode more LZMA symbols. One iteration may consume up to
1877    * LZMA_IN_REQUIRED - 1 bytes.
1878    */
1879   while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
1880     pos_state = s->dict.pos & s->lzma.pos_mask;
1881 
1882     if (!rc_bit(&s->rc, &s->lzma.is_match[
1883         s->lzma.state][pos_state])) {
1884       lzma_literal(s);
1885     } else {
1886       if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
1887         lzma_rep_match(s, pos_state);
1888       else
1889         lzma_match(s, pos_state);
1890 
1891       if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
1892         return 0;
1893     }
1894   }
1895 
1896   /*
1897    * Having the range decoder always normalized when we are outside
1898    * this function makes it easier to correctly handle end of the chunk.
1899    */
1900   rc_normalize(&s->rc);
1901 
1902   return 1;
1903 }
1904 
1905 /*
1906  * Reset the LZMA decoder and range decoder state. Dictionary is nore reset
1907  * here, because LZMA state may be reset without resetting the dictionary.
1908  */
1909 static void lzma_reset(struct xz_dec_lzma2 *s)
1910 {
1911   uint16_t *probs;
1912   size_t i;
1913 
1914   s->lzma.state = STATE_LIT_LIT;
1915   s->lzma.rep0 = 0;
1916   s->lzma.rep1 = 0;
1917   s->lzma.rep2 = 0;
1918   s->lzma.rep3 = 0;
1919 
1920   /*
1921    * All probabilities are initialized to the same value. This hack
1922    * makes the code smaller by avoiding a separate loop for each
1923    * probability array.
1924    *
1925    * This could be optimized so that only that part of literal
1926    * probabilities that are actually required. In the common case
1927    * we would write 12 KiB less.
1928    */
1929   probs = s->lzma.is_match[0];
1930   for (i = 0; i < PROBS_TOTAL; ++i)
1931     probs[i] = RC_BIT_MODEL_TOTAL / 2;
1932 
1933   rc_reset(&s->rc);
1934 }
1935 
1936 /*
1937  * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
1938  * from the decoded lp and pb values. On success, the LZMA decoder state is
1939  * reset and true is returned.
1940  */
1941 static int lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
1942 {
1943   if (props > (4 * 5 + 4) * 9 + 8)
1944     return 0;
1945 
1946   s->lzma.pos_mask = 0;
1947   while (props >= 9 * 5) {
1948     props -= 9 * 5;
1949     ++s->lzma.pos_mask;
1950   }
1951 
1952   s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
1953 
1954   s->lzma.literal_pos_mask = 0;
1955   while (props >= 9) {
1956     props -= 9;
1957     ++s->lzma.literal_pos_mask;
1958   }
1959 
1960   s->lzma.lc = props;
1961 
1962   if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
1963     return 0;
1964 
1965   s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
1966 
1967   lzma_reset(s);
1968 
1969   return 1;
1970 }
1971 
1972 /*********
1973  * LZMA2 *
1974  *********/
1975 
1976 /*
1977  * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
1978  * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
1979  * wrapper function takes care of making the LZMA decoder's assumption safe.
1980  *
1981  * As long as there is plenty of input left to be decoded in the current LZMA
1982  * chunk, we decode directly from the caller-supplied input buffer until
1983  * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
1984  * s->temp.buf, which (hopefully) gets filled on the next call to this
1985  * function. We decode a few bytes from the temporary buffer so that we can
1986  * continue decoding from the caller-supplied input buffer again.
1987  */
1988 static int lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
1989 {
1990   size_t in_avail;
1991   uint32_t tmp;
1992 
1993   in_avail = b->in_size - b->in_pos;
1994   if (s->temp.size > 0 || s->lzma2.compressed == 0) {
1995     tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
1996     if (tmp > s->lzma2.compressed - s->temp.size)
1997       tmp = s->lzma2.compressed - s->temp.size;
1998     if (tmp > in_avail)
1999       tmp = in_avail;
2000 
2001     memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
2002 
2003     if (s->temp.size + tmp == s->lzma2.compressed) {
2004       memset(s->temp.buf + s->temp.size + tmp, 0,
2005           sizeof(s->temp.buf)
2006             - s->temp.size - tmp);
2007       s->rc.in_limit = s->temp.size + tmp;
2008     } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
2009       s->temp.size += tmp;
2010       b->in_pos += tmp;
2011       return 1;
2012     } else {
2013       s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
2014     }
2015 
2016     s->rc.in = s->temp.buf;
2017     s->rc.in_pos = 0;
2018 
2019     if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
2020       return 0;
2021 
2022     s->lzma2.compressed -= s->rc.in_pos;
2023 
2024     if (s->rc.in_pos < s->temp.size) {
2025       s->temp.size -= s->rc.in_pos;
2026       memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
2027           s->temp.size);
2028       return 1;
2029     }
2030 
2031     b->in_pos += s->rc.in_pos - s->temp.size;
2032     s->temp.size = 0;
2033   }
2034 
2035   in_avail = b->in_size - b->in_pos;
2036   if (in_avail >= LZMA_IN_REQUIRED) {
2037     s->rc.in = b->in;
2038     s->rc.in_pos = b->in_pos;
2039 
2040     if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
2041       s->rc.in_limit = b->in_pos + s->lzma2.compressed;
2042     else
2043       s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
2044 
2045     if (!lzma_main(s))
2046       return 0;
2047 
2048     in_avail = s->rc.in_pos - b->in_pos;
2049     if (in_avail > s->lzma2.compressed) return 0;
2050 
2051     s->lzma2.compressed -= in_avail;
2052     b->in_pos = s->rc.in_pos;
2053   }
2054 
2055   in_avail = b->in_size - b->in_pos;
2056   if (in_avail < LZMA_IN_REQUIRED) {
2057     if (in_avail > s->lzma2.compressed)
2058       in_avail = s->lzma2.compressed;
2059 
2060     memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
2061     s->temp.size = in_avail;
2062     b->in_pos += in_avail;
2063   }
2064 
2065   return 1;
2066 }
2067 
2068 /*
2069  * Take care of the LZMA2 control layer, and forward the job of actual LZMA
2070  * decoding or copying of uncompressed chunks to other functions.
2071  */
2072 enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
2073                struct xz_buf *b)
2074 {
2075   uint32_t tmp;
2076 
2077   while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
2078     switch (s->lzma2.sequence) {
2079     case SEQ_CONTROL:
2080       /*
2081        * LZMA2 control byte
2082        *
2083        * Exact values:
2084        *   0x00   End marker
2085        *   0x01   Dictionary reset followed by
2086        *          an uncompressed chunk
2087        *   0x02   Uncompressed chunk (no dictionary reset)
2088        *
2089        * Highest three bits (s->control & 0xE0):
2090        *   0xE0   Dictionary reset, new properties and state
2091        *          reset, followed by LZMA compressed chunk
2092        *   0xC0   New properties and state reset, followed
2093        *          by LZMA compressed chunk (no dictionary
2094        *          reset)
2095        *   0xA0   State reset using old properties,
2096        *          followed by LZMA compressed chunk (no
2097        *          dictionary reset)
2098        *   0x80   LZMA chunk (no dictionary or state reset)
2099        *
2100        * For LZMA compressed chunks, the lowest five bits
2101        * (s->control & 1F) are the highest bits of the
2102        * uncompressed size (bits 16-20).
2103        *
2104        * A new LZMA2 stream must begin with a dictionary
2105        * reset. The first LZMA chunk must set new
2106        * properties and reset the LZMA state.
2107        *
2108        * Values that don't match anything described above
2109        * are invalid and we return XZ_DATA_ERROR.
2110        */
2111       tmp = b->in[b->in_pos++];
2112 
2113       if (tmp == 0x00)
2114         return XZ_STREAM_END;
2115 
2116       if (tmp >= 0xE0 || tmp == 0x01) {
2117         s->lzma2.need_props = 1;
2118         s->lzma2.need_dict_reset = 0;
2119         dict_reset(&s->dict);
2120       } else if (s->lzma2.need_dict_reset) {
2121         return XZ_DATA_ERROR;
2122       }
2123 
2124       if (tmp >= 0x80) {
2125         s->lzma2.uncompressed = (tmp & 0x1F) << 16;
2126         s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
2127 
2128         if (tmp >= 0xC0) {
2129           /*
2130            * When there are new properties,
2131            * state reset is done at
2132            * SEQ_PROPERTIES.
2133            */
2134           s->lzma2.need_props = 0;
2135           s->lzma2.next_sequence
2136               = SEQ_PROPERTIES;
2137 
2138         } else if (s->lzma2.need_props) {
2139           return XZ_DATA_ERROR;
2140 
2141         } else {
2142           s->lzma2.next_sequence
2143               = SEQ_LZMA_PREPARE;
2144           if (tmp >= 0xA0)
2145             lzma_reset(s);
2146         }
2147       } else {
2148         if (tmp > 0x02)
2149           return XZ_DATA_ERROR;
2150 
2151         s->lzma2.sequence = SEQ_COMPRESSED_0;
2152         s->lzma2.next_sequence = SEQ_COPY;
2153       }
2154 
2155       break;
2156 
2157     case SEQ_UNCOMPRESSED_1:
2158       s->lzma2.uncompressed
2159           += (uint32_t)b->in[b->in_pos++] << 8;
2160       s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
2161       break;
2162 
2163     case SEQ_UNCOMPRESSED_2:
2164       s->lzma2.uncompressed
2165           += (uint32_t)b->in[b->in_pos++] + 1;
2166       s->lzma2.sequence = SEQ_COMPRESSED_0;
2167       break;
2168 
2169     case SEQ_COMPRESSED_0:
2170       s->lzma2.compressed
2171           = (uint32_t)b->in[b->in_pos++] << 8;
2172       s->lzma2.sequence = SEQ_COMPRESSED_1;
2173       break;
2174 
2175     case SEQ_COMPRESSED_1:
2176       s->lzma2.compressed
2177           += (uint32_t)b->in[b->in_pos++] + 1;
2178       s->lzma2.sequence = s->lzma2.next_sequence;
2179       break;
2180 
2181     case SEQ_PROPERTIES:
2182       if (!lzma_props(s, b->in[b->in_pos++]))
2183         return XZ_DATA_ERROR;
2184 
2185       s->lzma2.sequence = SEQ_LZMA_PREPARE;
2186 
2187     case SEQ_LZMA_PREPARE:
2188       if (s->lzma2.compressed < RC_INIT_BYTES)
2189         return XZ_DATA_ERROR;
2190 
2191       if (!rc_read_init(&s->rc, b))
2192         return XZ_OK;
2193 
2194       s->lzma2.compressed -= RC_INIT_BYTES;
2195       s->lzma2.sequence = SEQ_LZMA_RUN;
2196 
2197     case SEQ_LZMA_RUN:
2198       /*
2199        * Set dictionary limit to indicate how much we want
2200        * to be encoded at maximum. Decode new data into the
2201        * dictionary. Flush the new data from dictionary to
2202        * b->out. Check if we finished decoding this chunk.
2203        * In case the dictionary got full but we didn't fill
2204        * the output buffer yet, we may run this loop
2205        * multiple times without changing s->lzma2.sequence.
2206        */
2207       dict_limit(&s->dict, minof(b->out_size - b->out_pos,
2208           s->lzma2.uncompressed));
2209       if (!lzma2_lzma(s, b))
2210         return XZ_DATA_ERROR;
2211 
2212       s->lzma2.uncompressed -= dict_flush(&s->dict, b);
2213 
2214       if (s->lzma2.uncompressed == 0) {
2215         if (s->lzma2.compressed > 0 || s->lzma.len > 0
2216             || !rc_is_finished(&s->rc))
2217           return XZ_DATA_ERROR;
2218 
2219         rc_reset(&s->rc);
2220         s->lzma2.sequence = SEQ_CONTROL;
2221 
2222       } else if (b->out_pos == b->out_size
2223           || (b->in_pos == b->in_size
2224             && s->temp.size
2225             < s->lzma2.compressed)) {
2226         return XZ_OK;
2227       }
2228 
2229       break;
2230 
2231     case SEQ_COPY:
2232       dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
2233       if (s->lzma2.compressed > 0)
2234         return XZ_OK;
2235 
2236       s->lzma2.sequence = SEQ_CONTROL;
2237       break;
2238     }
2239   }
2240 
2241   return XZ_OK;
2242 }
2243 
2244 struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max)
2245 {
2246   struct xz_dec_lzma2 *s = malloc(sizeof(*s));
2247   if (s == NULL)
2248     return NULL;
2249 
2250   s->dict.size_max = dict_max;
2251   s->dict.buf = NULL;
2252   s->dict.allocated = 0;
2253 
2254   return s;
2255 }
2256 
2257 enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
2258 {
2259   /* This limits dictionary size to 3 GiB to keep parsing simpler. */
2260   if (props > 39)
2261     return XZ_OPTIONS_ERROR;
2262 
2263   s->dict.size = 2 + (props & 1);
2264   s->dict.size <<= (props >> 1) + 11;
2265 
2266   if (s->dict.size > s->dict.size_max)
2267     return XZ_MEMLIMIT_ERROR;
2268 
2269   s->dict.end = s->dict.size;
2270 
2271   if (s->dict.allocated < s->dict.size) {
2272     free(s->dict.buf);
2273     s->dict.buf = malloc(s->dict.size);
2274     if (s->dict.buf == NULL) {
2275       s->dict.allocated = 0;
2276       return XZ_MEM_ERROR;
2277     }
2278   }
2279 
2280   s->lzma.len = 0;
2281 
2282   s->lzma2.sequence = SEQ_CONTROL;
2283   s->lzma2.need_dict_reset = 1;
2284 
2285   s->temp.size = 0;
2286 
2287   return XZ_OK;
2288 }
2289 
2290 /*
2291  * .xz Stream decoder
2292  */
2293 
2294 
2295 // BEGIN xz_stream.h
2296 /*
2297  * Definitions for handling the .xz file format
2298  */
2299 
2300 /*
2301  * See the .xz file format specification at
2302  * http://tukaani.org/xz/xz-file-format.txt
2303  * to understand the container format.
2304  */
2305 
2306 #define STREAM_HEADER_SIZE 12
2307 
2308 #define HEADER_MAGIC "\3757zXZ"
2309 #define HEADER_MAGIC_SIZE 6
2310 
2311 #define FOOTER_MAGIC "YZ"
2312 #define FOOTER_MAGIC_SIZE 2
2313 
2314 /*
2315  * Variable-length integer can hold a 63-bit unsigned integer or a special
2316  * value indicating that the value is unknown.
2317  *
2318  * Experimental: vli_type can be defined to uint32_t to save a few bytes
2319  * in code size (no effect on speed). Doing so limits the uncompressed and
2320  * compressed size of the file to less than 256 MiB and may also weaken
2321  * error detection slightly.
2322  */
2323 typedef uint64_t vli_type;
2324 
2325 #define VLI_MAX ((vli_type)-1 / 2)
2326 #define VLI_UNKNOWN ((vli_type)-1)
2327 
2328 /* Maximum encoded size of a VLI */
2329 #define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
2330 
2331 /* Integrity Check types */
2332 enum xz_check {
2333   XZ_CHECK_NONE = 0,
2334   XZ_CHECK_CRC32 = 1,
2335   XZ_CHECK_CRC64 = 4,
2336   XZ_CHECK_SHA256 = 10
2337 };
2338 
2339 /* Maximum possible Check ID */
2340 #define XZ_CHECK_MAX 15
2341 // END xz_stream.h
2342 
2343 #define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64)
2344 
2345 /* Hash used to validate the Index field */
2346 struct xz_dec_hash {
2347   vli_type unpadded;
2348   vli_type uncompressed;
2349   uint32_t crc32;
2350 };
2351 
2352 struct xz_dec {
2353   /* Position in dec_main() */
2354   enum {
2355     SEQ_STREAM_HEADER,
2356     SEQ_BLOCK_START,
2357     SEQ_BLOCK_HEADER,
2358     SEQ_BLOCK_UNCOMPRESS,
2359     SEQ_BLOCK_PADDING,
2360     SEQ_BLOCK_CHECK,
2361     SEQ_INDEX,
2362     SEQ_INDEX_PADDING,
2363     SEQ_INDEX_CRC32,
2364     SEQ_STREAM_FOOTER
2365   } sequence;
2366 
2367   /* Position in variable-length integers and Check fields */
2368   uint32_t pos;
2369 
2370   /* Variable-length integer decoded by dec_vli() */
2371   vli_type vli;
2372 
2373   /* Saved in_pos and out_pos */
2374   size_t in_start;
2375   size_t out_start;
2376 
2377   /* CRC32 or CRC64 value in Block or CRC32 value in Index */
2378   uint64_t crc;
2379 
2380   /* Type of the integrity check calculated from uncompressed data */
2381   enum xz_check check_type;
2382 
2383   /*
2384    * True if the next call to xz_dec_run() is allowed to return
2385    * XZ_BUF_ERROR.
2386    */
2387   int allow_buf_error;
2388 
2389   /* Information stored in Block Header */
2390   struct {
2391     /*
2392      * Value stored in the Compressed Size field, or
2393      * VLI_UNKNOWN if Compressed Size is not present.
2394      */
2395     vli_type compressed;
2396 
2397     /*
2398      * Value stored in the Uncompressed Size field, or
2399      * VLI_UNKNOWN if Uncompressed Size is not present.
2400      */
2401     vli_type uncompressed;
2402 
2403     /* Size of the Block Header field */
2404     uint32_t size;
2405   } block_header;
2406 
2407   /* Information collected when decoding Blocks */
2408   struct {
2409     /* Observed compressed size of the current Block */
2410     vli_type compressed;
2411 
2412     /* Observed uncompressed size of the current Block */
2413     vli_type uncompressed;
2414 
2415     /* Number of Blocks decoded so far */
2416     vli_type count;
2417 
2418     /*
2419      * Hash calculated from the Block sizes. This is used to
2420      * validate the Index field.
2421      */
2422     struct xz_dec_hash hash;
2423   } block;
2424 
2425   /* Variables needed when verifying the Index field */
2426   struct {
2427     /* Position in dec_index() */
2428     enum {
2429       SEQ_INDEX_COUNT,
2430       SEQ_INDEX_UNPADDED,
2431       SEQ_INDEX_UNCOMPRESSED
2432     } sequence;
2433 
2434     /* Size of the Index in bytes */
2435     vli_type size;
2436 
2437     /* Number of Records (matches block.count in valid files) */
2438     vli_type count;
2439 
2440     /*
2441      * Hash calculated from the Records (matches block.hash in
2442      * valid files).
2443      */
2444     struct xz_dec_hash hash;
2445   } index;
2446 
2447   /*
2448    * Temporary buffer needed to hold Stream Header, Block Header,
2449    * and Stream Footer. The Block Header is the biggest (1 KiB)
2450    * so we reserve space according to that. buf[] has to be aligned
2451    * to a multiple of four bytes; the size_t variables before it
2452    * should guarantee this.
2453    */
2454   struct {
2455     size_t pos;
2456     size_t size;
2457     uint8_t buf[1024];
2458   } temp;
2459 
2460   struct xz_dec_lzma2 *lzma2;
2461 
2462 #ifdef XZ_DEC_BCJ
2463   struct xz_dec_bcj *bcj;
2464   int bcj_active;
2465 #endif
2466 };
2467 
2468 /* Sizes of the Check field with different Check IDs */
2469 static const uint8_t check_sizes[16] = {
2470   0,
2471   4, 4, 4,
2472   8, 8, 8,
2473   16, 16, 16,
2474   32, 32, 32,
2475   64, 64, 64
2476 };
2477 
2478 /*
2479  * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
2480  * must have set s->temp.pos to indicate how much data we are supposed
2481  * to copy into s->temp.buf. Return true once s->temp.pos has reached
2482  * s->temp.size.
2483  */
2484 static int fill_temp(struct xz_dec *s, struct xz_buf *b)
2485 {
2486   size_t copy_size = minof(b->in_size - b->in_pos, s->temp.size - s->temp.pos);
2487 
2488   memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
2489   b->in_pos += copy_size;
2490   s->temp.pos += copy_size;
2491 
2492   if (s->temp.pos == s->temp.size) {
2493     s->temp.pos = 0;
2494     return 1;
2495   }
2496 
2497   return 0;
2498 }
2499 
2500 /* Decode a variable-length integer (little-endian base-128 encoding) */
2501 static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
2502          size_t *in_pos, size_t in_size)
2503 {
2504   uint8_t byte;
2505 
2506   if (s->pos == 0)
2507     s->vli = 0;
2508 
2509   while (*in_pos < in_size) {
2510     byte = in[*in_pos];
2511     ++*in_pos;
2512 
2513     s->vli |= (vli_type)(byte & 0x7F) << s->pos;
2514 
2515     if ((byte & 0x80) == 0) {
2516       /* Don't allow non-minimal encodings. */
2517       if (byte == 0 && s->pos != 0)
2518         return XZ_DATA_ERROR;
2519 
2520       s->pos = 0;
2521       return XZ_STREAM_END;
2522     }
2523 
2524     s->pos += 7;
2525     if (s->pos == 7 * VLI_BYTES_MAX)
2526       return XZ_DATA_ERROR;
2527   }
2528 
2529   return XZ_OK;
2530 }
2531 
2532 /*
2533  * Decode the Compressed Data field from a Block. Update and validate
2534  * the observed compressed and uncompressed sizes of the Block so that
2535  * they don't exceed the values possibly stored in the Block Header
2536  * (validation assumes that no integer overflow occurs, since vli_type
2537  * is normally uint64_t). Update the CRC32 or CRC64 value if presence of
2538  * the CRC32 or CRC64 field was indicated in Stream Header.
2539  *
2540  * Once the decoding is finished, validate that the observed sizes match
2541  * the sizes possibly stored in the Block Header. Update the hash and
2542  * Block count, which are later used to validate the Index field.
2543  */
2544 static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
2545 {
2546   enum xz_ret ret;
2547 
2548   s->in_start = b->in_pos;
2549   s->out_start = b->out_pos;
2550 
2551 #ifdef XZ_DEC_BCJ
2552   if (s->bcj_active)
2553     ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
2554   else
2555 #endif
2556     ret = xz_dec_lzma2_run(s->lzma2, b);
2557 
2558   s->block.compressed += b->in_pos - s->in_start;
2559   s->block.uncompressed += b->out_pos - s->out_start;
2560 
2561   /*
2562    * There is no need to separately check for VLI_UNKNOWN, since
2563    * the observed sizes are always smaller than VLI_UNKNOWN.
2564    */
2565   if (s->block.compressed > s->block_header.compressed
2566       || s->block.uncompressed
2567         > s->block_header.uncompressed)
2568     return XZ_DATA_ERROR;
2569 
2570   if (s->check_type == XZ_CHECK_CRC32)
2571     s->crc = xz_crc32(b->out + s->out_start,
2572         b->out_pos - s->out_start, s->crc);
2573   else if (s->check_type == XZ_CHECK_CRC64)
2574     s->crc = ~(s->crc);
2575     size_t size = b->out_pos - s->out_start;
2576     uint8_t *buf = b->out + s->out_start;
2577     while (size) {
2578       s->crc = xz_crc64_table[*buf++ ^ (s->crc & 0xFF)] ^ (s->crc >> 8);
2579       --size;
2580     }
2581     s->crc=~(s->crc);
2582 
2583   if (ret == XZ_STREAM_END) {
2584     if (s->block_header.compressed != VLI_UNKNOWN
2585         && s->block_header.compressed
2586           != s->block.compressed)
2587       return XZ_DATA_ERROR;
2588 
2589     if (s->block_header.uncompressed != VLI_UNKNOWN
2590         && s->block_header.uncompressed
2591           != s->block.uncompressed)
2592       return XZ_DATA_ERROR;
2593 
2594     s->block.hash.unpadded += s->block_header.size
2595         + s->block.compressed;
2596 
2597     s->block.hash.unpadded += check_sizes[s->check_type];
2598 
2599     s->block.hash.uncompressed += s->block.uncompressed;
2600     s->block.hash.crc32 = xz_crc32(
2601         (const uint8_t *)&s->block.hash,
2602         sizeof(s->block.hash), s->block.hash.crc32);
2603 
2604     ++s->block.count;
2605   }
2606 
2607   return ret;
2608 }
2609 
2610 /* Update the Index size and the CRC32 value. */
2611 static void index_update(struct xz_dec *s, const struct xz_buf *b)
2612 {
2613   size_t in_used = b->in_pos - s->in_start;
2614   s->index.size += in_used;
2615   s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc);
2616 }
2617 
2618 /*
2619  * Decode the Number of Records, Unpadded Size, and Uncompressed Size
2620  * fields from the Index field. That is, Index Padding and CRC32 are not
2621  * decoded by this function.
2622  *
2623  * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
2624  * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
2625  */
2626 static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
2627 {
2628   enum xz_ret ret;
2629 
2630   do {
2631     ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
2632     if (ret != XZ_STREAM_END) {
2633       index_update(s, b);
2634       return ret;
2635     }
2636 
2637     switch (s->index.sequence) {
2638     case SEQ_INDEX_COUNT:
2639       s->index.count = s->vli;
2640 
2641       /*
2642        * Validate that the Number of Records field
2643        * indicates the same number of Records as
2644        * there were Blocks in the Stream.
2645        */
2646       if (s->index.count != s->block.count)
2647         return XZ_DATA_ERROR;
2648 
2649       s->index.sequence = SEQ_INDEX_UNPADDED;
2650       break;
2651 
2652     case SEQ_INDEX_UNPADDED:
2653       s->index.hash.unpadded += s->vli;
2654       s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
2655       break;
2656 
2657     case SEQ_INDEX_UNCOMPRESSED:
2658       s->index.hash.uncompressed += s->vli;
2659       s->index.hash.crc32 = xz_crc32(
2660           (const uint8_t *)&s->index.hash,
2661           sizeof(s->index.hash),
2662           s->index.hash.crc32);
2663       --s->index.count;
2664       s->index.sequence = SEQ_INDEX_UNPADDED;
2665       break;
2666     }
2667   } while (s->index.count > 0);
2668 
2669   return XZ_STREAM_END;
2670 }
2671 
2672 /*
2673  * Validate that the next four or eight input bytes match the value
2674  * of s->crc. s->pos must be zero when starting to validate the first byte.
2675  * The "bits" argument allows using the same code for both CRC32 and CRC64.
2676  */
2677 static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b,
2678         uint32_t bits)
2679 {
2680   do {
2681     if (b->in_pos == b->in_size)
2682       return XZ_OK;
2683 
2684     if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++])
2685       return XZ_DATA_ERROR;
2686 
2687     s->pos += 8;
2688 
2689   } while (s->pos < bits);
2690 
2691   s->crc = 0;
2692   s->pos = 0;
2693 
2694   return XZ_STREAM_END;
2695 }
2696 
2697 /*
2698  * Skip over the Check field when the Check ID is not supported.
2699  * Returns true once the whole Check field has been skipped over.
2700  */
2701 static int check_skip(struct xz_dec *s, struct xz_buf *b)
2702 {
2703   while (s->pos < check_sizes[s->check_type]) {
2704     if (b->in_pos == b->in_size) return 0;
2705 
2706     ++b->in_pos;
2707     ++s->pos;
2708   }
2709 
2710   s->pos = 0;
2711 
2712   return 1;
2713 }
2714 
2715 /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
2716 static enum xz_ret dec_stream_header(struct xz_dec *s)
2717 {
2718   if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
2719     return XZ_FORMAT_ERROR;
2720 
2721   if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
2722       != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
2723     return XZ_DATA_ERROR;
2724 
2725   if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
2726     return XZ_OPTIONS_ERROR;
2727 
2728   /*
2729    * Of integrity checks, we support none (Check ID = 0),
2730    * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
2731    * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
2732    * check types too, but then the check won't be verified and
2733    * a warning (XZ_UNSUPPORTED_CHECK) will be given.
2734    */
2735   s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
2736 
2737   if (s->check_type > XZ_CHECK_MAX)
2738     return XZ_OPTIONS_ERROR;
2739 
2740   if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
2741     return XZ_UNSUPPORTED_CHECK;
2742 
2743   return XZ_OK;
2744 }
2745 
2746 /* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
2747 static enum xz_ret dec_stream_footer(struct xz_dec *s)
2748 {
2749   if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
2750     return XZ_DATA_ERROR;
2751 
2752   if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
2753     return XZ_DATA_ERROR;
2754 
2755   /*
2756    * Validate Backward Size. Note that we never added the size of the
2757    * Index CRC32 field to s->index.size, thus we use s->index.size / 4
2758    * instead of s->index.size / 4 - 1.
2759    */
2760   if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
2761     return XZ_DATA_ERROR;
2762 
2763   if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
2764     return XZ_DATA_ERROR;
2765 
2766   /*
2767    * Use XZ_STREAM_END instead of XZ_OK to be more convenient
2768    * for the caller.
2769    */
2770   return XZ_STREAM_END;
2771 }
2772 
2773 /* Decode the Block Header and initialize the filter chain. */
2774 static enum xz_ret dec_block_header(struct xz_dec *s)
2775 {
2776   enum xz_ret ret;
2777 
2778   /*
2779    * Validate the CRC32. We know that the temp buffer is at least
2780    * eight bytes so this is safe.
2781    */
2782   s->temp.size -= 4;
2783   if (xz_crc32(s->temp.buf, s->temp.size, 0)
2784       != get_le32(s->temp.buf + s->temp.size))
2785     return XZ_DATA_ERROR;
2786 
2787   s->temp.pos = 2;
2788 
2789   /*
2790    * Catch unsupported Block Flags. We support only one or two filters
2791    * in the chain, so we catch that with the same test.
2792    */
2793 #ifdef XZ_DEC_BCJ
2794   if (s->temp.buf[1] & 0x3E)
2795 #else
2796   if (s->temp.buf[1] & 0x3F)
2797 #endif
2798     return XZ_OPTIONS_ERROR;
2799 
2800   /* Compressed Size */
2801   if (s->temp.buf[1] & 0x40) {
2802     if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
2803           != XZ_STREAM_END)
2804       return XZ_DATA_ERROR;
2805 
2806     s->block_header.compressed = s->vli;
2807   } else {
2808     s->block_header.compressed = VLI_UNKNOWN;
2809   }
2810 
2811   /* Uncompressed Size */
2812   if (s->temp.buf[1] & 0x80) {
2813     if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
2814         != XZ_STREAM_END)
2815       return XZ_DATA_ERROR;
2816 
2817     s->block_header.uncompressed = s->vli;
2818   } else {
2819     s->block_header.uncompressed = VLI_UNKNOWN;
2820   }
2821 
2822 #ifdef XZ_DEC_BCJ
2823   /* If there are two filters, the first one must be a BCJ filter. */
2824   s->bcj_active = s->temp.buf[1] & 0x01;
2825   if (s->bcj_active) {
2826     if (s->temp.size - s->temp.pos < 2)
2827       return XZ_OPTIONS_ERROR;
2828 
2829     ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
2830     if (ret != XZ_OK)
2831       return ret;
2832 
2833     /*
2834      * We don't support custom start offset,
2835      * so Size of Properties must be zero.
2836      */
2837     if (s->temp.buf[s->temp.pos++] != 0x00)
2838       return XZ_OPTIONS_ERROR;
2839   }
2840 #endif
2841 
2842   /* Valid Filter Flags always take at least two bytes. */
2843   if (s->temp.size - s->temp.pos < 2)
2844     return XZ_DATA_ERROR;
2845 
2846   /* Filter ID = LZMA2 */
2847   if (s->temp.buf[s->temp.pos++] != 0x21)
2848     return XZ_OPTIONS_ERROR;
2849 
2850   /* Size of Properties = 1-byte Filter Properties */
2851   if (s->temp.buf[s->temp.pos++] != 0x01)
2852     return XZ_OPTIONS_ERROR;
2853 
2854   /* Filter Properties contains LZMA2 dictionary size. */
2855   if (s->temp.size - s->temp.pos < 1)
2856     return XZ_DATA_ERROR;
2857 
2858   ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
2859   if (ret != XZ_OK)
2860     return ret;
2861 
2862   /* The rest must be Header Padding. */
2863   while (s->temp.pos < s->temp.size)
2864     if (s->temp.buf[s->temp.pos++] != 0x00)
2865       return XZ_OPTIONS_ERROR;
2866 
2867   s->temp.pos = 0;
2868   s->block.compressed = 0;
2869   s->block.uncompressed = 0;
2870 
2871   return XZ_OK;
2872 }
2873 
2874 static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
2875 {
2876   enum xz_ret ret;
2877 
2878   /*
2879    * Store the start position for the case when we are in the middle
2880    * of the Index field.
2881    */
2882   s->in_start = b->in_pos;
2883 
2884   for (;;) {
2885     switch (s->sequence) {
2886     case SEQ_STREAM_HEADER:
2887       /*
2888        * Stream Header is copied to s->temp, and then
2889        * decoded from there. This way if the caller
2890        * gives us only little input at a time, we can
2891        * still keep the Stream Header decoding code
2892        * simple. Similar approach is used in many places
2893        * in this file.
2894        */
2895       if (!fill_temp(s, b))
2896         return XZ_OK;
2897 
2898       /*
2899        * If dec_stream_header() returns
2900        * XZ_UNSUPPORTED_CHECK, it is still possible
2901        * to continue decoding if working in multi-call
2902        * mode. Thus, update s->sequence before calling
2903        * dec_stream_header().
2904        */
2905       s->sequence = SEQ_BLOCK_START;
2906 
2907       ret = dec_stream_header(s);
2908       if (ret != XZ_OK)
2909         return ret;
2910 
2911     case SEQ_BLOCK_START:
2912       /* We need one byte of input to continue. */
2913       if (b->in_pos == b->in_size)
2914         return XZ_OK;
2915 
2916       /* See if this is the beginning of the Index field. */
2917       if (b->in[b->in_pos] == 0) {
2918         s->in_start = b->in_pos++;
2919         s->sequence = SEQ_INDEX;
2920         break;
2921       }
2922 
2923       /*
2924        * Calculate the size of the Block Header and
2925        * prepare to decode it.
2926        */
2927       s->block_header.size
2928         = ((uint32_t)b->in[b->in_pos] + 1) * 4;
2929 
2930       s->temp.size = s->block_header.size;
2931       s->temp.pos = 0;
2932       s->sequence = SEQ_BLOCK_HEADER;
2933 
2934     case SEQ_BLOCK_HEADER:
2935       if (!fill_temp(s, b))
2936         return XZ_OK;
2937 
2938       ret = dec_block_header(s);
2939       if (ret != XZ_OK)
2940         return ret;
2941 
2942       s->sequence = SEQ_BLOCK_UNCOMPRESS;
2943 
2944     case SEQ_BLOCK_UNCOMPRESS:
2945       ret = dec_block(s, b);
2946       if (ret != XZ_STREAM_END)
2947         return ret;
2948 
2949       s->sequence = SEQ_BLOCK_PADDING;
2950 
2951     case SEQ_BLOCK_PADDING:
2952       /*
2953        * Size of Compressed Data + Block Padding
2954        * must be a multiple of four. We don't need
2955        * s->block.compressed for anything else
2956        * anymore, so we use it here to test the size
2957        * of the Block Padding field.
2958        */
2959       while (s->block.compressed & 3) {
2960         if (b->in_pos == b->in_size)
2961           return XZ_OK;
2962 
2963         if (b->in[b->in_pos++] != 0)
2964           return XZ_DATA_ERROR;
2965 
2966         ++s->block.compressed;
2967       }
2968 
2969       s->sequence = SEQ_BLOCK_CHECK;
2970 
2971     case SEQ_BLOCK_CHECK:
2972       if (s->check_type == XZ_CHECK_CRC32) {
2973         ret = crc_validate(s, b, 32);
2974         if (ret != XZ_STREAM_END)
2975           return ret;
2976       }
2977       else if (IS_CRC64(s->check_type)) {
2978         ret = crc_validate(s, b, 64);
2979         if (ret != XZ_STREAM_END)
2980           return ret;
2981       }
2982       else if (!check_skip(s, b)) {
2983         return XZ_OK;
2984       }
2985 
2986       s->sequence = SEQ_BLOCK_START;
2987       break;
2988 
2989     case SEQ_INDEX:
2990       ret = dec_index(s, b);
2991       if (ret != XZ_STREAM_END)
2992         return ret;
2993 
2994       s->sequence = SEQ_INDEX_PADDING;
2995 
2996     case SEQ_INDEX_PADDING:
2997       while ((s->index.size + (b->in_pos - s->in_start))
2998           & 3) {
2999         if (b->in_pos == b->in_size) {
3000           index_update(s, b);
3001           return XZ_OK;
3002         }
3003 
3004         if (b->in[b->in_pos++] != 0)
3005           return XZ_DATA_ERROR;
3006       }
3007 
3008       /* Finish the CRC32 value and Index size. */
3009       index_update(s, b);
3010 
3011       /* Compare the hashes to validate the Index field. */
3012       if (!memeq(&s->block.hash, &s->index.hash,
3013           sizeof(s->block.hash)))
3014         return XZ_DATA_ERROR;
3015 
3016       s->sequence = SEQ_INDEX_CRC32;
3017 
3018     case SEQ_INDEX_CRC32:
3019       ret = crc_validate(s, b, 32);
3020       if (ret != XZ_STREAM_END)
3021         return ret;
3022 
3023       s->temp.size = STREAM_HEADER_SIZE;
3024       s->sequence = SEQ_STREAM_FOOTER;
3025 
3026     case SEQ_STREAM_FOOTER:
3027       if (!fill_temp(s, b))
3028         return XZ_OK;
3029 
3030       return dec_stream_footer(s);
3031     }
3032   }
3033 
3034   /* Never reached */
3035 }
3036 
3037 /*
3038  * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
3039  * multi-call and single-call decoding.
3040  *
3041  * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
3042  * are not going to make any progress anymore. This is to prevent the caller
3043  * from calling us infinitely when the input file is truncated or otherwise
3044  * corrupt. Since zlib-style API allows that the caller fills the input buffer
3045  * only when the decoder doesn't produce any new output, we have to be careful
3046  * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
3047  * after the second consecutive call to xz_dec_run() that makes no progress.
3048  *
3049  * In single-call mode, if we couldn't decode everything and no error
3050  * occurred, either the input is truncated or the output buffer is too small.
3051  * Since we know that the last input byte never produces any output, we know
3052  * that if all the input was consumed and decoding wasn't finished, the file
3053  * must be corrupt. Otherwise the output buffer has to be too small or the
3054  * file is corrupt in a way that decoding it produces too big output.
3055  *
3056  * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
3057  * their original values. This is because with some filter chains there won't
3058  * be any valid uncompressed data in the output buffer unless the decoding
3059  * actually succeeds (that's the price to pay of using the output buffer as
3060  * the workspace).
3061  */
3062 enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
3063 {
3064   size_t in_start;
3065   size_t out_start;
3066   enum xz_ret ret;
3067 
3068   in_start = b->in_pos;
3069   out_start = b->out_pos;
3070   ret = dec_main(s, b);
3071 
3072   if (ret == XZ_OK && in_start == b->in_pos && out_start == b->out_pos) {
3073     if (s->allow_buf_error)
3074       ret = XZ_BUF_ERROR;
3075 
3076     s->allow_buf_error = 1;
3077   } else {
3078     s->allow_buf_error = 0;
3079   }
3080 
3081   return ret;
3082 }
3083 
3084 struct xz_dec *xz_dec_init(uint32_t dict_max)
3085 {
3086   struct xz_dec *s = malloc(sizeof(*s));
3087   if (!s)
3088     return NULL;
3089 
3090 #ifdef XZ_DEC_BCJ
3091   s->bcj = malloc(sizeof(*s->bcj));
3092   if (!s->bcj)
3093     goto error_bcj;
3094 #endif
3095 
3096   s->lzma2 = xz_dec_lzma2_create(dict_max);
3097   if (s->lzma2 == NULL)
3098     goto error_lzma2;
3099 
3100   xz_dec_reset(s);
3101   return s;
3102 
3103 error_lzma2:
3104 #ifdef XZ_DEC_BCJ
3105   free(s->bcj);
3106 error_bcj:
3107 #endif
3108   free(s);
3109   return NULL;
3110 }
3111 
3112 void xz_dec_reset(struct xz_dec *s)
3113 {
3114   s->sequence = SEQ_STREAM_HEADER;
3115   s->allow_buf_error = 0;
3116   s->pos = 0;
3117   s->crc = 0;
3118   memset(&s->block, 0, sizeof(s->block));
3119   memset(&s->index, 0, sizeof(s->index));
3120   s->temp.pos = 0;
3121   s->temp.size = STREAM_HEADER_SIZE;
3122 }
3123 
3124 void xz_dec_end(struct xz_dec *s)
3125 {
3126   if (s != NULL) {
3127     free((s->lzma2)->dict.buf);
3128     free(s->lzma2);
3129 
3130 #ifdef XZ_DEC_BCJ
3131     free(s->bcj);
3132 #endif
3133     free(s);
3134   }
3135 }
3136