1 /* LzmaSpec.c -- LZMA Reference Decoder
2 2013-07-28 : Igor Pavlov : Public domain */
3
4 // This code implements LZMA file decoding according to LZMA specification.
5 // This code is not optimized for speed.
6
7 #include <stdio.h>
8
9 #ifdef _MSC_VER
10 #pragma warning(disable : 4710) // function not inlined
11 #pragma warning(disable : 4996) // This function or variable may be unsafe
12 #endif
13
14 typedef unsigned char Byte;
15 typedef unsigned short UInt16;
16
17 #ifdef _LZMA_UINT32_IS_ULONG
18 typedef unsigned long UInt32;
19 #else
20 typedef unsigned int UInt32;
21 #endif
22
23 #if defined(_MSC_VER) || defined(__BORLANDC__)
24 typedef unsigned __int64 UInt64;
25 #else
26 typedef unsigned long long int UInt64;
27 #endif
28
29
30 struct CInputStream
31 {
32 FILE *File;
33 UInt64 Processed;
34
InitCInputStream35 void Init() { Processed = 0; }
36
ReadByteCInputStream37 Byte ReadByte()
38 {
39 int c = getc(File);
40 if (c < 0)
41 throw "Unexpected end of file";
42 Processed++;
43 return (Byte)c;
44 }
45 };
46
47
48 struct COutStream
49 {
50 FILE *File;
51 UInt64 Processed;
52
InitCOutStream53 void Init() { Processed = 0; }
54
WriteByteCOutStream55 void WriteByte(Byte b)
56 {
57 if (putc(b, File) == EOF)
58 throw "File writing error";
59 Processed++;
60 }
61 };
62
63
64 class COutWindow
65 {
66 Byte *Buf;
67 UInt32 Pos;
68 UInt32 Size;
69 bool IsFull;
70
71 public:
72 unsigned TotalPos;
73 COutStream OutStream;
74
COutWindow()75 COutWindow(): Buf(NULL) {}
~COutWindow()76 ~COutWindow() { delete []Buf; }
77
Create(UInt32 dictSize)78 void Create(UInt32 dictSize)
79 {
80 Buf = new Byte[dictSize];
81 Pos = 0;
82 Size = dictSize;
83 IsFull = false;
84 TotalPos = 0;
85 }
86
PutByte(Byte b)87 void PutByte(Byte b)
88 {
89 TotalPos++;
90 Buf[Pos++] = b;
91 if (Pos == Size)
92 {
93 Pos = 0;
94 IsFull = true;
95 }
96 OutStream.WriteByte(b);
97 }
98
GetByte(UInt32 dist) const99 Byte GetByte(UInt32 dist) const
100 {
101 return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
102 }
103
CopyMatch(UInt32 dist,unsigned len)104 void CopyMatch(UInt32 dist, unsigned len)
105 {
106 for (; len > 0; len--)
107 PutByte(GetByte(dist));
108 }
109
CheckDistance(UInt32 dist) const110 bool CheckDistance(UInt32 dist) const
111 {
112 return dist <= Pos || IsFull;
113 }
114
IsEmpty() const115 bool IsEmpty() const
116 {
117 return Pos == 0 && !IsFull;
118 }
119 };
120
121
122 #define kNumBitModelTotalBits 11
123 #define kNumMoveBits 5
124
125 typedef UInt16 CProb;
126
127 #define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
128
129 #define INIT_PROBS(p) \
130 { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
131
132 class CRangeDecoder
133 {
134 UInt32 Range;
135 UInt32 Code;
136
137 void Normalize();
138
139 public:
140
141 CInputStream *InStream;
142 bool Corrupted;
143
144 void Init();
IsFinishedOK() const145 bool IsFinishedOK() const { return Code == 0; }
146
147 UInt32 DecodeDirectBits(unsigned numBits);
148 unsigned DecodeBit(CProb *prob);
149 };
150
Init()151 void CRangeDecoder::Init()
152 {
153 Corrupted = false;
154
155 if (InStream->ReadByte() != 0)
156 Corrupted = true;
157
158 Range = 0xFFFFFFFF;
159 Code = 0;
160 for (int i = 0; i < 4; i++)
161 Code = (Code << 8) | InStream->ReadByte();
162
163 if (Code == Range)
164 Corrupted = true;
165 }
166
167 #define kTopValue ((UInt32)1 << 24)
168
Normalize()169 void CRangeDecoder::Normalize()
170 {
171 if (Range < kTopValue)
172 {
173 Range <<= 8;
174 Code = (Code << 8) | InStream->ReadByte();
175 }
176 }
177
DecodeDirectBits(unsigned numBits)178 UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
179 {
180 UInt32 res = 0;
181 do
182 {
183 Range >>= 1;
184 Code -= Range;
185 UInt32 t = 0 - ((UInt32)Code >> 31);
186 Code += Range & t;
187
188 if (Code == Range)
189 Corrupted = true;
190
191 Normalize();
192 res <<= 1;
193 res += t + 1;
194 }
195 while (--numBits);
196 return res;
197 }
198
DecodeBit(CProb * prob)199 unsigned CRangeDecoder::DecodeBit(CProb *prob)
200 {
201 unsigned v = *prob;
202 UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
203 unsigned symbol;
204 if (Code < bound)
205 {
206 v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
207 Range = bound;
208 symbol = 0;
209 }
210 else
211 {
212 v -= v >> kNumMoveBits;
213 Code -= bound;
214 Range -= bound;
215 symbol = 1;
216 }
217 *prob = (CProb)v;
218 Normalize();
219 return symbol;
220 }
221
222
BitTreeReverseDecode(CProb * probs,unsigned numBits,CRangeDecoder * rc)223 unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
224 {
225 unsigned m = 1;
226 unsigned symbol = 0;
227 for (unsigned i = 0; i < numBits; i++)
228 {
229 unsigned bit = rc->DecodeBit(&probs[m]);
230 m <<= 1;
231 m += bit;
232 symbol |= (bit << i);
233 }
234 return symbol;
235 }
236
237 template <unsigned NumBits>
238 class CBitTreeDecoder
239 {
240 CProb Probs[(unsigned)1 << NumBits];
241
242 public:
243
Init()244 void Init()
245 {
246 INIT_PROBS(Probs);
247 }
248
Decode(CRangeDecoder * rc)249 unsigned Decode(CRangeDecoder *rc)
250 {
251 unsigned m = 1;
252 for (unsigned i = 0; i < NumBits; i++)
253 m = (m << 1) + rc->DecodeBit(&Probs[m]);
254 return m - ((unsigned)1 << NumBits);
255 }
256
ReverseDecode(CRangeDecoder * rc)257 unsigned ReverseDecode(CRangeDecoder *rc)
258 {
259 return BitTreeReverseDecode(Probs, NumBits, rc);
260 }
261 };
262
263 #define kNumPosBitsMax 4
264
265 #define kNumStates 12
266 #define kNumLenToPosStates 4
267 #define kNumAlignBits 4
268 #define kStartPosModelIndex 4
269 #define kEndPosModelIndex 14
270 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
271 #define kMatchMinLen 2
272
273 class CLenDecoder
274 {
275 CProb Choice;
276 CProb Choice2;
277 CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
278 CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
279 CBitTreeDecoder<8> HighCoder;
280
281 public:
282
Init()283 void Init()
284 {
285 Choice = PROB_INIT_VAL;
286 Choice2 = PROB_INIT_VAL;
287 HighCoder.Init();
288 for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
289 {
290 LowCoder[i].Init();
291 MidCoder[i].Init();
292 }
293 }
294
Decode(CRangeDecoder * rc,unsigned posState)295 unsigned Decode(CRangeDecoder *rc, unsigned posState)
296 {
297 if (rc->DecodeBit(&Choice) == 0)
298 return LowCoder[posState].Decode(rc);
299 if (rc->DecodeBit(&Choice2) == 0)
300 return 8 + MidCoder[posState].Decode(rc);
301 return 16 + HighCoder.Decode(rc);
302 }
303 };
304
UpdateState_Literal(unsigned state)305 unsigned UpdateState_Literal(unsigned state)
306 {
307 if (state < 4) return 0;
308 else if (state < 10) return state - 3;
309 else return state - 6;
310 }
UpdateState_Match(unsigned state)311 unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; }
UpdateState_Rep(unsigned state)312 unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; }
UpdateState_ShortRep(unsigned state)313 unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
314
315 #define LZMA_DIC_MIN (1 << 12)
316
317 class CLzmaDecoder
318 {
319 public:
320 CRangeDecoder RangeDec;
321 COutWindow OutWindow;
322
323 bool markerIsMandatory;
324 unsigned lc, pb, lp;
325 UInt32 dictSize;
326 UInt32 dictSizeInProperties;
327
DecodeProperties(const Byte * properties)328 void DecodeProperties(const Byte *properties)
329 {
330 unsigned d = properties[0];
331 if (d >= (9 * 5 * 5))
332 throw "Incorrect LZMA properties";
333 lc = d % 9;
334 d /= 9;
335 pb = d / 5;
336 lp = d % 5;
337 dictSizeInProperties = 0;
338 for (int i = 0; i < 4; i++)
339 dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
340 dictSize = dictSizeInProperties;
341 if (dictSize < LZMA_DIC_MIN)
342 dictSize = LZMA_DIC_MIN;
343 }
344
CLzmaDecoder()345 CLzmaDecoder(): LitProbs(NULL) {}
~CLzmaDecoder()346 ~CLzmaDecoder() { delete []LitProbs; }
347
Create()348 void Create()
349 {
350 OutWindow.Create(dictSize);
351 CreateLiterals();
352 }
353
354 int Decode(bool unpackSizeDefined, UInt64 unpackSize);
355
356 private:
357
358 CProb *LitProbs;
359
CreateLiterals()360 void CreateLiterals()
361 {
362 LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
363 }
364
InitLiterals()365 void InitLiterals()
366 {
367 UInt32 num = (UInt32)0x300 << (lc + lp);
368 for (UInt32 i = 0; i < num; i++)
369 LitProbs[i] = PROB_INIT_VAL;
370 }
371
DecodeLiteral(unsigned state,UInt32 rep0)372 void DecodeLiteral(unsigned state, UInt32 rep0)
373 {
374 unsigned prevByte = 0;
375 if (!OutWindow.IsEmpty())
376 prevByte = OutWindow.GetByte(1);
377
378 unsigned symbol = 1;
379 unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
380 CProb *probs = &LitProbs[(UInt32)0x300 * litState];
381
382 if (state >= 7)
383 {
384 unsigned matchByte = OutWindow.GetByte(rep0 + 1);
385 do
386 {
387 unsigned matchBit = (matchByte >> 7) & 1;
388 matchByte <<= 1;
389 unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
390 symbol = (symbol << 1) | bit;
391 if (matchBit != bit)
392 break;
393 }
394 while (symbol < 0x100);
395 }
396 while (symbol < 0x100)
397 symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
398 OutWindow.PutByte((Byte)(symbol - 0x100));
399 }
400
401 CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
402 CBitTreeDecoder<kNumAlignBits> AlignDecoder;
403 CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
404
InitDist()405 void InitDist()
406 {
407 for (unsigned i = 0; i < kNumLenToPosStates; i++)
408 PosSlotDecoder[i].Init();
409 AlignDecoder.Init();
410 INIT_PROBS(PosDecoders);
411 }
412
DecodeDistance(unsigned len)413 unsigned DecodeDistance(unsigned len)
414 {
415 unsigned lenState = len;
416 if (lenState > kNumLenToPosStates - 1)
417 lenState = kNumLenToPosStates - 1;
418
419 unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
420 if (posSlot < 4)
421 return posSlot;
422
423 unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
424 UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
425 if (posSlot < kEndPosModelIndex)
426 dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
427 else
428 {
429 dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
430 dist += AlignDecoder.ReverseDecode(&RangeDec);
431 }
432 return dist;
433 }
434
435 CProb IsMatch[kNumStates << kNumPosBitsMax];
436 CProb IsRep[kNumStates];
437 CProb IsRepG0[kNumStates];
438 CProb IsRepG1[kNumStates];
439 CProb IsRepG2[kNumStates];
440 CProb IsRep0Long[kNumStates << kNumPosBitsMax];
441
442 CLenDecoder LenDecoder;
443 CLenDecoder RepLenDecoder;
444
Init()445 void Init()
446 {
447 InitLiterals();
448 InitDist();
449
450 INIT_PROBS(IsMatch);
451 INIT_PROBS(IsRep);
452 INIT_PROBS(IsRepG0);
453 INIT_PROBS(IsRepG1);
454 INIT_PROBS(IsRepG2);
455 INIT_PROBS(IsRep0Long);
456
457 LenDecoder.Init();
458 RepLenDecoder.Init();
459 }
460 };
461
462
463 #define LZMA_RES_ERROR 0
464 #define LZMA_RES_FINISHED_WITH_MARKER 1
465 #define LZMA_RES_FINISHED_WITHOUT_MARKER 2
466
Decode(bool unpackSizeDefined,UInt64 unpackSize)467 int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
468 {
469 Init();
470 RangeDec.Init();
471
472 UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
473 unsigned state = 0;
474
475 for (;;)
476 {
477 if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
478 if (RangeDec.IsFinishedOK())
479 return LZMA_RES_FINISHED_WITHOUT_MARKER;
480
481 unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
482
483 if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
484 {
485 if (unpackSizeDefined && unpackSize == 0)
486 return LZMA_RES_ERROR;
487 DecodeLiteral(state, rep0);
488 state = UpdateState_Literal(state);
489 unpackSize--;
490 continue;
491 }
492
493 unsigned len;
494
495 if (RangeDec.DecodeBit(&IsRep[state]) != 0)
496 {
497 if (unpackSizeDefined && unpackSize == 0)
498 return LZMA_RES_ERROR;
499 if (OutWindow.IsEmpty())
500 return LZMA_RES_ERROR;
501 if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
502 {
503 if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
504 {
505 state = UpdateState_ShortRep(state);
506 OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
507 unpackSize--;
508 continue;
509 }
510 }
511 else
512 {
513 UInt32 dist;
514 if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
515 dist = rep1;
516 else
517 {
518 if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
519 dist = rep2;
520 else
521 {
522 dist = rep3;
523 rep3 = rep2;
524 }
525 rep2 = rep1;
526 }
527 rep1 = rep0;
528 rep0 = dist;
529 }
530 len = RepLenDecoder.Decode(&RangeDec, posState);
531 state = UpdateState_Rep(state);
532 }
533 else
534 {
535 rep3 = rep2;
536 rep2 = rep1;
537 rep1 = rep0;
538 len = LenDecoder.Decode(&RangeDec, posState);
539 state = UpdateState_Match(state);
540 rep0 = DecodeDistance(len);
541 if (rep0 == 0xFFFFFFFF)
542 return RangeDec.IsFinishedOK() ?
543 LZMA_RES_FINISHED_WITH_MARKER :
544 LZMA_RES_ERROR;
545
546 if (unpackSizeDefined && unpackSize == 0)
547 return LZMA_RES_ERROR;
548 if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
549 return LZMA_RES_ERROR;
550 }
551 len += kMatchMinLen;
552 bool isError = false;
553 if (unpackSizeDefined && unpackSize < len)
554 {
555 len = (unsigned)unpackSize;
556 isError = true;
557 }
558 OutWindow.CopyMatch(rep0 + 1, len);
559 unpackSize -= len;
560 if (isError)
561 return LZMA_RES_ERROR;
562 }
563 }
564
Print(const char * s)565 static void Print(const char *s)
566 {
567 fputs(s, stdout);
568 }
569
PrintError(const char * s)570 static void PrintError(const char *s)
571 {
572 fputs(s, stderr);
573 }
574
575
576 #define CONVERT_INT_TO_STR(charType, tempSize) \
577
ConvertUInt64ToString(UInt64 val,char * s)578 void ConvertUInt64ToString(UInt64 val, char *s)
579 {
580 char temp[32];
581 unsigned i = 0;
582 while (val >= 10)
583 {
584 temp[i++] = (char)('0' + (unsigned)(val % 10));
585 val /= 10;
586 }
587 *s++ = (char)('0' + (unsigned)val);
588 while (i != 0)
589 {
590 i--;
591 *s++ = temp[i];
592 }
593 *s = 0;
594 }
595
PrintUInt64(const char * title,UInt64 v)596 void PrintUInt64(const char *title, UInt64 v)
597 {
598 Print(title);
599 Print(" : ");
600 char s[32];
601 ConvertUInt64ToString(v, s);
602 Print(s);
603 Print(" bytes \n");
604 }
605
main2(int numArgs,const char * args[])606 int main2(int numArgs, const char *args[])
607 {
608 Print("\nLZMA Reference Decoder 9.31 : Igor Pavlov : Public domain : 2013-02-06\n");
609 if (numArgs == 1)
610 Print("\nUse: lzmaSpec a.lzma outFile");
611
612 if (numArgs != 3)
613 throw "you must specify two parameters";
614
615 CInputStream inStream;
616 inStream.File = fopen(args[1], "rb");
617 inStream.Init();
618 if (inStream.File == 0)
619 throw "Can't open input file";
620
621 CLzmaDecoder lzmaDecoder;
622 lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+");
623 lzmaDecoder.OutWindow.OutStream.Init();
624 if (inStream.File == 0)
625 throw "Can't open output file";
626
627 Byte header[13];
628 int i;
629 for (i = 0; i < 13; i++)
630 header[i] = inStream.ReadByte();
631
632 lzmaDecoder.DecodeProperties(header);
633
634 printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
635 printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
636 printf("\nDictionary Size for decoding = %u", lzmaDecoder.dictSize);
637
638 UInt64 unpackSize = 0;
639 bool unpackSizeDefined = false;
640 for (i = 0; i < 8; i++)
641 {
642 Byte b = header[5 + i];
643 if (b != 0xFF)
644 unpackSizeDefined = true;
645 unpackSize |= (UInt64)b << (8 * i);
646 }
647
648 lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
649
650 Print("\n");
651 if (unpackSizeDefined)
652 PrintUInt64("Uncompressed Size", unpackSize);
653 else
654 Print("End marker is expected\n");
655 lzmaDecoder.RangeDec.InStream = &inStream;
656
657 Print("\n");
658
659 lzmaDecoder.Create();
660 // we support the streams that have uncompressed size and marker.
661 int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
662
663 PrintUInt64("Read ", inStream.Processed);
664 PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
665
666 if (res == LZMA_RES_ERROR)
667 throw "LZMA decoding error";
668 else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
669 Print("Finished without end marker");
670 else if (res == LZMA_RES_FINISHED_WITH_MARKER)
671 {
672 if (unpackSizeDefined)
673 {
674 if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
675 throw "Finished with end marker before than specified size";
676 Print("Warning: ");
677 }
678 Print("Finished with end marker");
679 }
680 else
681 throw "Internal Error";
682
683 Print("\n");
684
685 if (lzmaDecoder.RangeDec.Corrupted)
686 {
687 Print("\nWarning: LZMA stream is corrupted\n");
688 }
689
690 return 0;
691 }
692
693
694 int
695 #ifdef _MSC_VER
696 __cdecl
697 #endif
main(int numArgs,const char * args[])698 main(int numArgs, const char *args[])
699 {
700 try { return main2(numArgs, args); }
701 catch (const char *s)
702 {
703 PrintError("\nError:\n");
704 PrintError(s);
705 PrintError("\n");
706 return 1;
707 }
708 catch(...)
709 {
710 PrintError("\nError\n");
711 return 1;
712 }
713 }
714