1 /* LzmaSpec.cpp -- LZMA Reference Decoder
2 2015-06-14 : Igor Pavlov : Public domain */
3
4 // This code implements LZMA file decoding according to LZMA specification.
5 // This code is not optimized for speed.
6
7 #include <stdio.h>
8
9 #ifdef _MSC_VER
10 #pragma warning(disable : 4710) // function not inlined
11 #pragma warning(disable : 4996) // This function or variable may be unsafe
12 #endif
13
14 typedef unsigned char Byte;
15 typedef unsigned short UInt16;
16
17 #ifdef _LZMA_UINT32_IS_ULONG
18 typedef unsigned long UInt32;
19 #else
20 typedef unsigned int UInt32;
21 #endif
22
23 #if defined(_MSC_VER) || defined(__BORLANDC__)
24 typedef unsigned __int64 UInt64;
25 #else
26 typedef unsigned long long int UInt64;
27 #endif
28
29
30 struct CInputStream
31 {
32 FILE *File;
33 UInt64 Processed;
34
InitCInputStream35 void Init() { Processed = 0; }
36
ReadByteCInputStream37 Byte ReadByte()
38 {
39 int c = getc(File);
40 if (c < 0)
41 throw "Unexpected end of file";
42 Processed++;
43 return (Byte)c;
44 }
45 };
46
47
48 struct COutStream
49 {
50 FILE *File;
51 UInt64 Processed;
52
InitCOutStream53 void Init() { Processed = 0; }
54
WriteByteCOutStream55 void WriteByte(Byte b)
56 {
57 if (putc(b, File) == EOF)
58 throw "File writing error";
59 Processed++;
60 }
61 };
62
63
64 class COutWindow
65 {
66 Byte *Buf;
67 UInt32 Pos;
68 UInt32 Size;
69 bool IsFull;
70
71 public:
72 unsigned TotalPos;
73 COutStream OutStream;
74
COutWindow()75 COutWindow(): Buf(NULL) {}
~COutWindow()76 ~COutWindow() { delete []Buf; }
77
Create(UInt32 dictSize)78 void Create(UInt32 dictSize)
79 {
80 Buf = new Byte[dictSize];
81 Pos = 0;
82 Size = dictSize;
83 IsFull = false;
84 TotalPos = 0;
85 }
86
PutByte(Byte b)87 void PutByte(Byte b)
88 {
89 TotalPos++;
90 Buf[Pos++] = b;
91 if (Pos == Size)
92 {
93 Pos = 0;
94 IsFull = true;
95 }
96 OutStream.WriteByte(b);
97 }
98
GetByte(UInt32 dist) const99 Byte GetByte(UInt32 dist) const
100 {
101 return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
102 }
103
CopyMatch(UInt32 dist,unsigned len)104 void CopyMatch(UInt32 dist, unsigned len)
105 {
106 for (; len > 0; len--)
107 PutByte(GetByte(dist));
108 }
109
CheckDistance(UInt32 dist) const110 bool CheckDistance(UInt32 dist) const
111 {
112 return dist <= Pos || IsFull;
113 }
114
IsEmpty() const115 bool IsEmpty() const
116 {
117 return Pos == 0 && !IsFull;
118 }
119 };
120
121
122 #define kNumBitModelTotalBits 11
123 #define kNumMoveBits 5
124
125 typedef UInt16 CProb;
126
127 #define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
128
129 #define INIT_PROBS(p) \
130 { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
131
132 class CRangeDecoder
133 {
134 UInt32 Range;
135 UInt32 Code;
136
137 void Normalize();
138
139 public:
140
141 CInputStream *InStream;
142 bool Corrupted;
143
144 bool Init();
IsFinishedOK() const145 bool IsFinishedOK() const { return Code == 0; }
146
147 UInt32 DecodeDirectBits(unsigned numBits);
148 unsigned DecodeBit(CProb *prob);
149 };
150
Init()151 bool CRangeDecoder::Init()
152 {
153 Corrupted = false;
154 Range = 0xFFFFFFFF;
155 Code = 0;
156
157 Byte b = InStream->ReadByte();
158
159 for (int i = 0; i < 4; i++)
160 Code = (Code << 8) | InStream->ReadByte();
161
162 if (b != 0 || Code == Range)
163 Corrupted = true;
164 return b == 0;
165 }
166
167 #define kTopValue ((UInt32)1 << 24)
168
Normalize()169 void CRangeDecoder::Normalize()
170 {
171 if (Range < kTopValue)
172 {
173 Range <<= 8;
174 Code = (Code << 8) | InStream->ReadByte();
175 }
176 }
177
DecodeDirectBits(unsigned numBits)178 UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
179 {
180 UInt32 res = 0;
181 do
182 {
183 Range >>= 1;
184 Code -= Range;
185 UInt32 t = 0 - ((UInt32)Code >> 31);
186 Code += Range & t;
187
188 if (Code == Range)
189 Corrupted = true;
190
191 Normalize();
192 res <<= 1;
193 res += t + 1;
194 }
195 while (--numBits);
196 return res;
197 }
198
DecodeBit(CProb * prob)199 unsigned CRangeDecoder::DecodeBit(CProb *prob)
200 {
201 unsigned v = *prob;
202 UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
203 unsigned symbol;
204 if (Code < bound)
205 {
206 v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
207 Range = bound;
208 symbol = 0;
209 }
210 else
211 {
212 v -= v >> kNumMoveBits;
213 Code -= bound;
214 Range -= bound;
215 symbol = 1;
216 }
217 *prob = (CProb)v;
218 Normalize();
219 return symbol;
220 }
221
222
BitTreeReverseDecode(CProb * probs,unsigned numBits,CRangeDecoder * rc)223 unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
224 {
225 unsigned m = 1;
226 unsigned symbol = 0;
227 for (unsigned i = 0; i < numBits; i++)
228 {
229 unsigned bit = rc->DecodeBit(&probs[m]);
230 m <<= 1;
231 m += bit;
232 symbol |= (bit << i);
233 }
234 return symbol;
235 }
236
237 template <unsigned NumBits>
238 class CBitTreeDecoder
239 {
240 CProb Probs[(unsigned)1 << NumBits];
241
242 public:
243
Init()244 void Init()
245 {
246 INIT_PROBS(Probs);
247 }
248
Decode(CRangeDecoder * rc)249 unsigned Decode(CRangeDecoder *rc)
250 {
251 unsigned m = 1;
252 for (unsigned i = 0; i < NumBits; i++)
253 m = (m << 1) + rc->DecodeBit(&Probs[m]);
254 return m - ((unsigned)1 << NumBits);
255 }
256
ReverseDecode(CRangeDecoder * rc)257 unsigned ReverseDecode(CRangeDecoder *rc)
258 {
259 return BitTreeReverseDecode(Probs, NumBits, rc);
260 }
261 };
262
263 #define kNumPosBitsMax 4
264
265 #define kNumStates 12
266 #define kNumLenToPosStates 4
267 #define kNumAlignBits 4
268 #define kStartPosModelIndex 4
269 #define kEndPosModelIndex 14
270 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
271 #define kMatchMinLen 2
272
273 class CLenDecoder
274 {
275 CProb Choice;
276 CProb Choice2;
277 CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
278 CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
279 CBitTreeDecoder<8> HighCoder;
280
281 public:
282
Init()283 void Init()
284 {
285 Choice = PROB_INIT_VAL;
286 Choice2 = PROB_INIT_VAL;
287 HighCoder.Init();
288 for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
289 {
290 LowCoder[i].Init();
291 MidCoder[i].Init();
292 }
293 }
294
Decode(CRangeDecoder * rc,unsigned posState)295 unsigned Decode(CRangeDecoder *rc, unsigned posState)
296 {
297 if (rc->DecodeBit(&Choice) == 0)
298 return LowCoder[posState].Decode(rc);
299 if (rc->DecodeBit(&Choice2) == 0)
300 return 8 + MidCoder[posState].Decode(rc);
301 return 16 + HighCoder.Decode(rc);
302 }
303 };
304
UpdateState_Literal(unsigned state)305 unsigned UpdateState_Literal(unsigned state)
306 {
307 if (state < 4) return 0;
308 else if (state < 10) return state - 3;
309 else return state - 6;
310 }
UpdateState_Match(unsigned state)311 unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; }
UpdateState_Rep(unsigned state)312 unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; }
UpdateState_ShortRep(unsigned state)313 unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
314
315 #define LZMA_DIC_MIN (1 << 12)
316
317 class CLzmaDecoder
318 {
319 public:
320 CRangeDecoder RangeDec;
321 COutWindow OutWindow;
322
323 bool markerIsMandatory;
324 unsigned lc, pb, lp;
325 UInt32 dictSize;
326 UInt32 dictSizeInProperties;
327
DecodeProperties(const Byte * properties)328 void DecodeProperties(const Byte *properties)
329 {
330 unsigned d = properties[0];
331 if (d >= (9 * 5 * 5))
332 throw "Incorrect LZMA properties";
333 lc = d % 9;
334 d /= 9;
335 pb = d / 5;
336 lp = d % 5;
337 dictSizeInProperties = 0;
338 for (int i = 0; i < 4; i++)
339 dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
340 dictSize = dictSizeInProperties;
341 if (dictSize < LZMA_DIC_MIN)
342 dictSize = LZMA_DIC_MIN;
343 }
344
CLzmaDecoder()345 CLzmaDecoder(): LitProbs(NULL) {}
~CLzmaDecoder()346 ~CLzmaDecoder() { delete []LitProbs; }
347
Create()348 void Create()
349 {
350 OutWindow.Create(dictSize);
351 CreateLiterals();
352 }
353
354 int Decode(bool unpackSizeDefined, UInt64 unpackSize);
355
356 private:
357
358 CProb *LitProbs;
359
CreateLiterals()360 void CreateLiterals()
361 {
362 LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
363 }
364
InitLiterals()365 void InitLiterals()
366 {
367 UInt32 num = (UInt32)0x300 << (lc + lp);
368 for (UInt32 i = 0; i < num; i++)
369 LitProbs[i] = PROB_INIT_VAL;
370 }
371
DecodeLiteral(unsigned state,UInt32 rep0)372 void DecodeLiteral(unsigned state, UInt32 rep0)
373 {
374 unsigned prevByte = 0;
375 if (!OutWindow.IsEmpty())
376 prevByte = OutWindow.GetByte(1);
377
378 unsigned symbol = 1;
379 unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
380 CProb *probs = &LitProbs[(UInt32)0x300 * litState];
381
382 if (state >= 7)
383 {
384 unsigned matchByte = OutWindow.GetByte(rep0 + 1);
385 do
386 {
387 unsigned matchBit = (matchByte >> 7) & 1;
388 matchByte <<= 1;
389 unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
390 symbol = (symbol << 1) | bit;
391 if (matchBit != bit)
392 break;
393 }
394 while (symbol < 0x100);
395 }
396 while (symbol < 0x100)
397 symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
398 OutWindow.PutByte((Byte)(symbol - 0x100));
399 }
400
401 CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
402 CBitTreeDecoder<kNumAlignBits> AlignDecoder;
403 CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
404
InitDist()405 void InitDist()
406 {
407 for (unsigned i = 0; i < kNumLenToPosStates; i++)
408 PosSlotDecoder[i].Init();
409 AlignDecoder.Init();
410 INIT_PROBS(PosDecoders);
411 }
412
DecodeDistance(unsigned len)413 unsigned DecodeDistance(unsigned len)
414 {
415 unsigned lenState = len;
416 if (lenState > kNumLenToPosStates - 1)
417 lenState = kNumLenToPosStates - 1;
418
419 unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
420 if (posSlot < 4)
421 return posSlot;
422
423 unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
424 UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
425 if (posSlot < kEndPosModelIndex)
426 dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
427 else
428 {
429 dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
430 dist += AlignDecoder.ReverseDecode(&RangeDec);
431 }
432 return dist;
433 }
434
435 CProb IsMatch[kNumStates << kNumPosBitsMax];
436 CProb IsRep[kNumStates];
437 CProb IsRepG0[kNumStates];
438 CProb IsRepG1[kNumStates];
439 CProb IsRepG2[kNumStates];
440 CProb IsRep0Long[kNumStates << kNumPosBitsMax];
441
442 CLenDecoder LenDecoder;
443 CLenDecoder RepLenDecoder;
444
Init()445 void Init()
446 {
447 InitLiterals();
448 InitDist();
449
450 INIT_PROBS(IsMatch);
451 INIT_PROBS(IsRep);
452 INIT_PROBS(IsRepG0);
453 INIT_PROBS(IsRepG1);
454 INIT_PROBS(IsRepG2);
455 INIT_PROBS(IsRep0Long);
456
457 LenDecoder.Init();
458 RepLenDecoder.Init();
459 }
460 };
461
462
463 #define LZMA_RES_ERROR 0
464 #define LZMA_RES_FINISHED_WITH_MARKER 1
465 #define LZMA_RES_FINISHED_WITHOUT_MARKER 2
466
Decode(bool unpackSizeDefined,UInt64 unpackSize)467 int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
468 {
469 if (!RangeDec.Init())
470 return LZMA_RES_ERROR;
471
472 Init();
473
474 UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
475 unsigned state = 0;
476
477 for (;;)
478 {
479 if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
480 if (RangeDec.IsFinishedOK())
481 return LZMA_RES_FINISHED_WITHOUT_MARKER;
482
483 unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
484
485 if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
486 {
487 if (unpackSizeDefined && unpackSize == 0)
488 return LZMA_RES_ERROR;
489 DecodeLiteral(state, rep0);
490 state = UpdateState_Literal(state);
491 unpackSize--;
492 continue;
493 }
494
495 unsigned len;
496
497 if (RangeDec.DecodeBit(&IsRep[state]) != 0)
498 {
499 if (unpackSizeDefined && unpackSize == 0)
500 return LZMA_RES_ERROR;
501 if (OutWindow.IsEmpty())
502 return LZMA_RES_ERROR;
503 if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
504 {
505 if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
506 {
507 state = UpdateState_ShortRep(state);
508 OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
509 unpackSize--;
510 continue;
511 }
512 }
513 else
514 {
515 UInt32 dist;
516 if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
517 dist = rep1;
518 else
519 {
520 if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
521 dist = rep2;
522 else
523 {
524 dist = rep3;
525 rep3 = rep2;
526 }
527 rep2 = rep1;
528 }
529 rep1 = rep0;
530 rep0 = dist;
531 }
532 len = RepLenDecoder.Decode(&RangeDec, posState);
533 state = UpdateState_Rep(state);
534 }
535 else
536 {
537 rep3 = rep2;
538 rep2 = rep1;
539 rep1 = rep0;
540 len = LenDecoder.Decode(&RangeDec, posState);
541 state = UpdateState_Match(state);
542 rep0 = DecodeDistance(len);
543 if (rep0 == 0xFFFFFFFF)
544 return RangeDec.IsFinishedOK() ?
545 LZMA_RES_FINISHED_WITH_MARKER :
546 LZMA_RES_ERROR;
547
548 if (unpackSizeDefined && unpackSize == 0)
549 return LZMA_RES_ERROR;
550 if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
551 return LZMA_RES_ERROR;
552 }
553 len += kMatchMinLen;
554 bool isError = false;
555 if (unpackSizeDefined && unpackSize < len)
556 {
557 len = (unsigned)unpackSize;
558 isError = true;
559 }
560 OutWindow.CopyMatch(rep0 + 1, len);
561 unpackSize -= len;
562 if (isError)
563 return LZMA_RES_ERROR;
564 }
565 }
566
Print(const char * s)567 static void Print(const char *s)
568 {
569 fputs(s, stdout);
570 }
571
PrintError(const char * s)572 static void PrintError(const char *s)
573 {
574 fputs(s, stderr);
575 }
576
577
578 #define CONVERT_INT_TO_STR(charType, tempSize) \
579
ConvertUInt64ToString(UInt64 val,char * s)580 void ConvertUInt64ToString(UInt64 val, char *s)
581 {
582 char temp[32];
583 unsigned i = 0;
584 while (val >= 10)
585 {
586 temp[i++] = (char)('0' + (unsigned)(val % 10));
587 val /= 10;
588 }
589 *s++ = (char)('0' + (unsigned)val);
590 while (i != 0)
591 {
592 i--;
593 *s++ = temp[i];
594 }
595 *s = 0;
596 }
597
PrintUInt64(const char * title,UInt64 v)598 void PrintUInt64(const char *title, UInt64 v)
599 {
600 Print(title);
601 Print(" : ");
602 char s[32];
603 ConvertUInt64ToString(v, s);
604 Print(s);
605 Print(" bytes \n");
606 }
607
main2(int numArgs,const char * args[])608 int main2(int numArgs, const char *args[])
609 {
610 Print("\nLZMA Reference Decoder 15.00 : Igor Pavlov : Public domain : 2015-04-16\n");
611 if (numArgs == 1)
612 Print("\nUse: lzmaSpec a.lzma outFile");
613
614 if (numArgs != 3)
615 throw "you must specify two parameters";
616
617 CInputStream inStream;
618 inStream.File = fopen(args[1], "rb");
619 inStream.Init();
620 if (inStream.File == 0)
621 throw "Can't open input file";
622
623 CLzmaDecoder lzmaDecoder;
624 lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+");
625 lzmaDecoder.OutWindow.OutStream.Init();
626 if (inStream.File == 0)
627 throw "Can't open output file";
628
629 Byte header[13];
630 int i;
631 for (i = 0; i < 13; i++)
632 header[i] = inStream.ReadByte();
633
634 lzmaDecoder.DecodeProperties(header);
635
636 printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
637 printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
638 printf("\nDictionary Size for decoding = %u", lzmaDecoder.dictSize);
639
640 UInt64 unpackSize = 0;
641 bool unpackSizeDefined = false;
642 for (i = 0; i < 8; i++)
643 {
644 Byte b = header[5 + i];
645 if (b != 0xFF)
646 unpackSizeDefined = true;
647 unpackSize |= (UInt64)b << (8 * i);
648 }
649
650 lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
651
652 Print("\n");
653 if (unpackSizeDefined)
654 PrintUInt64("Uncompressed Size", unpackSize);
655 else
656 Print("End marker is expected\n");
657 lzmaDecoder.RangeDec.InStream = &inStream;
658
659 Print("\n");
660
661 lzmaDecoder.Create();
662
663 int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
664
665 PrintUInt64("Read ", inStream.Processed);
666 PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
667
668 if (res == LZMA_RES_ERROR)
669 throw "LZMA decoding error";
670 else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
671 Print("Finished without end marker");
672 else if (res == LZMA_RES_FINISHED_WITH_MARKER)
673 {
674 if (unpackSizeDefined)
675 {
676 if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
677 throw "Finished with end marker before than specified size";
678 Print("Warning: ");
679 }
680 Print("Finished with end marker");
681 }
682 else
683 throw "Internal Error";
684
685 Print("\n");
686
687 if (lzmaDecoder.RangeDec.Corrupted)
688 {
689 Print("\nWarning: LZMA stream is corrupted\n");
690 }
691
692 return 0;
693 }
694
695
696 int
697 #ifdef _MSC_VER
698 __cdecl
699 #endif
main(int numArgs,const char * args[])700 main(int numArgs, const char *args[])
701 {
702 try { return main2(numArgs, args); }
703 catch (const char *s)
704 {
705 PrintError("\nError:\n");
706 PrintError(s);
707 PrintError("\n");
708 return 1;
709 }
710 catch(...)
711 {
712 PrintError("\nError\n");
713 return 1;
714 }
715 }
716