1 /* LzmaSpec.c -- LZMA Reference Decoder
2 2013-07-28 : Igor Pavlov : Public domain */
3 
4 // This code implements LZMA file decoding according to LZMA specification.
5 // This code is not optimized for speed.
6 
7 #include <stdio.h>
8 
9 #ifdef _MSC_VER
10   #pragma warning(disable : 4710) // function not inlined
11   #pragma warning(disable : 4996) // This function or variable may be unsafe
12 #endif
13 
14 typedef unsigned char Byte;
15 typedef unsigned short UInt16;
16 
17 #ifdef _LZMA_UINT32_IS_ULONG
18   typedef unsigned long UInt32;
19 #else
20   typedef unsigned int UInt32;
21 #endif
22 
23 #if defined(_MSC_VER) || defined(__BORLANDC__)
24   typedef unsigned __int64 UInt64;
25 #else
26   typedef unsigned long long int UInt64;
27 #endif
28 
29 
30 struct CInputStream
31 {
32   FILE *File;
33   UInt64 Processed;
34 
InitCInputStream35   void Init() { Processed = 0; }
36 
ReadByteCInputStream37   Byte ReadByte()
38   {
39     int c = getc(File);
40     if (c < 0)
41       throw "Unexpected end of file";
42     Processed++;
43     return (Byte)c;
44   }
45 };
46 
47 
48 struct COutStream
49 {
50   FILE *File;
51   UInt64 Processed;
52 
InitCOutStream53   void Init() { Processed = 0; }
54 
WriteByteCOutStream55   void WriteByte(Byte b)
56   {
57     if (putc(b, File) == EOF)
58       throw "File writing error";
59     Processed++;
60   }
61 };
62 
63 
64 class COutWindow
65 {
66   Byte *Buf;
67   UInt32 Pos;
68   UInt32 Size;
69   bool IsFull;
70 
71 public:
72   unsigned TotalPos;
73   COutStream OutStream;
74 
COutWindow()75   COutWindow(): Buf(NULL) {}
~COutWindow()76   ~COutWindow() { delete []Buf; }
77 
Create(UInt32 dictSize)78   void Create(UInt32 dictSize)
79   {
80     Buf = new Byte[dictSize];
81     Pos = 0;
82     Size = dictSize;
83     IsFull = false;
84     TotalPos = 0;
85   }
86 
PutByte(Byte b)87   void PutByte(Byte b)
88   {
89     TotalPos++;
90     Buf[Pos++] = b;
91     if (Pos == Size)
92     {
93       Pos = 0;
94       IsFull = true;
95     }
96     OutStream.WriteByte(b);
97   }
98 
GetByte(UInt32 dist) const99   Byte GetByte(UInt32 dist) const
100   {
101     return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
102   }
103 
CopyMatch(UInt32 dist,unsigned len)104   void CopyMatch(UInt32 dist, unsigned len)
105   {
106     for (; len > 0; len--)
107       PutByte(GetByte(dist));
108   }
109 
CheckDistance(UInt32 dist) const110   bool CheckDistance(UInt32 dist) const
111   {
112     return dist <= Pos || IsFull;
113   }
114 
IsEmpty() const115   bool IsEmpty() const
116   {
117     return Pos == 0 && !IsFull;
118   }
119 };
120 
121 
122 #define kNumBitModelTotalBits 11
123 #define kNumMoveBits 5
124 
125 typedef UInt16 CProb;
126 
127 #define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
128 
129 #define INIT_PROBS(p) \
130  { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
131 
132 class CRangeDecoder
133 {
134   UInt32 Range;
135   UInt32 Code;
136 
137   void Normalize();
138 
139 public:
140 
141   CInputStream *InStream;
142   bool Corrupted;
143 
144   void Init();
IsFinishedOK() const145   bool IsFinishedOK() const { return Code == 0; }
146 
147   UInt32 DecodeDirectBits(unsigned numBits);
148   unsigned DecodeBit(CProb *prob);
149 };
150 
Init()151 void CRangeDecoder::Init()
152 {
153   Corrupted = false;
154 
155   if (InStream->ReadByte() != 0)
156     Corrupted = true;
157 
158   Range = 0xFFFFFFFF;
159   Code = 0;
160   for (int i = 0; i < 4; i++)
161     Code = (Code << 8) | InStream->ReadByte();
162 
163   if (Code == Range)
164     Corrupted = true;
165 }
166 
167 #define kTopValue ((UInt32)1 << 24)
168 
Normalize()169 void CRangeDecoder::Normalize()
170 {
171   if (Range < kTopValue)
172   {
173     Range <<= 8;
174     Code = (Code << 8) | InStream->ReadByte();
175   }
176 }
177 
DecodeDirectBits(unsigned numBits)178 UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
179 {
180   UInt32 res = 0;
181   do
182   {
183     Range >>= 1;
184     Code -= Range;
185     UInt32 t = 0 - ((UInt32)Code >> 31);
186     Code += Range & t;
187 
188     if (Code == Range)
189       Corrupted = true;
190 
191     Normalize();
192     res <<= 1;
193     res += t + 1;
194   }
195   while (--numBits);
196   return res;
197 }
198 
DecodeBit(CProb * prob)199 unsigned CRangeDecoder::DecodeBit(CProb *prob)
200 {
201   unsigned v = *prob;
202   UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
203   unsigned symbol;
204   if (Code < bound)
205   {
206     v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
207     Range = bound;
208     symbol = 0;
209   }
210   else
211   {
212     v -= v >> kNumMoveBits;
213     Code -= bound;
214     Range -= bound;
215     symbol = 1;
216   }
217   *prob = (CProb)v;
218   Normalize();
219   return symbol;
220 }
221 
222 
BitTreeReverseDecode(CProb * probs,unsigned numBits,CRangeDecoder * rc)223 unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
224 {
225   unsigned m = 1;
226   unsigned symbol = 0;
227   for (unsigned i = 0; i < numBits; i++)
228   {
229     unsigned bit = rc->DecodeBit(&probs[m]);
230     m <<= 1;
231     m += bit;
232     symbol |= (bit << i);
233   }
234   return symbol;
235 }
236 
237 template <unsigned NumBits>
238 class CBitTreeDecoder
239 {
240   CProb Probs[(unsigned)1 << NumBits];
241 
242 public:
243 
Init()244   void Init()
245   {
246     INIT_PROBS(Probs);
247   }
248 
Decode(CRangeDecoder * rc)249   unsigned Decode(CRangeDecoder *rc)
250   {
251     unsigned m = 1;
252     for (unsigned i = 0; i < NumBits; i++)
253       m = (m << 1) + rc->DecodeBit(&Probs[m]);
254     return m - ((unsigned)1 << NumBits);
255   }
256 
ReverseDecode(CRangeDecoder * rc)257   unsigned ReverseDecode(CRangeDecoder *rc)
258   {
259     return BitTreeReverseDecode(Probs, NumBits, rc);
260   }
261 };
262 
263 #define kNumPosBitsMax 4
264 
265 #define kNumStates 12
266 #define kNumLenToPosStates 4
267 #define kNumAlignBits 4
268 #define kStartPosModelIndex 4
269 #define kEndPosModelIndex 14
270 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
271 #define kMatchMinLen 2
272 
273 class CLenDecoder
274 {
275   CProb Choice;
276   CProb Choice2;
277   CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
278   CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
279   CBitTreeDecoder<8> HighCoder;
280 
281 public:
282 
Init()283   void Init()
284   {
285     Choice = PROB_INIT_VAL;
286     Choice2 = PROB_INIT_VAL;
287     HighCoder.Init();
288     for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
289     {
290       LowCoder[i].Init();
291       MidCoder[i].Init();
292     }
293   }
294 
Decode(CRangeDecoder * rc,unsigned posState)295   unsigned Decode(CRangeDecoder *rc, unsigned posState)
296   {
297     if (rc->DecodeBit(&Choice) == 0)
298       return LowCoder[posState].Decode(rc);
299     if (rc->DecodeBit(&Choice2) == 0)
300       return 8 + MidCoder[posState].Decode(rc);
301     return 16 + HighCoder.Decode(rc);
302   }
303 };
304 
UpdateState_Literal(unsigned state)305 unsigned UpdateState_Literal(unsigned state)
306 {
307   if (state < 4) return 0;
308   else if (state < 10) return state - 3;
309   else return state - 6;
310 }
UpdateState_Match(unsigned state)311 unsigned UpdateState_Match   (unsigned state) { return state < 7 ? 7 : 10; }
UpdateState_Rep(unsigned state)312 unsigned UpdateState_Rep     (unsigned state) { return state < 7 ? 8 : 11; }
UpdateState_ShortRep(unsigned state)313 unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
314 
315 #define LZMA_DIC_MIN (1 << 12)
316 
317 class CLzmaDecoder
318 {
319 public:
320   CRangeDecoder RangeDec;
321   COutWindow OutWindow;
322 
323   bool markerIsMandatory;
324   unsigned lc, pb, lp;
325   UInt32 dictSize;
326   UInt32 dictSizeInProperties;
327 
DecodeProperties(const Byte * properties)328   void DecodeProperties(const Byte *properties)
329   {
330     unsigned d = properties[0];
331     if (d >= (9 * 5 * 5))
332       throw "Incorrect LZMA properties";
333     lc = d % 9;
334     d /= 9;
335     pb = d / 5;
336     lp = d % 5;
337     dictSizeInProperties = 0;
338     for (int i = 0; i < 4; i++)
339       dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
340     dictSize = dictSizeInProperties;
341     if (dictSize < LZMA_DIC_MIN)
342       dictSize = LZMA_DIC_MIN;
343   }
344 
CLzmaDecoder()345   CLzmaDecoder(): LitProbs(NULL) {}
~CLzmaDecoder()346   ~CLzmaDecoder() { delete []LitProbs; }
347 
Create()348   void Create()
349   {
350     OutWindow.Create(dictSize);
351     CreateLiterals();
352   }
353 
354   int Decode(bool unpackSizeDefined, UInt64 unpackSize);
355 
356 private:
357 
358   CProb *LitProbs;
359 
CreateLiterals()360   void CreateLiterals()
361   {
362     LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
363   }
364 
InitLiterals()365   void InitLiterals()
366   {
367     UInt32 num = (UInt32)0x300 << (lc + lp);
368     for (UInt32 i = 0; i < num; i++)
369       LitProbs[i] = PROB_INIT_VAL;
370   }
371 
DecodeLiteral(unsigned state,UInt32 rep0)372   void DecodeLiteral(unsigned state, UInt32 rep0)
373   {
374     unsigned prevByte = 0;
375     if (!OutWindow.IsEmpty())
376       prevByte = OutWindow.GetByte(1);
377 
378     unsigned symbol = 1;
379     unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
380     CProb *probs = &LitProbs[(UInt32)0x300 * litState];
381 
382     if (state >= 7)
383     {
384       unsigned matchByte = OutWindow.GetByte(rep0 + 1);
385       do
386       {
387         unsigned matchBit = (matchByte >> 7) & 1;
388         matchByte <<= 1;
389         unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
390         symbol = (symbol << 1) | bit;
391         if (matchBit != bit)
392           break;
393       }
394       while (symbol < 0x100);
395     }
396     while (symbol < 0x100)
397       symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
398     OutWindow.PutByte((Byte)(symbol - 0x100));
399   }
400 
401   CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
402   CBitTreeDecoder<kNumAlignBits> AlignDecoder;
403   CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
404 
InitDist()405   void InitDist()
406   {
407     for (unsigned i = 0; i < kNumLenToPosStates; i++)
408       PosSlotDecoder[i].Init();
409     AlignDecoder.Init();
410     INIT_PROBS(PosDecoders);
411   }
412 
DecodeDistance(unsigned len)413   unsigned DecodeDistance(unsigned len)
414   {
415     unsigned lenState = len;
416     if (lenState > kNumLenToPosStates - 1)
417       lenState = kNumLenToPosStates - 1;
418 
419     unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
420     if (posSlot < 4)
421       return posSlot;
422 
423     unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
424     UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
425     if (posSlot < kEndPosModelIndex)
426       dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
427     else
428     {
429       dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
430       dist += AlignDecoder.ReverseDecode(&RangeDec);
431     }
432     return dist;
433   }
434 
435   CProb IsMatch[kNumStates << kNumPosBitsMax];
436   CProb IsRep[kNumStates];
437   CProb IsRepG0[kNumStates];
438   CProb IsRepG1[kNumStates];
439   CProb IsRepG2[kNumStates];
440   CProb IsRep0Long[kNumStates << kNumPosBitsMax];
441 
442   CLenDecoder LenDecoder;
443   CLenDecoder RepLenDecoder;
444 
Init()445   void Init()
446   {
447     InitLiterals();
448     InitDist();
449 
450     INIT_PROBS(IsMatch);
451     INIT_PROBS(IsRep);
452     INIT_PROBS(IsRepG0);
453     INIT_PROBS(IsRepG1);
454     INIT_PROBS(IsRepG2);
455     INIT_PROBS(IsRep0Long);
456 
457     LenDecoder.Init();
458     RepLenDecoder.Init();
459   }
460 };
461 
462 
463 #define LZMA_RES_ERROR                   0
464 #define LZMA_RES_FINISHED_WITH_MARKER    1
465 #define LZMA_RES_FINISHED_WITHOUT_MARKER 2
466 
Decode(bool unpackSizeDefined,UInt64 unpackSize)467 int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
468 {
469   Init();
470   RangeDec.Init();
471 
472   UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
473   unsigned state = 0;
474 
475   for (;;)
476   {
477     if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
478       if (RangeDec.IsFinishedOK())
479         return LZMA_RES_FINISHED_WITHOUT_MARKER;
480 
481     unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
482 
483     if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
484     {
485       if (unpackSizeDefined && unpackSize == 0)
486         return LZMA_RES_ERROR;
487       DecodeLiteral(state, rep0);
488       state = UpdateState_Literal(state);
489       unpackSize--;
490       continue;
491     }
492 
493     unsigned len;
494 
495     if (RangeDec.DecodeBit(&IsRep[state]) != 0)
496     {
497       if (unpackSizeDefined && unpackSize == 0)
498         return LZMA_RES_ERROR;
499       if (OutWindow.IsEmpty())
500         return LZMA_RES_ERROR;
501       if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
502       {
503         if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
504         {
505           state = UpdateState_ShortRep(state);
506           OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
507           unpackSize--;
508           continue;
509         }
510       }
511       else
512       {
513         UInt32 dist;
514         if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
515           dist = rep1;
516         else
517         {
518           if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
519             dist = rep2;
520           else
521           {
522             dist = rep3;
523             rep3 = rep2;
524           }
525           rep2 = rep1;
526         }
527         rep1 = rep0;
528         rep0 = dist;
529       }
530       len = RepLenDecoder.Decode(&RangeDec, posState);
531       state = UpdateState_Rep(state);
532     }
533     else
534     {
535       rep3 = rep2;
536       rep2 = rep1;
537       rep1 = rep0;
538       len = LenDecoder.Decode(&RangeDec, posState);
539       state = UpdateState_Match(state);
540       rep0 = DecodeDistance(len);
541       if (rep0 == 0xFFFFFFFF)
542         return RangeDec.IsFinishedOK() ?
543             LZMA_RES_FINISHED_WITH_MARKER :
544             LZMA_RES_ERROR;
545 
546       if (unpackSizeDefined && unpackSize == 0)
547         return LZMA_RES_ERROR;
548       if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
549         return LZMA_RES_ERROR;
550     }
551     len += kMatchMinLen;
552     bool isError = false;
553     if (unpackSizeDefined && unpackSize < len)
554     {
555       len = (unsigned)unpackSize;
556       isError = true;
557     }
558     OutWindow.CopyMatch(rep0 + 1, len);
559     unpackSize -= len;
560     if (isError)
561       return LZMA_RES_ERROR;
562   }
563 }
564 
Print(const char * s)565 static void Print(const char *s)
566 {
567   fputs(s, stdout);
568 }
569 
PrintError(const char * s)570 static void PrintError(const char *s)
571 {
572   fputs(s, stderr);
573 }
574 
575 
576 #define CONVERT_INT_TO_STR(charType, tempSize) \
577 
ConvertUInt64ToString(UInt64 val,char * s)578 void ConvertUInt64ToString(UInt64 val, char *s)
579 {
580   char temp[32];
581   unsigned i = 0;
582   while (val >= 10)
583   {
584     temp[i++] = (char)('0' + (unsigned)(val % 10));
585     val /= 10;
586   }
587   *s++ = (char)('0' + (unsigned)val);
588   while (i != 0)
589   {
590     i--;
591     *s++ = temp[i];
592   }
593   *s = 0;
594 }
595 
PrintUInt64(const char * title,UInt64 v)596 void PrintUInt64(const char *title, UInt64 v)
597 {
598   Print(title);
599   Print(" : ");
600   char s[32];
601   ConvertUInt64ToString(v, s);
602   Print(s);
603   Print(" bytes \n");
604 }
605 
main2(int numArgs,const char * args[])606 int main2(int numArgs, const char *args[])
607 {
608   Print("\nLZMA Reference Decoder 9.31 : Igor Pavlov : Public domain : 2013-02-06\n");
609   if (numArgs == 1)
610     Print("\nUse: lzmaSpec a.lzma outFile");
611 
612   if (numArgs != 3)
613     throw "you must specify two parameters";
614 
615   CInputStream inStream;
616   inStream.File = fopen(args[1], "rb");
617   inStream.Init();
618   if (inStream.File == 0)
619     throw "Can't open input file";
620 
621   CLzmaDecoder lzmaDecoder;
622   lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+");
623   lzmaDecoder.OutWindow.OutStream.Init();
624   if (inStream.File == 0)
625     throw "Can't open output file";
626 
627   Byte header[13];
628   int i;
629   for (i = 0; i < 13; i++)
630     header[i] = inStream.ReadByte();
631 
632   lzmaDecoder.DecodeProperties(header);
633 
634   printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
635   printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
636   printf("\nDictionary Size for decoding  = %u", lzmaDecoder.dictSize);
637 
638   UInt64 unpackSize = 0;
639   bool unpackSizeDefined = false;
640   for (i = 0; i < 8; i++)
641   {
642     Byte b = header[5 + i];
643     if (b != 0xFF)
644       unpackSizeDefined = true;
645     unpackSize |= (UInt64)b << (8 * i);
646   }
647 
648   lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
649 
650   Print("\n");
651   if (unpackSizeDefined)
652     PrintUInt64("Uncompressed Size", unpackSize);
653   else
654     Print("End marker is expected\n");
655   lzmaDecoder.RangeDec.InStream = &inStream;
656 
657   Print("\n");
658 
659   lzmaDecoder.Create();
660   // we support the streams that have uncompressed size and marker.
661   int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
662 
663   PrintUInt64("Read    ", inStream.Processed);
664   PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
665 
666   if (res == LZMA_RES_ERROR)
667     throw "LZMA decoding error";
668   else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
669     Print("Finished without end marker");
670   else if (res == LZMA_RES_FINISHED_WITH_MARKER)
671   {
672     if (unpackSizeDefined)
673     {
674       if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
675         throw "Finished with end marker before than specified size";
676       Print("Warning: ");
677     }
678     Print("Finished with end marker");
679   }
680   else
681     throw "Internal Error";
682 
683   Print("\n");
684 
685   if (lzmaDecoder.RangeDec.Corrupted)
686   {
687     Print("\nWarning: LZMA stream is corrupted\n");
688   }
689 
690   return 0;
691 }
692 
693 
694 int
695   #ifdef _MSC_VER
696     __cdecl
697   #endif
main(int numArgs,const char * args[])698 main(int numArgs, const char *args[])
699 {
700   try { return main2(numArgs, args); }
701   catch (const char *s)
702   {
703     PrintError("\nError:\n");
704     PrintError(s);
705     PrintError("\n");
706     return 1;
707   }
708   catch(...)
709   {
710     PrintError("\nError\n");
711     return 1;
712   }
713 }
714