1 ///////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4 // Digital Ltd. LLC
5 //
6 // All rights reserved.
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 // *       Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // *       Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 // *       Neither the name of Industrial Light & Magic nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 //
33 ///////////////////////////////////////////////////////////////////////////
34 
35 
36 
37 
38 //-----------------------------------------------------------------------------
39 //
40 //	16-bit Huffman compression and decompression.
41 //
42 //	The source code in this file is derived from the 8-bit
43 //	Huffman compression and decompression routines written
44 //	by Christian Rouet for his PIZ image file format.
45 //
46 //-----------------------------------------------------------------------------
47 
48 #include <ImfHuf.h>
49 #include <ImfInt64.h>
50 #include <ImfAutoArray.h>
51 #include "Iex.h"
52 #include <string.h>
53 #include <assert.h>
54 #include <algorithm>
55 
56 
57 using namespace std;
58 using namespace Iex;
59 
60 namespace Imf {
61 namespace {
62 
63 
64 const int HUF_ENCBITS = 16;			// literal (value) bit length
65 const int HUF_DECBITS = 14;			// decoding bit size (>= 8)
66 
67 const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1;	// encoding table size
68 const int HUF_DECSIZE =  1 << HUF_DECBITS;	// decoding table size
69 const int HUF_DECMASK = HUF_DECSIZE - 1;
70 
71 
72 struct HufDec
73 {				// short code		long code
74                 //-------------------------------
75     int		len:8;		// code length		0
76     int		lit:24;		// lit			p size
77     int	*	p;		// 0			lits
78 };
79 
80 
81 void
invalidNBits()82 invalidNBits ()
83 {
84     throw InputExc ("Error in header for Huffman-encoded data "
85             "(invalid number of bits).");
86 }
87 
88 
89 void
tooMuchData()90 tooMuchData ()
91 {
92     throw InputExc ("Error in Huffman-encoded data "
93             "(decoded data are longer than expected).");
94 }
95 
96 
97 void
notEnoughData()98 notEnoughData ()
99 {
100     throw InputExc ("Error in Huffman-encoded data "
101             "(decoded data are shorter than expected).");
102 }
103 
104 
105 void
invalidCode()106 invalidCode ()
107 {
108     throw InputExc ("Error in Huffman-encoded data "
109             "(invalid code).");
110 }
111 
112 
113 void
invalidTableSize()114 invalidTableSize ()
115 {
116     throw InputExc ("Error in Huffman-encoded data "
117             "(invalid code table size).");
118 }
119 
120 
121 void
unexpectedEndOfTable()122 unexpectedEndOfTable ()
123 {
124     throw InputExc ("Error in Huffman-encoded data "
125             "(unexpected end of code table data).");
126 }
127 
128 
129 void
tableTooLong()130 tableTooLong ()
131 {
132     throw InputExc ("Error in Huffman-encoded data "
133             "(code table is longer than expected).");
134 }
135 
136 
137 void
invalidTableEntry()138 invalidTableEntry ()
139 {
140     throw InputExc ("Error in Huffman-encoded data "
141             "(invalid code table entry).");
142 }
143 
144 
145 inline Int64
hufLength(Int64 code)146 hufLength (Int64 code)
147 {
148     return code & 63;
149 }
150 
151 
152 inline Int64
hufCode(Int64 code)153 hufCode (Int64 code)
154 {
155     return code >> 6;
156 }
157 
158 
159 inline void
outputBits(int nBits,Int64 bits,Int64 & c,int & lc,char * & out)160 outputBits (int nBits, Int64 bits, Int64 &c, int &lc, char *&out)
161 {
162     c <<= nBits;
163     lc += nBits;
164 
165     c |= bits;
166 
167     while (lc >= 8)
168     *out++ = (c >> (lc -= 8));
169 }
170 
171 
172 inline Int64
getBits(int nBits,Int64 & c,int & lc,const char * & in)173 getBits (int nBits, Int64 &c, int &lc, const char *&in)
174 {
175     while (lc < nBits)
176     {
177     c = (c << 8) | *(unsigned char *)(in++);
178     lc += 8;
179     }
180 
181     lc -= nBits;
182     return (c >> lc) & ((1 << nBits) - 1);
183 }
184 
185 
186 //
187 // ENCODING TABLE BUILDING & (UN)PACKING
188 //
189 
190 //
191 // Build a "canonical" Huffman code table:
192 //	- for each (uncompressed) symbol, hcode contains the length
193 //	  of the corresponding code (in the compressed data)
194 //	- canonical codes are computed and stored in hcode
195 //	- the rules for constructing canonical codes are as follows:
196 //	  * shorter codes (if filled with zeroes to the right)
197 //	    have a numerically higher value than longer codes
198 //	  * for codes with the same length, numerical values
199 //	    increase with numerical symbol values
200 //	- because the canonical code table can be constructed from
201 //	  symbol lengths alone, the code table can be transmitted
202 //	  without sending the actual code values
203 //	- see http://www.compressconsult.com/huffman/
204 //
205 
206 void
hufCanonicalCodeTable(Int64 hcode[HUF_ENCSIZE])207 hufCanonicalCodeTable (Int64 hcode[HUF_ENCSIZE])
208 {
209     Int64 n[59];
210 
211     //
212     // For each i from 0 through 58, count the
213     // number of different codes of length i, and
214     // store the count in n[i].
215     //
216 
217     for (int i = 0; i <= 58; ++i)
218     n[i] = 0;
219 
220     for (int i = 0; i < HUF_ENCSIZE; ++i)
221     n[hcode[i]] += 1;
222 
223     //
224     // For each i from 58 through 1, compute the
225     // numerically lowest code with length i, and
226     // store that code in n[i].
227     //
228 
229     Int64 c = 0;
230 
231     for (int i = 58; i > 0; --i)
232     {
233     Int64 nc = ((c + n[i]) >> 1);
234     n[i] = c;
235     c = nc;
236     }
237 
238     //
239     // hcode[i] contains the length, l, of the
240     // code for symbol i.  Assign the next available
241     // code of length l to the symbol and store both
242     // l and the code in hcode[i].
243     //
244 
245     for (int i = 0; i < HUF_ENCSIZE; ++i)
246     {
247     int l = hcode[i];
248 
249     if (l > 0)
250         hcode[i] = l | (n[l]++ << 6);
251     }
252 }
253 
254 
255 //
256 // Compute Huffman codes (based on frq input) and store them in frq:
257 //	- code structure is : [63:lsb - 6:msb] | [5-0: bit length];
258 //	- max code length is 58 bits;
259 //	- codes outside the range [im-iM] have a null length (unused values);
260 //	- original frequencies are destroyed;
261 //	- encoding tables are used by hufEncode() and hufBuildDecTable();
262 //
263 
264 
265 struct FHeapCompare
266 {
operator ()Imf::__anon1b7e4f930111::FHeapCompare267     bool operator () (Int64 *a, Int64 *b) {return *a > *b;}
268 };
269 
270 
271 void
hufBuildEncTable(Int64 * frq,int * im,int * iM)272 hufBuildEncTable
273     (Int64*	frq,	// io: input frequencies [HUF_ENCSIZE], output table
274      int*	im,	//  o: min frq index
275      int*	iM)	//  o: max frq index
276 {
277     //
278     // This function assumes that when it is called, array frq
279     // indicates the frequency of all possible symbols in the data
280     // that are to be Huffman-encoded.  (frq[i] contains the number
281     // of occurrences of symbol i in the data.)
282     //
283     // The loop below does three things:
284     //
285     // 1) Finds the minimum and maximum indices that point
286     //    to non-zero entries in frq:
287     //
288     //     frq[im] != 0, and frq[i] == 0 for all i < im
289     //     frq[iM] != 0, and frq[i] == 0 for all i > iM
290     //
291     // 2) Fills array fHeap with pointers to all non-zero
292     //    entries in frq.
293     //
294     // 3) Initializes array hlink such that hlink[i] == i
295     //    for all array entries.
296     //
297 
298     AutoArray <int, HUF_ENCSIZE> hlink;
299     AutoArray <Int64 *, HUF_ENCSIZE> fHeap;
300 
301     *im = 0;
302 
303     while (!frq[*im])
304     (*im)++;
305 
306     int nf = 0;
307 
308     for (int i = *im; i < HUF_ENCSIZE; i++)
309     {
310     hlink[i] = i;
311 
312     if (frq[i])
313     {
314         fHeap[nf] = &frq[i];
315         nf++;
316         *iM = i;
317     }
318     }
319 
320     //
321     // Add a pseudo-symbol, with a frequency count of 1, to frq;
322     // adjust the fHeap and hlink array accordingly.  Function
323     // hufEncode() uses the pseudo-symbol for run-length encoding.
324     //
325 
326     (*iM)++;
327     frq[*iM] = 1;
328     fHeap[nf] = &frq[*iM];
329     nf++;
330 
331     //
332     // Build an array, scode, such that scode[i] contains the number
333     // of bits assigned to symbol i.  Conceptually this is done by
334     // constructing a tree whose leaves are the symbols with non-zero
335     // frequency:
336     //
337     //     Make a heap that contains all symbols with a non-zero frequency,
338     //     with the least frequent symbol on top.
339     //
340     //     Repeat until only one symbol is left on the heap:
341     //
342     //         Take the two least frequent symbols off the top of the heap.
343     //         Create a new node that has first two nodes as children, and
344     //         whose frequency is the sum of the frequencies of the first
345     //         two nodes.  Put the new node back into the heap.
346     //
347     // The last node left on the heap is the root of the tree.  For each
348     // leaf node, the distance between the root and the leaf is the length
349     // of the code for the corresponding symbol.
350     //
351     // The loop below doesn't actually build the tree; instead we compute
352     // the distances of the leaves from the root on the fly.  When a new
353     // node is added to the heap, then that node's descendants are linked
354     // into a single linear list that starts at the new node, and the code
355     // lengths of the descendants (that is, their distance from the root
356     // of the tree) are incremented by one.
357     //
358 
359     make_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
360 
361     AutoArray <Int64, HUF_ENCSIZE> scode;
362     memset (scode, 0, sizeof (Int64) * HUF_ENCSIZE);
363 
364     while (nf > 1)
365     {
366     //
367     // Find the indices, mm and m, of the two smallest non-zero frq
368     // values in fHeap, add the smallest frq to the second-smallest
369     // frq, and remove the smallest frq value from fHeap.
370     //
371 
372     int mm = fHeap[0] - frq;
373     pop_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
374     --nf;
375 
376     int m = fHeap[0] - frq;
377     pop_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
378 
379     frq[m ] += frq[mm];
380     push_heap (&fHeap[0], &fHeap[nf], FHeapCompare());
381 
382     //
383     // The entries in scode are linked into lists with the
384     // entries in hlink serving as "next" pointers and with
385     // the end of a list marked by hlink[j] == j.
386     //
387     // Traverse the lists that start at scode[m] and scode[mm].
388     // For each element visited, increment the length of the
389     // corresponding code by one bit. (If we visit scode[j]
390     // during the traversal, then the code for symbol j becomes
391     // one bit longer.)
392     //
393     // Merge the lists that start at scode[m] and scode[mm]
394     // into a single list that starts at scode[m].
395     //
396 
397     //
398     // Add a bit to all codes in the first list.
399     //
400 
401     for (int j = m; true; j = hlink[j])
402     {
403         scode[j]++;
404 
405         assert (scode[j] <= 58);
406 
407         if (hlink[j] == j)
408         {
409         //
410         // Merge the two lists.
411         //
412 
413         hlink[j] = mm;
414         break;
415         }
416     }
417 
418     //
419     // Add a bit to all codes in the second list
420     //
421 
422     for (int j = mm; true; j = hlink[j])
423     {
424         scode[j]++;
425 
426         assert (scode[j] <= 58);
427 
428         if (hlink[j] == j)
429         break;
430     }
431     }
432 
433     //
434     // Build a canonical Huffman code table, replacing the code
435     // lengths in scode with (code, code length) pairs.  Copy the
436     // code table from scode into frq.
437     //
438 
439     hufCanonicalCodeTable (scode);
440     memcpy (frq, scode, sizeof (Int64) * HUF_ENCSIZE);
441 }
442 
443 
444 //
445 // Pack an encoding table:
446 //	- only code lengths, not actual codes, are stored
447 //	- runs of zeroes are compressed as follows:
448 //
449 //	  unpacked		packed
450 //	  --------------------------------
451 //	  1 zero		0	(6 bits)
452 //	  2 zeroes		59
453 //	  3 zeroes		60
454 //	  4 zeroes		61
455 //	  5 zeroes		62
456 //	  n zeroes (6 or more)	63 n-6	(6 + 8 bits)
457 //
458 
459 const int SHORT_ZEROCODE_RUN = 59;
460 const int LONG_ZEROCODE_RUN  = 63;
461 const int SHORTEST_LONG_RUN  = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
462 const int LONGEST_LONG_RUN   = 255 + SHORTEST_LONG_RUN;
463 
464 
465 void
hufPackEncTable(const Int64 * hcode,int im,int iM,char ** pcode)466 hufPackEncTable
467     (const Int64*	hcode,		// i : encoding table [HUF_ENCSIZE]
468      int		im,		// i : min hcode index
469      int		iM,		// i : max hcode index
470      char**		pcode)		//  o: ptr to packed table (updated)
471 {
472     char *p = *pcode;
473     Int64 c = 0;
474     int lc = 0;
475 
476     for (; im <= iM; im++)
477     {
478     int l = hufLength (hcode[im]);
479 
480     if (l == 0)
481     {
482         int zerun = 1;
483 
484         while ((im < iM) && (zerun < LONGEST_LONG_RUN))
485         {
486         if (hufLength (hcode[im+1]) > 0 )
487             break;
488         im++;
489         zerun++;
490         }
491 
492         if (zerun >= 2)
493         {
494         if (zerun >= SHORTEST_LONG_RUN)
495         {
496             outputBits (6, LONG_ZEROCODE_RUN, c, lc, p);
497             outputBits (8, zerun - SHORTEST_LONG_RUN, c, lc, p);
498         }
499         else
500         {
501             outputBits (6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p);
502         }
503         continue;
504         }
505     }
506 
507     outputBits (6, l, c, lc, p);
508     }
509 
510     if (lc > 0)
511     *p++ = (unsigned char) (c << (8 - lc));
512 
513     *pcode = p;
514 }
515 
516 
517 //
518 // Unpack an encoding table packed by hufPackEncTable():
519 //
520 
521 void
hufUnpackEncTable(const char ** pcode,int ni,int im,int iM,Int64 * hcode)522 hufUnpackEncTable
523     (const char**	pcode,		// io: ptr to packed table (updated)
524      int		ni,		// i : input size (in bytes)
525      int		im,		// i : min hcode index
526      int		iM,		// i : max hcode index
527      Int64*		hcode)		//  o: encoding table [HUF_ENCSIZE]
528 {
529     memset (hcode, 0, sizeof (Int64) * HUF_ENCSIZE);
530 
531     const char *p = *pcode;
532     Int64 c = 0;
533     int lc = 0;
534 
535     for (; im <= iM; im++)
536     {
537     if (p - *pcode > ni)
538         unexpectedEndOfTable();
539 
540     Int64 l = hcode[im] = getBits (6, c, lc, p); // code length
541 
542     if (l == (Int64) LONG_ZEROCODE_RUN)
543     {
544         if (p - *pcode > ni)
545         unexpectedEndOfTable();
546 
547         int zerun = getBits (8, c, lc, p) + SHORTEST_LONG_RUN;
548 
549         if (im + zerun > iM + 1)
550         tableTooLong();
551 
552         while (zerun--)
553         hcode[im++] = 0;
554 
555         im--;
556     }
557     else if (l >= (Int64) SHORT_ZEROCODE_RUN)
558     {
559         int zerun = l - SHORT_ZEROCODE_RUN + 2;
560 
561         if (im + zerun > iM + 1)
562         tableTooLong();
563 
564         while (zerun--)
565         hcode[im++] = 0;
566 
567         im--;
568     }
569     }
570 
571     *pcode = (char *) p;
572 
573     hufCanonicalCodeTable (hcode);
574 }
575 
576 
577 //
578 // DECODING TABLE BUILDING
579 //
580 
581 //
582 // Clear a newly allocated decoding table so that it contains only zeroes.
583 //
584 
585 void
hufClearDecTable(HufDec * hdecod)586 hufClearDecTable
587     (HufDec *		hdecod)		// io: (allocated by caller)
588                         //     decoding table [HUF_DECSIZE]
589 {
590     memset (hdecod, 0, sizeof (HufDec) * HUF_DECSIZE);
591 }
592 
593 
594 //
595 // Build a decoding hash table based on the encoding table hcode:
596 //	- short codes (<= HUF_DECBITS) are resolved with a single table access;
597 //	- long code entry allocations are not optimized, because long codes are
598 //	  unfrequent;
599 //	- decoding tables are used by hufDecode();
600 //
601 
602 void
hufBuildDecTable(const Int64 * hcode,int im,int iM,HufDec * hdecod)603 hufBuildDecTable
604     (const Int64*	hcode,		// i : encoding table
605      int		im,		// i : min index in hcode
606      int		iM,		// i : max index in hcode
607      HufDec *		hdecod)		//  o: (allocated by caller)
608                         //     decoding table [HUF_DECSIZE]
609 {
610     //
611     // Init hashtable & loop on all codes.
612     // Assumes that hufClearDecTable(hdecod) has already been called.
613     //
614 
615     for (; im <= iM; im++)
616     {
617     Int64 c = hufCode (hcode[im]);
618     int l = hufLength (hcode[im]);
619 
620     if (c >> l)
621     {
622         //
623         // Error: c is supposed to be an l-bit code,
624         // but c contains a value that is greater
625         // than the largest l-bit number.
626         //
627 
628         invalidTableEntry();
629     }
630 
631     if (l > HUF_DECBITS)
632     {
633         //
634         // Long code: add a secondary entry
635         //
636 
637         HufDec *pl = hdecod + (c >> (l - HUF_DECBITS));
638 
639         if (pl->len)
640         {
641         //
642         // Error: a short code has already
643         // been stored in table entry *pl.
644         //
645 
646         invalidTableEntry();
647         }
648 
649         pl->lit++;
650 
651         if (pl->p)
652         {
653         int *p = pl->p;
654         pl->p = new int [pl->lit];
655 
656         for (int i = 0; i < pl->lit - 1; ++i)
657             pl->p[i] = p[i];
658 
659         delete [] p;
660         }
661         else
662         {
663         pl->p = new int [1];
664         }
665 
666         pl->p[pl->lit - 1]= im;
667     }
668     else if (l)
669     {
670         //
671         // Short code: init all primary entries
672         //
673 
674         HufDec *pl = hdecod + (c << (HUF_DECBITS - l));
675 
676         for (Int64 i = 1 << (HUF_DECBITS - l); i > 0; i--, pl++)
677         {
678         if (pl->len || pl->p)
679         {
680             //
681             // Error: a short code or a long code has
682             // already been stored in table entry *pl.
683             //
684 
685             invalidTableEntry();
686         }
687 
688         pl->len = l;
689         pl->lit = im;
690         }
691     }
692     }
693 }
694 
695 
696 //
697 // Free the long code entries of a decoding table built by hufBuildDecTable()
698 //
699 
700 void
hufFreeDecTable(HufDec * hdecod)701 hufFreeDecTable (HufDec *hdecod)	// io: Decoding table
702 {
703     for (int i = 0; i < HUF_DECSIZE; i++)
704     {
705     if (hdecod[i].p)
706     {
707         delete [] hdecod[i].p;
708         hdecod[i].p = 0;
709     }
710     }
711 }
712 
713 
714 //
715 // ENCODING
716 //
717 
718 inline void
outputCode(Int64 code,Int64 & c,int & lc,char * & out)719 outputCode (Int64 code, Int64 &c, int &lc, char *&out)
720 {
721     outputBits (hufLength (code), hufCode (code), c, lc, out);
722 }
723 
724 
725 inline void
sendCode(Int64 sCode,int runCount,Int64 runCode,Int64 & c,int & lc,char * & out)726 sendCode (Int64 sCode, int runCount, Int64 runCode,
727       Int64 &c, int &lc, char *&out)
728 {
729     static const int RLMIN = 32; // min count to activate run-length coding
730 
731     if (runCount > RLMIN)
732     {
733     outputCode (sCode, c, lc, out);
734     outputCode (runCode, c, lc, out);
735     outputBits (8, runCount, c, lc, out);
736     }
737     else
738     {
739     while (runCount-- >= 0)
740         outputCode (sCode, c, lc, out);
741     }
742 }
743 
744 
745 //
746 // Encode (compress) ni values based on the Huffman encoding table hcode:
747 //
748 
749 int
hufEncode(const Int64 * hcode,const unsigned short * in,const int ni,int rlc,char * out)750 hufEncode				// return: output size (in bits)
751     (const Int64*  	    hcode,	// i : encoding table
752      const unsigned short*  in,		// i : uncompressed input buffer
753      const int     	    ni,		// i : input buffer size (in bytes)
754      int           	    rlc,	// i : rl code
755      char*         	    out)	//  o: compressed output buffer
756 {
757     char *outStart = out;
758     Int64 c = 0;	// bits not yet written to out
759     int lc = 0;		// number of valid bits in c (LSB)
760     int s = in[0];
761     int cs = 0;
762 
763     //
764     // Loop on input values
765     //
766 
767     for (int i = 1; i < ni; i++)
768     {
769     //
770     // Count same values or send code
771     //
772 
773     if (s == in[i] && cs < 255)
774     {
775         cs++;
776     }
777     else
778     {
779         sendCode (hcode[s], cs, hcode[rlc], c, lc, out);
780         cs=0;
781     }
782 
783     s = in[i];
784     }
785 
786     //
787     // Send remaining code
788     //
789 
790     sendCode (hcode[s], cs, hcode[rlc], c, lc, out);
791 
792     if (lc)
793     *out = (c << (8 - lc)) & 0xff;
794 
795     return (out - outStart) * 8 + lc;
796 }
797 
798 
799 //
800 // DECODING
801 //
802 
803 //
804 // In order to force the compiler to inline them,
805 // getChar() and getCode() are implemented as macros
806 // instead of "inline" functions.
807 //
808 
809 #define getChar(c, lc, in)			\
810 {						\
811     c = (c << 8) | *(unsigned char *)(in++);	\
812     lc += 8;					\
813 }
814 
815 
816 #define getCode(po, rlc, c, lc, in, out, oe)	\
817 {						\
818     if (po == rlc)				\
819     {						\
820     if (lc < 8)				\
821         getChar(c, lc, in);			\
822                         \
823     lc -= 8;				\
824                         \
825     unsigned char cs = (c >> lc);		\
826                         \
827     if (out + cs > oe)			\
828         tooMuchData();			\
829                         \
830     unsigned short s = out[-1];		\
831                         \
832     while (cs-- > 0)			\
833         *out++ = s;				\
834     }						\
835     else if (out < oe)				\
836     {						\
837     *out++ = po;				\
838     }						\
839     else					\
840     {						\
841     tooMuchData();				\
842     }						\
843 }
844 
845 
846 //
847 // Decode (uncompress) ni bits based on encoding & decoding tables:
848 //
849 
850 void
hufDecode(const Int64 * hcode,const HufDec * hdecod,const char * in,int ni,int rlc,int no,unsigned short * out)851 hufDecode
852     (const Int64 * 	hcode,	// i : encoding table
853      const HufDec * 	hdecod,	// i : decoding table
854      const char* 	in,	// i : compressed input buffer
855      int		ni,	// i : input size (in bits)
856      int		rlc,	// i : run-length code
857      int		no,	// i : expected output size (in bytes)
858      unsigned short*	out)	//  o: uncompressed output buffer
859 {
860     Int64 c = 0;
861     int lc = 0;
862     unsigned short * outb = out;
863     unsigned short * oe = out + no;
864     const char * ie = in + (ni + 7) / 8; // input byte size
865 
866     //
867     // Loop on input bytes
868     //
869 
870     while (in < ie)
871     {
872     getChar (c, lc, in);
873 
874     //
875     // Access decoding table
876     //
877 
878     while (lc >= HUF_DECBITS)
879     {
880         const HufDec pl = hdecod[(c >> (lc-HUF_DECBITS)) & HUF_DECMASK];
881 
882         if (pl.len)
883         {
884         //
885         // Get short code
886         //
887 
888         lc -= pl.len;
889         getCode (pl.lit, rlc, c, lc, in, out, oe);
890         }
891         else
892         {
893         if (!pl.p)
894             invalidCode(); // wrong code
895 
896         //
897         // Search long code
898         //
899 
900         int j;
901 
902         for (j = 0; j < pl.lit; j++)
903         {
904             int	l = hufLength (hcode[pl.p[j]]);
905 
906             while (lc < l && in < ie)	// get more bits
907             getChar (c, lc, in);
908 
909             if (lc >= l)
910             {
911             if (hufCode (hcode[pl.p[j]]) ==
912                 ((c >> (lc - l)) & ((Int64(1) << l) - 1)))
913             {
914                 //
915                 // Found : get long code
916                 //
917 
918                 lc -= l;
919                 getCode (pl.p[j], rlc, c, lc, in, out, oe);
920                 break;
921             }
922             }
923         }
924 
925         if (j == pl.lit)
926             invalidCode(); // Not found
927         }
928     }
929     }
930 
931     //
932     // Get remaining (short) codes
933     //
934 
935     int i = (8 - ni) & 7;
936     c >>= i;
937     lc -= i;
938 
939     while (lc > 0)
940     {
941     const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK];
942 
943     if (pl.len)
944     {
945         lc -= pl.len;
946         getCode (pl.lit, rlc, c, lc, in, out, oe);
947     }
948     else
949     {
950         invalidCode(); // wrong (long) code
951     }
952     }
953 
954     if (out - outb != no)
955     notEnoughData ();
956 }
957 
958 
959 void
countFrequencies(Int64 freq[HUF_ENCSIZE],const unsigned short data[],int n)960 countFrequencies (Int64 freq[HUF_ENCSIZE],
961           const unsigned short data[/*n*/],
962           int n)
963 {
964     for (int i = 0; i < HUF_ENCSIZE; ++i)
965     freq[i] = 0;
966 
967     for (int i = 0; i < n; ++i)
968     ++freq[data[i]];
969 }
970 
971 
972 void
writeUInt(char buf[4],unsigned int i)973 writeUInt (char buf[4], unsigned int i)
974 {
975     unsigned char *b = (unsigned char *) buf;
976 
977     b[0] = i;
978     b[1] = i >> 8;
979     b[2] = i >> 16;
980     b[3] = i >> 24;
981 }
982 
983 
984 unsigned int
readUInt(const char buf[4])985 readUInt (const char buf[4])
986 {
987     const unsigned char *b = (const unsigned char *) buf;
988 
989     return ( b[0]        & 0x000000ff) |
990        ((b[1] <<  8) & 0x0000ff00) |
991        ((b[2] << 16) & 0x00ff0000) |
992        ((b[3] << 24) & 0xff000000);
993 }
994 
995 } // namespace
996 
997 
998 //
999 // EXTERNAL INTERFACE
1000 //
1001 
1002 
1003 int
hufCompress(const unsigned short raw[],int nRaw,char compressed[])1004 hufCompress (const unsigned short raw[],
1005          int nRaw,
1006          char compressed[])
1007 {
1008     if (nRaw == 0)
1009     return 0;
1010 
1011     AutoArray <Int64, HUF_ENCSIZE> freq;
1012 
1013     countFrequencies (freq, raw, nRaw);
1014 
1015     int im, iM;
1016     hufBuildEncTable (freq, &im, &iM);
1017 
1018     char *tableStart = compressed + 20;
1019     char *tableEnd   = tableStart;
1020     hufPackEncTable (freq, im, iM, &tableEnd);
1021     int tableLength = tableEnd - tableStart;
1022 
1023     char *dataStart = tableEnd;
1024     int nBits = hufEncode (freq, raw, nRaw, iM, dataStart);
1025     int dataLength = (nBits + 7) / 8;
1026 
1027     writeUInt (compressed,      im);
1028     writeUInt (compressed +  4, iM);
1029     writeUInt (compressed +  8, tableLength);
1030     writeUInt (compressed + 12, nBits);
1031     writeUInt (compressed + 16, 0);	// room for future extensions
1032 
1033     return dataStart + dataLength - compressed;
1034 }
1035 
1036 
1037 void
hufUncompress(const char compressed[],int nCompressed,unsigned short raw[],int nRaw)1038 hufUncompress (const char compressed[],
1039            int nCompressed,
1040            unsigned short raw[],
1041            int nRaw)
1042 {
1043     if (nCompressed == 0)
1044     {
1045     if (nRaw != 0)
1046         notEnoughData();
1047 
1048     return;
1049     }
1050 
1051     int im = readUInt (compressed);
1052     int iM = readUInt (compressed + 4);
1053     // int tableLength = readUInt (compressed + 8);
1054     int nBits = readUInt (compressed + 12);
1055 
1056     if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE)
1057     invalidTableSize();
1058 
1059     const char *ptr = compressed + 20;
1060 
1061     AutoArray <Int64, HUF_ENCSIZE> freq;
1062     AutoArray <HufDec, HUF_DECSIZE> hdec;
1063 
1064     hufClearDecTable (hdec);
1065 
1066     hufUnpackEncTable (&ptr, nCompressed - (ptr - compressed), im, iM, freq);
1067 
1068     try
1069     {
1070     if (nBits > 8 * (nCompressed - (ptr - compressed)))
1071         invalidNBits();
1072 
1073     hufBuildDecTable (freq, im, iM, hdec);
1074     hufDecode (freq, hdec, ptr, nBits, iM, nRaw, raw);
1075     }
1076     catch (...)
1077     {
1078     hufFreeDecTable (hdec);
1079     throw;
1080     }
1081 
1082     hufFreeDecTable (hdec);
1083 }
1084 
1085 
1086 } // namespace Imf
1087