• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  
2  /*-------------------------------------------------------------*/
3  /*--- Block sorting machinery                               ---*/
4  /*---                                           blocksort.c ---*/
5  /*-------------------------------------------------------------*/
6  
7  /* ------------------------------------------------------------------
8     This file is part of bzip2/libbzip2, a program and library for
9     lossless, block-sorting data compression.
10  
11     bzip2/libbzip2 version 1.0.6 of 6 September 2010
12     Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
13  
14     Please read the WARNING, DISCLAIMER and PATENTS sections in the
15     README file.
16  
17     This program is released under the terms of the license contained
18     in the file LICENSE.
19     ------------------------------------------------------------------ */
20  
21  
22  #include "bzlib_private.h"
23  
24  /*---------------------------------------------*/
25  /*--- Fallback O(N log(N)^2) sorting        ---*/
26  /*--- algorithm, for repetitive blocks      ---*/
27  /*---------------------------------------------*/
28  
29  /*---------------------------------------------*/
30  static
31  __inline__
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)32  void fallbackSimpleSort ( UInt32* fmap,
33                            UInt32* eclass,
34                            Int32   lo,
35                            Int32   hi )
36  {
37     Int32 i, j, tmp;
38     UInt32 ec_tmp;
39  
40     if (lo == hi) return;
41  
42     if (hi - lo > 3) {
43        for ( i = hi-4; i >= lo; i-- ) {
44           tmp = fmap[i];
45           ec_tmp = eclass[tmp];
46           for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
47              fmap[j-4] = fmap[j];
48           fmap[j-4] = tmp;
49        }
50     }
51  
52     for ( i = hi-1; i >= lo; i-- ) {
53        tmp = fmap[i];
54        ec_tmp = eclass[tmp];
55        for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
56           fmap[j-1] = fmap[j];
57        fmap[j-1] = tmp;
58     }
59  }
60  
61  
62  /*---------------------------------------------*/
63  #define fswap(zz1, zz2) \
64     { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
65  
66  #define fvswap(zzp1, zzp2, zzn)       \
67  {                                     \
68     Int32 yyp1 = (zzp1);               \
69     Int32 yyp2 = (zzp2);               \
70     Int32 yyn  = (zzn);                \
71     while (yyn > 0) {                  \
72        fswap(fmap[yyp1], fmap[yyp2]);  \
73        yyp1++; yyp2++; yyn--;          \
74     }                                  \
75  }
76  
77  
78  #define fmin(a,b) ((a) < (b)) ? (a) : (b)
79  
80  #define fpush(lz,hz) { stackLo[sp] = lz; \
81                         stackHi[sp] = hz; \
82                         sp++; }
83  
84  #define fpop(lz,hz) { sp--;              \
85                        lz = stackLo[sp];  \
86                        hz = stackHi[sp]; }
87  
88  #define FALLBACK_QSORT_SMALL_THRESH 10
89  #define FALLBACK_QSORT_STACK_SIZE   100
90  
91  
92  static
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)93  void fallbackQSort3 ( UInt32* fmap,
94                        UInt32* eclass,
95                        Int32   loSt,
96                        Int32   hiSt )
97  {
98     Int32 unLo, unHi, ltLo, gtHi, n, m;
99     Int32 sp, lo, hi;
100     UInt32 med, r, r3;
101     Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
102     Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
103  
104     r = 0;
105  
106     sp = 0;
107     fpush ( loSt, hiSt );
108  
109     while (sp > 0) {
110  
111        AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
112  
113        fpop ( lo, hi );
114        if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
115           fallbackSimpleSort ( fmap, eclass, lo, hi );
116           continue;
117        }
118  
119        /* Random partitioning.  Median of 3 sometimes fails to
120           avoid bad cases.  Median of 9 seems to help but
121           looks rather expensive.  This too seems to work but
122           is cheaper.  Guidance for the magic constants
123           7621 and 32768 is taken from Sedgewick's algorithms
124           book, chapter 35.
125        */
126        r = ((r * 7621) + 1) % 32768;
127        r3 = r % 3;
128        if (r3 == 0) med = eclass[fmap[lo]]; else
129        if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
130                     med = eclass[fmap[hi]];
131  
132        unLo = ltLo = lo;
133        unHi = gtHi = hi;
134  
135        while (1) {
136           while (1) {
137              if (unLo > unHi) break;
138              n = (Int32)eclass[fmap[unLo]] - (Int32)med;
139              if (n == 0) {
140                 fswap(fmap[unLo], fmap[ltLo]);
141                 ltLo++; unLo++;
142                 continue;
143              };
144              if (n > 0) break;
145              unLo++;
146           }
147           while (1) {
148              if (unLo > unHi) break;
149              n = (Int32)eclass[fmap[unHi]] - (Int32)med;
150              if (n == 0) {
151                 fswap(fmap[unHi], fmap[gtHi]);
152                 gtHi--; unHi--;
153                 continue;
154              };
155              if (n < 0) break;
156              unHi--;
157           }
158           if (unLo > unHi) break;
159           fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
160        }
161  
162        AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
163  
164        if (gtHi < ltLo) continue;
165  
166        n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
167        m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168  
169        n = lo + unLo - ltLo - 1;
170        m = hi - (gtHi - unHi) + 1;
171  
172        if (n - lo > hi - m) {
173           fpush ( lo, n );
174           fpush ( m, hi );
175        } else {
176           fpush ( m, hi );
177           fpush ( lo, n );
178        }
179     }
180  }
181  
182  #undef fmin
183  #undef fpush
184  #undef fpop
185  #undef fswap
186  #undef fvswap
187  #undef FALLBACK_QSORT_SMALL_THRESH
188  #undef FALLBACK_QSORT_STACK_SIZE
189  
190  
191  /*---------------------------------------------*/
192  /* Pre:
193        nblock > 0
194        eclass exists for [0 .. nblock-1]
195        ((UChar*)eclass) [0 .. nblock-1] holds block
196        ptr exists for [0 .. nblock-1]
197  
198     Post:
199        ((UChar*)eclass) [0 .. nblock-1] holds block
200        All other areas of eclass destroyed
201        fmap [0 .. nblock-1] holds sorted order
202        bhtab [ 0 .. 2+(nblock/32) ] destroyed
203  */
204  
205  #define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
206  #define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
207  #define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
208  #define      WORD_BH(zz)  bhtab[(zz) >> 5]
209  #define UNALIGNED_BH(zz)  ((zz) & 0x01f)
210  
211  static
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)212  void fallbackSort ( UInt32* fmap,
213                      UInt32* eclass,
214                      UInt32* bhtab,
215                      Int32   nblock,
216                      Int32   verb )
217  {
218     Int32 ftab[257];
219     Int32 ftabCopy[256];
220     Int32 H, i, j, k, l, r, cc, cc1;
221     Int32 nNotDone;
222     Int32 nBhtab;
223     UChar* eclass8 = (UChar*)eclass;
224  
225     /*--
226        Initial 1-char radix sort to generate
227        initial fmap and initial BH bits.
228     --*/
229     if (verb >= 4)
230        VPrintf0 ( "        bucket sorting ...\n" );
231     for (i = 0; i < 257;    i++) ftab[i] = 0;
232     for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233     for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
234     for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
235  
236     for (i = 0; i < nblock; i++) {
237        j = eclass8[i];
238        k = ftab[j] - 1;
239        ftab[j] = k;
240        fmap[k] = i;
241     }
242  
243     nBhtab = 2 + (nblock / 32);
244     for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
245     for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246  
247     /*--
248        Inductively refine the buckets.  Kind-of an
249        "exponential radix sort" (!), inspired by the
250        Manber-Myers suffix array construction algorithm.
251     --*/
252  
253     /*-- set sentinel bits for block-end detection --*/
254     for (i = 0; i < 32; i++) {
255        SET_BH(nblock + 2*i);
256        CLEAR_BH(nblock + 2*i + 1);
257     }
258  
259     /*-- the log(N) loop --*/
260     H = 1;
261     while (1) {
262  
263        if (verb >= 4)
264           VPrintf1 ( "        depth %6d has ", H );
265  
266        j = 0;
267        for (i = 0; i < nblock; i++) {
268           if (ISSET_BH(i)) j = i;
269           k = fmap[i] - H; if (k < 0) k += nblock;
270           eclass[k] = j;
271        }
272  
273        nNotDone = 0;
274        r = -1;
275        while (1) {
276  
277  	 /*-- find the next non-singleton bucket --*/
278           k = r + 1;
279           while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
280           if (ISSET_BH(k)) {
281              while (WORD_BH(k) == 0xffffffff) k += 32;
282              while (ISSET_BH(k)) k++;
283           }
284           l = k - 1;
285           if (l >= nblock) break;
286           while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
287           if (!ISSET_BH(k)) {
288              while (WORD_BH(k) == 0x00000000) k += 32;
289              while (!ISSET_BH(k)) k++;
290           }
291           r = k - 1;
292           if (r >= nblock) break;
293  
294           /*-- now [l, r] bracket current bucket --*/
295           if (r > l) {
296              nNotDone += (r - l + 1);
297              fallbackQSort3 ( fmap, eclass, l, r );
298  
299              /*-- scan bucket and generate header bits-- */
300              cc = -1;
301              for (i = l; i <= r; i++) {
302                 cc1 = eclass[fmap[i]];
303                 if (cc != cc1) { SET_BH(i); cc = cc1; };
304              }
305           }
306        }
307  
308        if (verb >= 4)
309           VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310  
311        H *= 2;
312        if (H > nblock || nNotDone == 0) break;
313     }
314  
315     /*--
316        Reconstruct the original block in
317        eclass8 [0 .. nblock-1], since the
318        previous phase destroyed it.
319     --*/
320     if (verb >= 4)
321        VPrintf0 ( "        reconstructing block ...\n" );
322     j = 0;
323     for (i = 0; i < nblock; i++) {
324        while (ftabCopy[j] == 0) j++;
325        ftabCopy[j]--;
326        eclass8[fmap[i]] = (UChar)j;
327     }
328     AssertH ( j < 256, 1005 );
329  }
330  
331  #undef       SET_BH
332  #undef     CLEAR_BH
333  #undef     ISSET_BH
334  #undef      WORD_BH
335  #undef UNALIGNED_BH
336  
337  
338  /*---------------------------------------------*/
339  /*--- The main, O(N^2 log(N)) sorting       ---*/
340  /*--- algorithm.  Faster for "normal"       ---*/
341  /*--- non-repetitive blocks.                ---*/
342  /*---------------------------------------------*/
343  
344  /*---------------------------------------------*/
345  static
346  __inline__
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)347  Bool mainGtU ( UInt32  i1,
348                 UInt32  i2,
349                 UChar*  block,
350                 UInt16* quadrant,
351                 UInt32  nblock,
352                 Int32*  budget )
353  {
354     Int32  k;
355     UChar  c1, c2;
356     UInt16 s1, s2;
357  
358     AssertD ( i1 != i2, "mainGtU" );
359     /* 1 */
360     c1 = block[i1]; c2 = block[i2];
361     if (c1 != c2) return (c1 > c2);
362     i1++; i2++;
363     /* 2 */
364     c1 = block[i1]; c2 = block[i2];
365     if (c1 != c2) return (c1 > c2);
366     i1++; i2++;
367     /* 3 */
368     c1 = block[i1]; c2 = block[i2];
369     if (c1 != c2) return (c1 > c2);
370     i1++; i2++;
371     /* 4 */
372     c1 = block[i1]; c2 = block[i2];
373     if (c1 != c2) return (c1 > c2);
374     i1++; i2++;
375     /* 5 */
376     c1 = block[i1]; c2 = block[i2];
377     if (c1 != c2) return (c1 > c2);
378     i1++; i2++;
379     /* 6 */
380     c1 = block[i1]; c2 = block[i2];
381     if (c1 != c2) return (c1 > c2);
382     i1++; i2++;
383     /* 7 */
384     c1 = block[i1]; c2 = block[i2];
385     if (c1 != c2) return (c1 > c2);
386     i1++; i2++;
387     /* 8 */
388     c1 = block[i1]; c2 = block[i2];
389     if (c1 != c2) return (c1 > c2);
390     i1++; i2++;
391     /* 9 */
392     c1 = block[i1]; c2 = block[i2];
393     if (c1 != c2) return (c1 > c2);
394     i1++; i2++;
395     /* 10 */
396     c1 = block[i1]; c2 = block[i2];
397     if (c1 != c2) return (c1 > c2);
398     i1++; i2++;
399     /* 11 */
400     c1 = block[i1]; c2 = block[i2];
401     if (c1 != c2) return (c1 > c2);
402     i1++; i2++;
403     /* 12 */
404     c1 = block[i1]; c2 = block[i2];
405     if (c1 != c2) return (c1 > c2);
406     i1++; i2++;
407  
408     k = nblock + 8;
409  
410     do {
411        /* 1 */
412        c1 = block[i1]; c2 = block[i2];
413        if (c1 != c2) return (c1 > c2);
414        s1 = quadrant[i1]; s2 = quadrant[i2];
415        if (s1 != s2) return (s1 > s2);
416        i1++; i2++;
417        /* 2 */
418        c1 = block[i1]; c2 = block[i2];
419        if (c1 != c2) return (c1 > c2);
420        s1 = quadrant[i1]; s2 = quadrant[i2];
421        if (s1 != s2) return (s1 > s2);
422        i1++; i2++;
423        /* 3 */
424        c1 = block[i1]; c2 = block[i2];
425        if (c1 != c2) return (c1 > c2);
426        s1 = quadrant[i1]; s2 = quadrant[i2];
427        if (s1 != s2) return (s1 > s2);
428        i1++; i2++;
429        /* 4 */
430        c1 = block[i1]; c2 = block[i2];
431        if (c1 != c2) return (c1 > c2);
432        s1 = quadrant[i1]; s2 = quadrant[i2];
433        if (s1 != s2) return (s1 > s2);
434        i1++; i2++;
435        /* 5 */
436        c1 = block[i1]; c2 = block[i2];
437        if (c1 != c2) return (c1 > c2);
438        s1 = quadrant[i1]; s2 = quadrant[i2];
439        if (s1 != s2) return (s1 > s2);
440        i1++; i2++;
441        /* 6 */
442        c1 = block[i1]; c2 = block[i2];
443        if (c1 != c2) return (c1 > c2);
444        s1 = quadrant[i1]; s2 = quadrant[i2];
445        if (s1 != s2) return (s1 > s2);
446        i1++; i2++;
447        /* 7 */
448        c1 = block[i1]; c2 = block[i2];
449        if (c1 != c2) return (c1 > c2);
450        s1 = quadrant[i1]; s2 = quadrant[i2];
451        if (s1 != s2) return (s1 > s2);
452        i1++; i2++;
453        /* 8 */
454        c1 = block[i1]; c2 = block[i2];
455        if (c1 != c2) return (c1 > c2);
456        s1 = quadrant[i1]; s2 = quadrant[i2];
457        if (s1 != s2) return (s1 > s2);
458        i1++; i2++;
459  
460        if (i1 >= nblock) i1 -= nblock;
461        if (i2 >= nblock) i2 -= nblock;
462  
463        k -= 8;
464        (*budget)--;
465     }
466        while (k >= 0);
467  
468     return False;
469  }
470  
471  
472  /*---------------------------------------------*/
473  /*--
474     Knuth's increments seem to work better
475     than Incerpi-Sedgewick here.  Possibly
476     because the number of elems to sort is
477     usually small, typically <= 20.
478  --*/
479  static
480  Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481                     9841, 29524, 88573, 265720,
482                     797161, 2391484 };
483  
484  static
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)485  void mainSimpleSort ( UInt32* ptr,
486                        UChar*  block,
487                        UInt16* quadrant,
488                        Int32   nblock,
489                        Int32   lo,
490                        Int32   hi,
491                        Int32   d,
492                        Int32*  budget )
493  {
494     Int32 i, j, h, bigN, hp;
495     UInt32 v;
496  
497     bigN = hi - lo + 1;
498     if (bigN < 2) return;
499  
500     hp = 0;
501     while (incs[hp] < bigN) hp++;
502     hp--;
503  
504     for (; hp >= 0; hp--) {
505        h = incs[hp];
506  
507        i = lo + h;
508        while (True) {
509  
510           /*-- copy 1 --*/
511           if (i > hi) break;
512           v = ptr[i];
513           j = i;
514           while ( mainGtU (
515                      ptr[j-h]+d, v+d, block, quadrant, nblock, budget
516                   ) ) {
517              ptr[j] = ptr[j-h];
518              j = j - h;
519              if (j <= (lo + h - 1)) break;
520           }
521           ptr[j] = v;
522           i++;
523  
524           /*-- copy 2 --*/
525           if (i > hi) break;
526           v = ptr[i];
527           j = i;
528           while ( mainGtU (
529                      ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530                   ) ) {
531              ptr[j] = ptr[j-h];
532              j = j - h;
533              if (j <= (lo + h - 1)) break;
534           }
535           ptr[j] = v;
536           i++;
537  
538           /*-- copy 3 --*/
539           if (i > hi) break;
540           v = ptr[i];
541           j = i;
542           while ( mainGtU (
543                      ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544                   ) ) {
545              ptr[j] = ptr[j-h];
546              j = j - h;
547              if (j <= (lo + h - 1)) break;
548           }
549           ptr[j] = v;
550           i++;
551  
552           if (*budget < 0) return;
553        }
554     }
555  }
556  
557  
558  /*---------------------------------------------*/
559  /*--
560     The following is an implementation of
561     an elegant 3-way quicksort for strings,
562     described in a paper "Fast Algorithms for
563     Sorting and Searching Strings", by Robert
564     Sedgewick and Jon L. Bentley.
565  --*/
566  
567  #define mswap(zz1, zz2) \
568     { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
569  
570  #define mvswap(zzp1, zzp2, zzn)       \
571  {                                     \
572     Int32 yyp1 = (zzp1);               \
573     Int32 yyp2 = (zzp2);               \
574     Int32 yyn  = (zzn);                \
575     while (yyn > 0) {                  \
576        mswap(ptr[yyp1], ptr[yyp2]);    \
577        yyp1++; yyp2++; yyn--;          \
578     }                                  \
579  }
580  
581  static
582  __inline__
mmed3(UChar a,UChar b,UChar c)583  UChar mmed3 ( UChar a, UChar b, UChar c )
584  {
585     UChar t;
586     if (a > b) { t = a; a = b; b = t; };
587     if (b > c) {
588        b = c;
589        if (a > b) b = a;
590     }
591     return b;
592  }
593  
594  #define mmin(a,b) ((a) < (b)) ? (a) : (b)
595  
596  #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
597                            stackHi[sp] = hz; \
598                            stackD [sp] = dz; \
599                            sp++; }
600  
601  #define mpop(lz,hz,dz) { sp--;             \
602                           lz = stackLo[sp]; \
603                           hz = stackHi[sp]; \
604                           dz = stackD [sp]; }
605  
606  
607  #define mnextsize(az) (nextHi[az]-nextLo[az])
608  
609  #define mnextswap(az,bz)                                        \
610     { Int32 tz;                                                  \
611       tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
612       tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
613       tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
614  
615  
616  #define MAIN_QSORT_SMALL_THRESH 20
617  #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
618  #define MAIN_QSORT_STACK_SIZE 100
619  
620  static
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)621  void mainQSort3 ( UInt32* ptr,
622                    UChar*  block,
623                    UInt16* quadrant,
624                    Int32   nblock,
625                    Int32   loSt,
626                    Int32   hiSt,
627                    Int32   dSt,
628                    Int32*  budget )
629  {
630     Int32 unLo, unHi, ltLo, gtHi, n, m, med;
631     Int32 sp, lo, hi, d;
632  
633     Int32 stackLo[MAIN_QSORT_STACK_SIZE];
634     Int32 stackHi[MAIN_QSORT_STACK_SIZE];
635     Int32 stackD [MAIN_QSORT_STACK_SIZE];
636  
637     Int32 nextLo[3];
638     Int32 nextHi[3];
639     Int32 nextD [3];
640  
641     sp = 0;
642     mpush ( loSt, hiSt, dSt );
643  
644     while (sp > 0) {
645  
646        AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
647  
648        mpop ( lo, hi, d );
649        if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
650            d > MAIN_QSORT_DEPTH_THRESH) {
651           mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
652           if (*budget < 0) return;
653           continue;
654        }
655  
656        med = (Int32)
657              mmed3 ( block[ptr[ lo         ]+d],
658                      block[ptr[ hi         ]+d],
659                      block[ptr[ (lo+hi)>>1 ]+d] );
660  
661        unLo = ltLo = lo;
662        unHi = gtHi = hi;
663  
664        while (True) {
665           while (True) {
666              if (unLo > unHi) break;
667              n = ((Int32)block[ptr[unLo]+d]) - med;
668              if (n == 0) {
669                 mswap(ptr[unLo], ptr[ltLo]);
670                 ltLo++; unLo++; continue;
671              };
672              if (n >  0) break;
673              unLo++;
674           }
675           while (True) {
676              if (unLo > unHi) break;
677              n = ((Int32)block[ptr[unHi]+d]) - med;
678              if (n == 0) {
679                 mswap(ptr[unHi], ptr[gtHi]);
680                 gtHi--; unHi--; continue;
681              };
682              if (n <  0) break;
683              unHi--;
684           }
685           if (unLo > unHi) break;
686           mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
687        }
688  
689        AssertD ( unHi == unLo-1, "mainQSort3(2)" );
690  
691        if (gtHi < ltLo) {
692           mpush(lo, hi, d+1 );
693           continue;
694        }
695  
696        n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
697        m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
698  
699        n = lo + unLo - ltLo - 1;
700        m = hi - (gtHi - unHi) + 1;
701  
702        nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
703        nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
704        nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
705  
706        if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707        if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708        if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709  
710        AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711        AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
712  
713        mpush (nextLo[0], nextHi[0], nextD[0]);
714        mpush (nextLo[1], nextHi[1], nextD[1]);
715        mpush (nextLo[2], nextHi[2], nextD[2]);
716     }
717  }
718  
719  #undef mswap
720  #undef mvswap
721  #undef mpush
722  #undef mpop
723  #undef mmin
724  #undef mnextsize
725  #undef mnextswap
726  #undef MAIN_QSORT_SMALL_THRESH
727  #undef MAIN_QSORT_DEPTH_THRESH
728  #undef MAIN_QSORT_STACK_SIZE
729  
730  
731  /*---------------------------------------------*/
732  /* Pre:
733        nblock > N_OVERSHOOT
734        block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735        ((UChar*)block32) [0 .. nblock-1] holds block
736        ptr exists for [0 .. nblock-1]
737  
738     Post:
739        ((UChar*)block32) [0 .. nblock-1] holds block
740        All other areas of block32 destroyed
741        ftab [0 .. 65536 ] destroyed
742        ptr [0 .. nblock-1] holds sorted order
743        if (*budget < 0), sorting was abandoned
744  */
745  
746  #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747  #define SETMASK (1 << 21)
748  #define CLEARMASK (~(SETMASK))
749  
750  static
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)751  void mainSort ( UInt32* ptr,
752                  UChar*  block,
753                  UInt16* quadrant,
754                  UInt32* ftab,
755                  Int32   nblock,
756                  Int32   verb,
757                  Int32*  budget )
758  {
759     Int32  i, j, k, ss, sb;
760     Int32  runningOrder[256];
761     Bool   bigDone[256];
762     Int32  copyStart[256];
763     Int32  copyEnd  [256];
764     UChar  c1;
765     Int32  numQSorted;
766     UInt16 s;
767     if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
768  
769     /*-- set up the 2-byte frequency table --*/
770     for (i = 65536; i >= 0; i--) ftab[i] = 0;
771  
772     j = block[0] << 8;
773     i = nblock-1;
774     for (; i >= 3; i -= 4) {
775        quadrant[i] = 0;
776        j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777        ftab[j]++;
778        quadrant[i-1] = 0;
779        j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780        ftab[j]++;
781        quadrant[i-2] = 0;
782        j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783        ftab[j]++;
784        quadrant[i-3] = 0;
785        j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
786        ftab[j]++;
787     }
788     for (; i >= 0; i--) {
789        quadrant[i] = 0;
790        j = (j >> 8) | ( ((UInt16)block[i]) << 8);
791        ftab[j]++;
792     }
793  
794     /*-- (emphasises close relationship of block & quadrant) --*/
795     for (i = 0; i < BZ_N_OVERSHOOT; i++) {
796        block   [nblock+i] = block[i];
797        quadrant[nblock+i] = 0;
798     }
799  
800     if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
801  
802     /*-- Complete the initial radix sort --*/
803     for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
804  
805     s = block[0] << 8;
806     i = nblock-1;
807     for (; i >= 3; i -= 4) {
808        s = (s >> 8) | (block[i] << 8);
809        j = ftab[s] -1;
810        ftab[s] = j;
811        ptr[j] = i;
812        s = (s >> 8) | (block[i-1] << 8);
813        j = ftab[s] -1;
814        ftab[s] = j;
815        ptr[j] = i-1;
816        s = (s >> 8) | (block[i-2] << 8);
817        j = ftab[s] -1;
818        ftab[s] = j;
819        ptr[j] = i-2;
820        s = (s >> 8) | (block[i-3] << 8);
821        j = ftab[s] -1;
822        ftab[s] = j;
823        ptr[j] = i-3;
824     }
825     for (; i >= 0; i--) {
826        s = (s >> 8) | (block[i] << 8);
827        j = ftab[s] -1;
828        ftab[s] = j;
829        ptr[j] = i;
830     }
831  
832     /*--
833        Now ftab contains the first loc of every small bucket.
834        Calculate the running order, from smallest to largest
835        big bucket.
836     --*/
837     for (i = 0; i <= 255; i++) {
838        bigDone     [i] = False;
839        runningOrder[i] = i;
840     }
841  
842     {
843        Int32 vv;
844        Int32 h = 1;
845        do h = 3 * h + 1; while (h <= 256);
846        do {
847           h = h / 3;
848           for (i = h; i <= 255; i++) {
849              vv = runningOrder[i];
850              j = i;
851              while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852                 runningOrder[j] = runningOrder[j-h];
853                 j = j - h;
854                 if (j <= (h - 1)) goto zero;
855              }
856              zero:
857              runningOrder[j] = vv;
858           }
859        } while (h != 1);
860     }
861  
862     /*--
863        The main sorting loop.
864     --*/
865  
866     numQSorted = 0;
867  
868     for (i = 0; i <= 255; i++) {
869  
870        /*--
871           Process big buckets, starting with the least full.
872           Basically this is a 3-step process in which we call
873           mainQSort3 to sort the small buckets [ss, j], but
874           also make a big effort to avoid the calls if we can.
875        --*/
876        ss = runningOrder[i];
877  
878        /*--
879           Step 1:
880           Complete the big bucket [ss] by quicksorting
881           any unsorted small buckets [ss, j], for j != ss.
882           Hopefully previous pointer-scanning phases have already
883           completed many of the small buckets [ss, j], so
884           we don't have to sort them at all.
885        --*/
886        for (j = 0; j <= 255; j++) {
887           if (j != ss) {
888              sb = (ss << 8) + j;
889              if ( ! (ftab[sb] & SETMASK) ) {
890                 Int32 lo = ftab[sb]   & CLEARMASK;
891                 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892                 if (hi > lo) {
893                    if (verb >= 4)
894                       VPrintf4 ( "        qsort [0x%x, 0x%x]   "
895                                  "done %d   this %d\n",
896                                  ss, j, numQSorted, hi - lo + 1 );
897                    mainQSort3 (
898                       ptr, block, quadrant, nblock,
899                       lo, hi, BZ_N_RADIX, budget
900                    );
901                    numQSorted += (hi - lo + 1);
902                    if (*budget < 0) return;
903                 }
904              }
905              ftab[sb] |= SETMASK;
906           }
907        }
908  
909        AssertH ( !bigDone[ss], 1006 );
910  
911        /*--
912           Step 2:
913           Now scan this big bucket [ss] so as to synthesise the
914           sorted order for small buckets [t, ss] for all t,
915           including, magically, the bucket [ss,ss] too.
916           This will avoid doing Real Work in subsequent Step 1's.
917        --*/
918        {
919           for (j = 0; j <= 255; j++) {
920              copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
921              copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922           }
923           for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924              k = ptr[j]-1; if (k < 0) k += nblock;
925              c1 = block[k];
926              if (!bigDone[c1])
927                 ptr[ copyStart[c1]++ ] = k;
928           }
929           for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930              k = ptr[j]-1; if (k < 0) k += nblock;
931              c1 = block[k];
932              if (!bigDone[c1])
933                 ptr[ copyEnd[c1]-- ] = k;
934           }
935        }
936  
937        AssertH ( (copyStart[ss]-1 == copyEnd[ss])
938                  ||
939                  /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940                     Necessity for this case is demonstrated by compressing
941                     a sequence of approximately 48.5 million of character
942                     251; 1.0.0/1.0.1 will then die here. */
943                  (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944                  1007 )
945  
946        for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947  
948        /*--
949           Step 3:
950           The [ss] big bucket is now done.  Record this fact,
951           and update the quadrant descriptors.  Remember to
952           update quadrants in the overshoot area too, if
953           necessary.  The "if (i < 255)" test merely skips
954           this updating for the last bucket processed, since
955           updating for the last bucket is pointless.
956  
957           The quadrant array provides a way to incrementally
958           cache sort orderings, as they appear, so as to
959           make subsequent comparisons in fullGtU() complete
960           faster.  For repetitive blocks this makes a big
961           difference (but not big enough to be able to avoid
962           the fallback sorting mechanism, exponential radix sort).
963  
964           The precise meaning is: at all times:
965  
966              for 0 <= i < nblock and 0 <= j <= nblock
967  
968              if block[i] != block[j],
969  
970                 then the relative values of quadrant[i] and
971                      quadrant[j] are meaningless.
972  
973                 else {
974                    if quadrant[i] < quadrant[j]
975                       then the string starting at i lexicographically
976                       precedes the string starting at j
977  
978                    else if quadrant[i] > quadrant[j]
979                       then the string starting at j lexicographically
980                       precedes the string starting at i
981  
982                    else
983                       the relative ordering of the strings starting
984                       at i and j has not yet been determined.
985                 }
986        --*/
987        bigDone[ss] = True;
988  
989        if (i < 255) {
990           Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
991           Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
992           Int32 shifts   = 0;
993  
994           while ((bbSize >> shifts) > 65534) shifts++;
995  
996           for (j = bbSize-1; j >= 0; j--) {
997              Int32 a2update     = ptr[bbStart + j];
998              UInt16 qVal        = (UInt16)(j >> shifts);
999              quadrant[a2update] = qVal;
1000              if (a2update < BZ_N_OVERSHOOT)
1001                 quadrant[a2update + nblock] = qVal;
1002           }
1003           AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1004        }
1005  
1006     }
1007  
1008     if (verb >= 4)
1009        VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
1010                   nblock, numQSorted, nblock - numQSorted );
1011  }
1012  
1013  #undef BIGFREQ
1014  #undef SETMASK
1015  #undef CLEARMASK
1016  
1017  
1018  /*---------------------------------------------*/
1019  /* Pre:
1020        nblock > 0
1021        arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022        ((UChar*)arr2)  [0 .. nblock-1] holds block
1023        arr1 exists for [0 .. nblock-1]
1024  
1025     Post:
1026        ((UChar*)arr2) [0 .. nblock-1] holds block
1027        All other areas of block destroyed
1028        ftab [ 0 .. 65536 ] destroyed
1029        arr1 [0 .. nblock-1] holds sorted order
1030  */
BZ2_blockSort(EState * s)1031  void BZ2_blockSort ( EState* s )
1032  {
1033     UInt32* ptr    = s->ptr;
1034     UChar*  block  = s->block;
1035     UInt32* ftab   = s->ftab;
1036     Int32   nblock = s->nblock;
1037     Int32   verb   = s->verbosity;
1038     Int32   wfact  = s->workFactor;
1039     UInt16* quadrant;
1040     Int32   budget;
1041     Int32   budgetInit;
1042     Int32   i;
1043  
1044     if (nblock < 10000) {
1045        fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1046     } else {
1047        /* Calculate the location for quadrant, remembering to get
1048           the alignment right.  Assumes that &(block[0]) is at least
1049           2-byte aligned -- this should be ok since block is really
1050           the first section of arr2.
1051        */
1052        i = nblock+BZ_N_OVERSHOOT;
1053        if (i & 1) i++;
1054        quadrant = (UInt16*)(&(block[i]));
1055  
1056        /* (wfact-1) / 3 puts the default-factor-30
1057           transition point at very roughly the same place as
1058           with v0.1 and v0.9.0.
1059           Not that it particularly matters any more, since the
1060           resulting compressed stream is now the same regardless
1061           of whether or not we use the main sort or fallback sort.
1062        */
1063        if (wfact < 1  ) wfact = 1;
1064        if (wfact > 100) wfact = 100;
1065        budgetInit = nblock * ((wfact-1) / 3);
1066        budget = budgetInit;
1067  
1068        mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069        if (verb >= 3)
1070           VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
1071                      budgetInit - budget,
1072                      nblock,
1073                      (float)(budgetInit - budget) /
1074                      (float)(nblock==0 ? 1 : nblock) );
1075        if (budget < 0) {
1076           if (verb >= 2)
1077              VPrintf0 ( "    too repetitive; using fallback"
1078                         " sorting algorithm\n" );
1079           fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080        }
1081     }
1082  
1083     s->origPtr = -1;
1084     for (i = 0; i < s->nblock; i++)
1085        if (ptr[i] == 0)
1086           { s->origPtr = i; break; };
1087  
1088     AssertH( s->origPtr != -1, 1003 );
1089  }
1090  
1091  
1092  /*-------------------------------------------------------------*/
1093  /*--- end                                       blocksort.c ---*/
1094  /*-------------------------------------------------------------*/
1095