Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.

author: Anton Samokhvalov <pg83@yandex.ru> 2022-02-10 16:45:17 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:17 +0300
commit: d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree: dd4bd3ca0f36b817e96812825ffaf10d645803f2 /contrib/libs/libbz2/blocksort.c
parent: 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download: ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
1 files changed, 1085 insertions, 1085 deletions
diff --git a/contrib/libs/libbz2/blocksort.c b/contrib/libs/libbz2/blocksort.c
index f788e88c80..92d81fe287 100644
--- a/contrib/libs/libbz2/blocksort.c
+++ b/contrib/libs/libbz2/blocksort.c
@@ -1,1094 +1,1094 @@
- 
-/*-------------------------------------------------------------*/ 
-/*--- Block sorting machinery                               ---*/ 
-/*---                                           blocksort.c ---*/ 
-/*-------------------------------------------------------------*/ 
- 
-/* ------------------------------------------------------------------ 
-   This file is part of bzip2/libbzip2, a program and library for 
-   lossless, block-sorting data compression. 
- 
+
+/*-------------------------------------------------------------*/
+/*--- Block sorting machinery                               ---*/
+/*---                                           blocksort.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
    bzip2/libbzip2 version 1.0.8 of 13 July 2019
    Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
- 
-   Please read the WARNING, DISCLAIMER and PATENTS sections in the  
-   README file. 
- 
-   This program is released under the terms of the license contained 
-   in the file LICENSE. 
-   ------------------------------------------------------------------ */ 
- 
- 
-#include "bzlib_private.h" 
- 
-/*---------------------------------------------*/ 
-/*--- Fallback O(N log(N)^2) sorting        ---*/ 
-/*--- algorithm, for repetitive blocks      ---*/ 
-/*---------------------------------------------*/ 
- 
-/*---------------------------------------------*/ 
-static  
-__inline__ 
-void fallbackSimpleSort ( UInt32* fmap,  
-                          UInt32* eclass,  
-                          Int32   lo,  
-                          Int32   hi ) 
-{ 
-   Int32 i, j, tmp; 
-   UInt32 ec_tmp; 
- 
-   if (lo == hi) return; 
- 
-   if (hi - lo > 3) { 
-      for ( i = hi-4; i >= lo; i-- ) { 
-         tmp = fmap[i]; 
-         ec_tmp = eclass[tmp]; 
-         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 
-            fmap[j-4] = fmap[j]; 
-         fmap[j-4] = tmp; 
-      } 
-   } 
- 
-   for ( i = hi-1; i >= lo; i-- ) { 
-      tmp = fmap[i]; 
-      ec_tmp = eclass[tmp]; 
-      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 
-         fmap[j-1] = fmap[j]; 
-      fmap[j-1] = tmp; 
-   } 
-} 
- 
- 
-/*---------------------------------------------*/ 
-#define fswap(zz1, zz2) \ 
-   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 
- 
-#define fvswap(zzp1, zzp2, zzn)       \ 
-{                                     \ 
-   Int32 yyp1 = (zzp1);               \ 
-   Int32 yyp2 = (zzp2);               \ 
-   Int32 yyn  = (zzn);                \ 
-   while (yyn > 0) {                  \ 
-      fswap(fmap[yyp1], fmap[yyp2]);  \ 
-      yyp1++; yyp2++; yyn--;          \ 
-   }                                  \ 
-} 
- 
- 
-#define fmin(a,b) ((a) < (b)) ? (a) : (b) 
- 
-#define fpush(lz,hz) { stackLo[sp] = lz; \ 
-                       stackHi[sp] = hz; \ 
-                       sp++; } 
- 
-#define fpop(lz,hz) { sp--;              \ 
-                      lz = stackLo[sp];  \ 
-                      hz = stackHi[sp]; } 
- 
-#define FALLBACK_QSORT_SMALL_THRESH 10 
-#define FALLBACK_QSORT_STACK_SIZE   100 
- 
- 
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*---------------------------------------------*/
+/*--- Fallback O(N log(N)^2) sorting        ---*/
+/*--- algorithm, for repetitive blocks      ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
 static 
-void fallbackQSort3 ( UInt32* fmap,  
-                      UInt32* eclass, 
-                      Int32   loSt,  
-                      Int32   hiSt ) 
-{ 
-   Int32 unLo, unHi, ltLo, gtHi, n, m; 
-   Int32 sp, lo, hi; 
-   UInt32 med, r, r3; 
-   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 
-   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 
- 
-   r = 0; 
- 
-   sp = 0; 
-   fpush ( loSt, hiSt ); 
- 
-   while (sp > 0) { 
- 
-      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); 
- 
-      fpop ( lo, hi ); 
-      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 
-         fallbackSimpleSort ( fmap, eclass, lo, hi ); 
-         continue; 
-      } 
- 
-      /* Random partitioning.  Median of 3 sometimes fails to 
-         avoid bad cases.  Median of 9 seems to help but  
-         looks rather expensive.  This too seems to work but 
-         is cheaper.  Guidance for the magic constants  
-         7621 and 32768 is taken from Sedgewick's algorithms 
-         book, chapter 35. 
-      */ 
-      r = ((r * 7621) + 1) % 32768; 
-      r3 = r % 3; 
-      if (r3 == 0) med = eclass[fmap[lo]]; else 
-      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 
-                   med = eclass[fmap[hi]]; 
- 
-      unLo = ltLo = lo; 
-      unHi = gtHi = hi; 
- 
-      while (1) { 
-         while (1) { 
-            if (unLo > unHi) break; 
-            n = (Int32)eclass[fmap[unLo]] - (Int32)med; 
-            if (n == 0) {  
-               fswap(fmap[unLo], fmap[ltLo]);  
-               ltLo++; unLo++;  
-               continue;  
-            }; 
-            if (n > 0) break; 
-            unLo++; 
-         } 
-         while (1) { 
-            if (unLo > unHi) break; 
-            n = (Int32)eclass[fmap[unHi]] - (Int32)med; 
-            if (n == 0) {  
-               fswap(fmap[unHi], fmap[gtHi]);  
-               gtHi--; unHi--;  
-               continue;  
-            }; 
-            if (n < 0) break; 
-            unHi--; 
-         } 
-         if (unLo > unHi) break; 
-         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 
-      } 
- 
-      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 
- 
-      if (gtHi < ltLo) continue; 
- 
-      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 
-      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 
- 
-      n = lo + unLo - ltLo - 1; 
-      m = hi - (gtHi - unHi) + 1; 
- 
-      if (n - lo > hi - m) { 
-         fpush ( lo, n ); 
-         fpush ( m, hi ); 
-      } else { 
-         fpush ( m, hi ); 
-         fpush ( lo, n ); 
-      } 
-   } 
-} 
- 
-#undef fmin 
-#undef fpush 
-#undef fpop 
-#undef fswap 
-#undef fvswap 
-#undef FALLBACK_QSORT_SMALL_THRESH 
-#undef FALLBACK_QSORT_STACK_SIZE 
- 
- 
-/*---------------------------------------------*/ 
-/* Pre: 
-      nblock > 0 
-      eclass exists for [0 .. nblock-1] 
-      ((UChar*)eclass) [0 .. nblock-1] holds block 
-      ptr exists for [0 .. nblock-1] 
- 
-   Post: 
-      ((UChar*)eclass) [0 .. nblock-1] holds block 
-      All other areas of eclass destroyed 
-      fmap [0 .. nblock-1] holds sorted order 
-      bhtab [ 0 .. 2+(nblock/32) ] destroyed 
-*/ 
- 
+__inline__
+void fallbackSimpleSort ( UInt32* fmap, 
+                          UInt32* eclass, 
+                          Int32   lo, 
+                          Int32   hi )
+{
+   Int32 i, j, tmp;
+   UInt32 ec_tmp;
+
+   if (lo == hi) return;
+
+   if (hi - lo > 3) {
+      for ( i = hi-4; i >= lo; i-- ) {
+         tmp = fmap[i];
+         ec_tmp = eclass[tmp];
+         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
+            fmap[j-4] = fmap[j];
+         fmap[j-4] = tmp;
+      }
+   }
+
+   for ( i = hi-1; i >= lo; i-- ) {
+      tmp = fmap[i];
+      ec_tmp = eclass[tmp];
+      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
+         fmap[j-1] = fmap[j];
+      fmap[j-1] = tmp;
+   }
+}
+
+
+/*---------------------------------------------*/
+#define fswap(zz1, zz2) \
+   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define fvswap(zzp1, zzp2, zzn)       \
+{                                     \
+   Int32 yyp1 = (zzp1);               \
+   Int32 yyp2 = (zzp2);               \
+   Int32 yyn  = (zzn);                \
+   while (yyn > 0) {                  \
+      fswap(fmap[yyp1], fmap[yyp2]);  \
+      yyp1++; yyp2++; yyn--;          \
+   }                                  \
+}
+
+
+#define fmin(a,b) ((a) < (b)) ? (a) : (b)
+
+#define fpush(lz,hz) { stackLo[sp] = lz; \
+                       stackHi[sp] = hz; \
+                       sp++; }
+
+#define fpop(lz,hz) { sp--;              \
+                      lz = stackLo[sp];  \
+                      hz = stackHi[sp]; }
+
+#define FALLBACK_QSORT_SMALL_THRESH 10
+#define FALLBACK_QSORT_STACK_SIZE   100
+
+
+static
+void fallbackQSort3 ( UInt32* fmap, 
+                      UInt32* eclass,
+                      Int32   loSt, 
+                      Int32   hiSt )
+{
+   Int32 unLo, unHi, ltLo, gtHi, n, m;
+   Int32 sp, lo, hi;
+   UInt32 med, r, r3;
+   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
+   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
+
+   r = 0;
+
+   sp = 0;
+   fpush ( loSt, hiSt );
+
+   while (sp > 0) {
+
+      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
+
+      fpop ( lo, hi );
+      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
+         fallbackSimpleSort ( fmap, eclass, lo, hi );
+         continue;
+      }
+
+      /* Random partitioning.  Median of 3 sometimes fails to
+         avoid bad cases.  Median of 9 seems to help but 
+         looks rather expensive.  This too seems to work but
+         is cheaper.  Guidance for the magic constants 
+         7621 and 32768 is taken from Sedgewick's algorithms
+         book, chapter 35.
+      */
+      r = ((r * 7621) + 1) % 32768;
+      r3 = r % 3;
+      if (r3 == 0) med = eclass[fmap[lo]]; else
+      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
+                   med = eclass[fmap[hi]];
+
+      unLo = ltLo = lo;
+      unHi = gtHi = hi;
+
+      while (1) {
+         while (1) {
+            if (unLo > unHi) break;
+            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
+            if (n == 0) { 
+               fswap(fmap[unLo], fmap[ltLo]); 
+               ltLo++; unLo++; 
+               continue; 
+            };
+            if (n > 0) break;
+            unLo++;
+         }
+         while (1) {
+            if (unLo > unHi) break;
+            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
+            if (n == 0) { 
+               fswap(fmap[unHi], fmap[gtHi]); 
+               gtHi--; unHi--; 
+               continue; 
+            };
+            if (n < 0) break;
+            unHi--;
+         }
+         if (unLo > unHi) break;
+         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
+      }
+
+      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
+
+      if (gtHi < ltLo) continue;
+
+      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
+      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
+
+      n = lo + unLo - ltLo - 1;
+      m = hi - (gtHi - unHi) + 1;
+
+      if (n - lo > hi - m) {
+         fpush ( lo, n );
+         fpush ( m, hi );
+      } else {
+         fpush ( m, hi );
+         fpush ( lo, n );
+      }
+   }
+}
+
+#undef fmin
+#undef fpush
+#undef fpop
+#undef fswap
+#undef fvswap
+#undef FALLBACK_QSORT_SMALL_THRESH
+#undef FALLBACK_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > 0
+      eclass exists for [0 .. nblock-1]
+      ((UChar*)eclass) [0 .. nblock-1] holds block
+      ptr exists for [0 .. nblock-1]
+
+   Post:
+      ((UChar*)eclass) [0 .. nblock-1] holds block
+      All other areas of eclass destroyed
+      fmap [0 .. nblock-1] holds sorted order
+      bhtab [ 0 .. 2+(nblock/32) ] destroyed
+*/
+
 #define       SET_BH(zz)  bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
 #define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
 #define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
-#define      WORD_BH(zz)  bhtab[(zz) >> 5] 
-#define UNALIGNED_BH(zz)  ((zz) & 0x01f) 
- 
-static 
-void fallbackSort ( UInt32* fmap,  
-                    UInt32* eclass,  
-                    UInt32* bhtab, 
-                    Int32   nblock, 
-                    Int32   verb ) 
-{ 
-   Int32 ftab[257]; 
-   Int32 ftabCopy[256]; 
-   Int32 H, i, j, k, l, r, cc, cc1; 
-   Int32 nNotDone; 
-   Int32 nBhtab; 
-   UChar* eclass8 = (UChar*)eclass; 
- 
-   /*-- 
-      Initial 1-char radix sort to generate 
-      initial fmap and initial BH bits. 
-   --*/ 
-   if (verb >= 4) 
-      VPrintf0 ( "        bucket sorting ...\n" ); 
-   for (i = 0; i < 257;    i++) ftab[i] = 0; 
-   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 
-   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i]; 
-   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1]; 
- 
-   for (i = 0; i < nblock; i++) { 
-      j = eclass8[i]; 
-      k = ftab[j] - 1; 
-      ftab[j] = k; 
-      fmap[k] = i; 
-   } 
- 
-   nBhtab = 2 + (nblock / 32); 
-   for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 
-   for (i = 0; i < 256; i++) SET_BH(ftab[i]); 
- 
+#define      WORD_BH(zz)  bhtab[(zz) >> 5]
+#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
+
+static
+void fallbackSort ( UInt32* fmap, 
+                    UInt32* eclass, 
+                    UInt32* bhtab,
+                    Int32   nblock,
+                    Int32   verb )
+{
+   Int32 ftab[257];
+   Int32 ftabCopy[256];
+   Int32 H, i, j, k, l, r, cc, cc1;
+   Int32 nNotDone;
+   Int32 nBhtab;
+   UChar* eclass8 = (UChar*)eclass;
+
+   /*--
+      Initial 1-char radix sort to generate
+      initial fmap and initial BH bits.
+   --*/
+   if (verb >= 4)
+      VPrintf0 ( "        bucket sorting ...\n" );
+   for (i = 0; i < 257;    i++) ftab[i] = 0;
+   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
+   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
+   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
+
+   for (i = 0; i < nblock; i++) {
+      j = eclass8[i];
+      k = ftab[j] - 1;
+      ftab[j] = k;
+      fmap[k] = i;
+   }
+
+   nBhtab = 2 + (nblock / 32);
+   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
+   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
+
+   /*--
+      Inductively refine the buckets.  Kind-of an
+      "exponential radix sort" (!), inspired by the
+      Manber-Myers suffix array construction algorithm.
+   --*/
+
+   /*-- set sentinel bits for block-end detection --*/
+   for (i = 0; i < 32; i++) { 
+      SET_BH(nblock + 2*i);
+      CLEAR_BH(nblock + 2*i + 1);
+   }
+
+   /*-- the log(N) loop --*/
+   H = 1;
+   while (1) {
+
+      if (verb >= 4) 
+         VPrintf1 ( "        depth %6d has ", H );
+
+      j = 0;
+      for (i = 0; i < nblock; i++) {
+         if (ISSET_BH(i)) j = i;
+         k = fmap[i] - H; if (k < 0) k += nblock;
+         eclass[k] = j;
+      }
+
+      nNotDone = 0;
+      r = -1;
+      while (1) {
+
+	 /*-- find the next non-singleton bucket --*/
+         k = r + 1;
+         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+         if (ISSET_BH(k)) {
+            while (WORD_BH(k) == 0xffffffff) k += 32;
+            while (ISSET_BH(k)) k++;
+         }
+         l = k - 1;
+         if (l >= nblock) break;
+         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+         if (!ISSET_BH(k)) {
+            while (WORD_BH(k) == 0x00000000) k += 32;
+            while (!ISSET_BH(k)) k++;
+         }
+         r = k - 1;
+         if (r >= nblock) break;
+
+         /*-- now [l, r] bracket current bucket --*/
+         if (r > l) {
+            nNotDone += (r - l + 1);
+            fallbackQSort3 ( fmap, eclass, l, r );
+
+            /*-- scan bucket and generate header bits-- */
+            cc = -1;
+            for (i = l; i <= r; i++) {
+               cc1 = eclass[fmap[i]];
+               if (cc != cc1) { SET_BH(i); cc = cc1; };
+            }
+         }
+      }
+
+      if (verb >= 4) 
+         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
+
+      H *= 2;
+      if (H > nblock || nNotDone == 0) break;
+   }
+
    /*-- 
-      Inductively refine the buckets.  Kind-of an 
-      "exponential radix sort" (!), inspired by the 
-      Manber-Myers suffix array construction algorithm. 
-   --*/ 
- 
-   /*-- set sentinel bits for block-end detection --*/ 
-   for (i = 0; i < 32; i++) {  
-      SET_BH(nblock + 2*i); 
-      CLEAR_BH(nblock + 2*i + 1); 
-   } 
- 
-   /*-- the log(N) loop --*/ 
-   H = 1; 
-   while (1) { 
- 
-      if (verb >= 4)  
-         VPrintf1 ( "        depth %6d has ", H ); 
- 
-      j = 0; 
-      for (i = 0; i < nblock; i++) { 
-         if (ISSET_BH(i)) j = i; 
-         k = fmap[i] - H; if (k < 0) k += nblock; 
-         eclass[k] = j; 
-      } 
- 
-      nNotDone = 0; 
-      r = -1; 
-      while (1) { 
- 
-	 /*-- find the next non-singleton bucket --*/ 
-         k = r + 1; 
-         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 
-         if (ISSET_BH(k)) { 
-            while (WORD_BH(k) == 0xffffffff) k += 32; 
-            while (ISSET_BH(k)) k++; 
-         } 
-         l = k - 1; 
-         if (l >= nblock) break; 
-         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 
-         if (!ISSET_BH(k)) { 
-            while (WORD_BH(k) == 0x00000000) k += 32; 
-            while (!ISSET_BH(k)) k++; 
-         } 
-         r = k - 1; 
-         if (r >= nblock) break; 
- 
-         /*-- now [l, r] bracket current bucket --*/ 
-         if (r > l) { 
-            nNotDone += (r - l + 1); 
-            fallbackQSort3 ( fmap, eclass, l, r ); 
- 
-            /*-- scan bucket and generate header bits-- */ 
-            cc = -1; 
-            for (i = l; i <= r; i++) { 
-               cc1 = eclass[fmap[i]]; 
-               if (cc != cc1) { SET_BH(i); cc = cc1; }; 
-            } 
-         } 
-      } 
- 
-      if (verb >= 4)  
-         VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 
- 
-      H *= 2; 
-      if (H > nblock || nNotDone == 0) break; 
-   } 
- 
-   /*--  
-      Reconstruct the original block in 
-      eclass8 [0 .. nblock-1], since the 
-      previous phase destroyed it. 
-   --*/ 
-   if (verb >= 4) 
-      VPrintf0 ( "        reconstructing block ...\n" ); 
-   j = 0; 
-   for (i = 0; i < nblock; i++) { 
-      while (ftabCopy[j] == 0) j++; 
-      ftabCopy[j]--; 
-      eclass8[fmap[i]] = (UChar)j; 
-   } 
-   AssertH ( j < 256, 1005 ); 
-} 
- 
-#undef       SET_BH 
-#undef     CLEAR_BH 
-#undef     ISSET_BH 
-#undef      WORD_BH 
-#undef UNALIGNED_BH 
- 
- 
-/*---------------------------------------------*/ 
-/*--- The main, O(N^2 log(N)) sorting       ---*/ 
-/*--- algorithm.  Faster for "normal"       ---*/ 
-/*--- non-repetitive blocks.                ---*/ 
-/*---------------------------------------------*/ 
- 
-/*---------------------------------------------*/ 
-static 
-__inline__ 
-Bool mainGtU ( UInt32  i1,  
-               UInt32  i2, 
-               UChar*  block,  
-               UInt16* quadrant, 
-               UInt32  nblock, 
-               Int32*  budget ) 
-{ 
-   Int32  k; 
-   UChar  c1, c2; 
-   UInt16 s1, s2; 
- 
-   AssertD ( i1 != i2, "mainGtU" ); 
-   /* 1 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 2 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 3 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 4 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 5 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 6 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 7 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 8 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 9 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 10 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 11 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
-   /* 12 */ 
-   c1 = block[i1]; c2 = block[i2]; 
-   if (c1 != c2) return (c1 > c2); 
-   i1++; i2++; 
- 
-   k = nblock + 8; 
- 
-   do { 
-      /* 1 */ 
-      c1 = block[i1]; c2 = block[i2]; 
-      if (c1 != c2) return (c1 > c2); 
-      s1 = quadrant[i1]; s2 = quadrant[i2]; 
-      if (s1 != s2) return (s1 > s2); 
-      i1++; i2++; 
-      /* 2 */ 
-      c1 = block[i1]; c2 = block[i2]; 
-      if (c1 != c2) return (c1 > c2); 
-      s1 = quadrant[i1]; s2 = quadrant[i2]; 
-      if (s1 != s2) return (s1 > s2); 
-      i1++; i2++; 
-      /* 3 */ 
-      c1 = block[i1]; c2 = block[i2]; 
-      if (c1 != c2) return (c1 > c2); 
-      s1 = quadrant[i1]; s2 = quadrant[i2]; 
-      if (s1 != s2) return (s1 > s2); 
-      i1++; i2++; 
-      /* 4 */ 
-      c1 = block[i1]; c2 = block[i2]; 
-      if (c1 != c2) return (c1 > c2); 
-      s1 = quadrant[i1]; s2 = quadrant[i2]; 
-      if (s1 != s2) return (s1 > s2); 
-      i1++; i2++; 
-      /* 5 */ 
-      c1 = block[i1]; c2 = block[i2]; 
-      if (c1 != c2) return (c1 > c2); 
-      s1 = quadrant[i1]; s2 = quadrant[i2]; 
-      if (s1 != s2) return (s1 > s2); 
-      i1++; i2++; 
-      /* 6 */ 
-      c1 = block[i1]; c2 = block[i2]; 
-      if (c1 != c2) return (c1 > c2); 
-      s1 = quadrant[i1]; s2 = quadrant[i2]; 
-      if (s1 != s2) return (s1 > s2); 
-      i1++; i2++; 
-      /* 7 */ 
-      c1 = block[i1]; c2 = block[i2]; 
-      if (c1 != c2) return (c1 > c2); 
-      s1 = quadrant[i1]; s2 = quadrant[i2]; 
-      if (s1 != s2) return (s1 > s2); 
-      i1++; i2++; 
-      /* 8 */ 
-      c1 = block[i1]; c2 = block[i2]; 
-      if (c1 != c2) return (c1 > c2); 
-      s1 = quadrant[i1]; s2 = quadrant[i2]; 
-      if (s1 != s2) return (s1 > s2); 
-      i1++; i2++; 
- 
-      if (i1 >= nblock) i1 -= nblock; 
-      if (i2 >= nblock) i2 -= nblock; 
- 
-      k -= 8; 
-      (*budget)--; 
-   } 
-      while (k >= 0); 
- 
-   return False; 
-} 
- 
- 
-/*---------------------------------------------*/ 
-/*-- 
-   Knuth's increments seem to work better 
-   than Incerpi-Sedgewick here.  Possibly 
-   because the number of elems to sort is 
-   usually small, typically <= 20. 
---*/ 
-static 
-Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 
-                   9841, 29524, 88573, 265720, 
-                   797161, 2391484 }; 
- 
+      Reconstruct the original block in
+      eclass8 [0 .. nblock-1], since the
+      previous phase destroyed it.
+   --*/
+   if (verb >= 4)
+      VPrintf0 ( "        reconstructing block ...\n" );
+   j = 0;
+   for (i = 0; i < nblock; i++) {
+      while (ftabCopy[j] == 0) j++;
+      ftabCopy[j]--;
+      eclass8[fmap[i]] = (UChar)j;
+   }
+   AssertH ( j < 256, 1005 );
+}
+
+#undef       SET_BH
+#undef     CLEAR_BH
+#undef     ISSET_BH
+#undef      WORD_BH
+#undef UNALIGNED_BH
+
+
+/*---------------------------------------------*/
+/*--- The main, O(N^2 log(N)) sorting       ---*/
+/*--- algorithm.  Faster for "normal"       ---*/
+/*--- non-repetitive blocks.                ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+static
+__inline__
+Bool mainGtU ( UInt32  i1, 
+               UInt32  i2,
+               UChar*  block, 
+               UInt16* quadrant,
+               UInt32  nblock,
+               Int32*  budget )
+{
+   Int32  k;
+   UChar  c1, c2;
+   UInt16 s1, s2;
+
+   AssertD ( i1 != i2, "mainGtU" );
+   /* 1 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 2 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 3 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 4 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 5 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 6 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 7 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 8 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 9 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 10 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 11 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 12 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+
+   k = nblock + 8;
+
+   do {
+      /* 1 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 2 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 3 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 4 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 5 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 6 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 7 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 8 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+
+      if (i1 >= nblock) i1 -= nblock;
+      if (i2 >= nblock) i2 -= nblock;
+
+      k -= 8;
+      (*budget)--;
+   }
+      while (k >= 0);
+
+   return False;
+}
+
+
+/*---------------------------------------------*/
+/*--
+   Knuth's increments seem to work better
+   than Incerpi-Sedgewick here.  Possibly
+   because the number of elems to sort is
+   usually small, typically <= 20.
+--*/
+static
+Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+                   9841, 29524, 88573, 265720,
+                   797161, 2391484 };
+
+static
+void mainSimpleSort ( UInt32* ptr,
+                      UChar*  block,
+                      UInt16* quadrant,
+                      Int32   nblock,
+                      Int32   lo, 
+                      Int32   hi, 
+                      Int32   d,
+                      Int32*  budget )
+{
+   Int32 i, j, h, bigN, hp;
+   UInt32 v;
+
+   bigN = hi - lo + 1;
+   if (bigN < 2) return;
+
+   hp = 0;
+   while (incs[hp] < bigN) hp++;
+   hp--;
+
+   for (; hp >= 0; hp--) {
+      h = incs[hp];
+
+      i = lo + h;
+      while (True) {
+
+         /*-- copy 1 --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         ptr[j] = v;
+         i++;
+
+         /*-- copy 2 --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         ptr[j] = v;
+         i++;
+
+         /*-- copy 3 --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         ptr[j] = v;
+         i++;
+
+         if (*budget < 0) return;
+      }
+   }
+}
+
+
+/*---------------------------------------------*/
+/*--
+   The following is an implementation of
+   an elegant 3-way quicksort for strings,
+   described in a paper "Fast Algorithms for
+   Sorting and Searching Strings", by Robert
+   Sedgewick and Jon L. Bentley.
+--*/
+
+#define mswap(zz1, zz2) \
+   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define mvswap(zzp1, zzp2, zzn)       \
+{                                     \
+   Int32 yyp1 = (zzp1);               \
+   Int32 yyp2 = (zzp2);               \
+   Int32 yyn  = (zzn);                \
+   while (yyn > 0) {                  \
+      mswap(ptr[yyp1], ptr[yyp2]);    \
+      yyp1++; yyp2++; yyn--;          \
+   }                                  \
+}
+
 static 
-void mainSimpleSort ( UInt32* ptr, 
-                      UChar*  block, 
-                      UInt16* quadrant, 
-                      Int32   nblock, 
-                      Int32   lo,  
-                      Int32   hi,  
-                      Int32   d, 
-                      Int32*  budget ) 
-{ 
-   Int32 i, j, h, bigN, hp; 
-   UInt32 v; 
- 
-   bigN = hi - lo + 1; 
-   if (bigN < 2) return; 
- 
-   hp = 0; 
-   while (incs[hp] < bigN) hp++; 
-   hp--; 
- 
-   for (; hp >= 0; hp--) { 
-      h = incs[hp]; 
- 
-      i = lo + h; 
-      while (True) { 
- 
-         /*-- copy 1 --*/ 
-         if (i > hi) break; 
-         v = ptr[i]; 
-         j = i; 
-         while ( mainGtU (  
-                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget  
-                 ) ) { 
-            ptr[j] = ptr[j-h]; 
-            j = j - h; 
-            if (j <= (lo + h - 1)) break; 
-         } 
-         ptr[j] = v; 
-         i++; 
- 
-         /*-- copy 2 --*/ 
-         if (i > hi) break; 
-         v = ptr[i]; 
-         j = i; 
-         while ( mainGtU (  
-                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget  
-                 ) ) { 
-            ptr[j] = ptr[j-h]; 
-            j = j - h; 
-            if (j <= (lo + h - 1)) break; 
-         } 
-         ptr[j] = v; 
-         i++; 
- 
-         /*-- copy 3 --*/ 
-         if (i > hi) break; 
-         v = ptr[i]; 
-         j = i; 
-         while ( mainGtU (  
-                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget  
-                 ) ) { 
-            ptr[j] = ptr[j-h]; 
-            j = j - h; 
-            if (j <= (lo + h - 1)) break; 
-         } 
-         ptr[j] = v; 
-         i++; 
- 
-         if (*budget < 0) return; 
-      } 
-   } 
-} 
- 
- 
-/*---------------------------------------------*/ 
-/*-- 
-   The following is an implementation of 
-   an elegant 3-way quicksort for strings, 
-   described in a paper "Fast Algorithms for 
-   Sorting and Searching Strings", by Robert 
-   Sedgewick and Jon L. Bentley. 
---*/ 
- 
-#define mswap(zz1, zz2) \ 
-   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 
- 
-#define mvswap(zzp1, zzp2, zzn)       \ 
-{                                     \ 
-   Int32 yyp1 = (zzp1);               \ 
-   Int32 yyp2 = (zzp2);               \ 
-   Int32 yyn  = (zzn);                \ 
-   while (yyn > 0) {                  \ 
-      mswap(ptr[yyp1], ptr[yyp2]);    \ 
-      yyp1++; yyp2++; yyn--;          \ 
-   }                                  \ 
-} 
- 
-static  
-__inline__ 
-UChar mmed3 ( UChar a, UChar b, UChar c ) 
-{ 
-   UChar t; 
-   if (a > b) { t = a; a = b; b = t; }; 
-   if (b > c) {  
-      b = c; 
-      if (a > b) b = a; 
-   } 
-   return b; 
-} 
- 
-#define mmin(a,b) ((a) < (b)) ? (a) : (b) 
- 
-#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 
-                          stackHi[sp] = hz; \ 
-                          stackD [sp] = dz; \ 
-                          sp++; } 
- 
-#define mpop(lz,hz,dz) { sp--;             \ 
-                         lz = stackLo[sp]; \ 
-                         hz = stackHi[sp]; \ 
-                         dz = stackD [sp]; } 
- 
- 
-#define mnextsize(az) (nextHi[az]-nextLo[az]) 
- 
-#define mnextswap(az,bz)                                        \ 
-   { Int32 tz;                                                  \ 
-     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 
-     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 
-     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 
- 
- 
-#define MAIN_QSORT_SMALL_THRESH 20 
-#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 
-#define MAIN_QSORT_STACK_SIZE 100 
- 
-static 
-void mainQSort3 ( UInt32* ptr, 
-                  UChar*  block, 
-                  UInt16* quadrant, 
-                  Int32   nblock, 
-                  Int32   loSt,  
-                  Int32   hiSt,  
-                  Int32   dSt, 
-                  Int32*  budget ) 
-{ 
-   Int32 unLo, unHi, ltLo, gtHi, n, m, med; 
-   Int32 sp, lo, hi, d; 
- 
-   Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 
-   Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 
-   Int32 stackD [MAIN_QSORT_STACK_SIZE]; 
- 
-   Int32 nextLo[3]; 
-   Int32 nextHi[3]; 
-   Int32 nextD [3]; 
- 
-   sp = 0; 
-   mpush ( loSt, hiSt, dSt ); 
- 
-   while (sp > 0) { 
- 
-      AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); 
- 
-      mpop ( lo, hi, d ); 
-      if (hi - lo < MAIN_QSORT_SMALL_THRESH ||  
-          d > MAIN_QSORT_DEPTH_THRESH) { 
-         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 
-         if (*budget < 0) return; 
-         continue; 
-      } 
- 
-      med = (Int32)  
-            mmed3 ( block[ptr[ lo         ]+d], 
-                    block[ptr[ hi         ]+d], 
-                    block[ptr[ (lo+hi)>>1 ]+d] ); 
- 
-      unLo = ltLo = lo; 
-      unHi = gtHi = hi; 
- 
-      while (True) { 
-         while (True) { 
-            if (unLo > unHi) break; 
-            n = ((Int32)block[ptr[unLo]+d]) - med; 
-            if (n == 0) {  
-               mswap(ptr[unLo], ptr[ltLo]);  
-               ltLo++; unLo++; continue;  
-            }; 
-            if (n >  0) break; 
-            unLo++; 
-         } 
-         while (True) { 
-            if (unLo > unHi) break; 
-            n = ((Int32)block[ptr[unHi]+d]) - med; 
-            if (n == 0) {  
-               mswap(ptr[unHi], ptr[gtHi]);  
-               gtHi--; unHi--; continue;  
-            }; 
-            if (n <  0) break; 
-            unHi--; 
-         } 
-         if (unLo > unHi) break; 
-         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 
-      } 
- 
-      AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 
- 
-      if (gtHi < ltLo) { 
-         mpush(lo, hi, d+1 ); 
-         continue; 
-      } 
- 
-      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 
-      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 
- 
-      n = lo + unLo - ltLo - 1; 
-      m = hi - (gtHi - unHi) + 1; 
- 
-      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d; 
-      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d; 
-      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 
- 
-      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 
-      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 
-      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 
- 
-      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 
-      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 
- 
-      mpush (nextLo[0], nextHi[0], nextD[0]); 
-      mpush (nextLo[1], nextHi[1], nextD[1]); 
-      mpush (nextLo[2], nextHi[2], nextD[2]); 
-   } 
-} 
- 
-#undef mswap 
-#undef mvswap 
-#undef mpush 
-#undef mpop 
-#undef mmin 
-#undef mnextsize 
-#undef mnextswap 
-#undef MAIN_QSORT_SMALL_THRESH 
-#undef MAIN_QSORT_DEPTH_THRESH 
-#undef MAIN_QSORT_STACK_SIZE 
- 
- 
-/*---------------------------------------------*/ 
-/* Pre: 
-      nblock > N_OVERSHOOT 
-      block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 
-      ((UChar*)block32) [0 .. nblock-1] holds block 
-      ptr exists for [0 .. nblock-1] 
- 
-   Post: 
-      ((UChar*)block32) [0 .. nblock-1] holds block 
-      All other areas of block32 destroyed 
-      ftab [0 .. 65536 ] destroyed 
-      ptr [0 .. nblock-1] holds sorted order 
-      if (*budget < 0), sorting was abandoned 
-*/ 
- 
-#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 
-#define SETMASK (1 << 21) 
-#define CLEARMASK (~(SETMASK)) 
- 
-static 
-void mainSort ( UInt32* ptr,  
-                UChar*  block, 
-                UInt16* quadrant,  
-                UInt32* ftab, 
-                Int32   nblock, 
-                Int32   verb, 
-                Int32*  budget ) 
-{ 
-   Int32  i, j, k, ss, sb; 
-   Int32  runningOrder[256]; 
-   Bool   bigDone[256]; 
-   Int32  copyStart[256]; 
-   Int32  copyEnd  [256]; 
-   UChar  c1; 
-   Int32  numQSorted; 
-   UInt16 s; 
-   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" ); 
- 
-   /*-- set up the 2-byte frequency table --*/ 
-   for (i = 65536; i >= 0; i--) ftab[i] = 0; 
- 
-   j = block[0] << 8; 
-   i = nblock-1; 
-   for (; i >= 3; i -= 4) { 
-      quadrant[i] = 0; 
-      j = (j >> 8) | ( ((UInt16)block[i]) << 8); 
-      ftab[j]++; 
-      quadrant[i-1] = 0; 
-      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 
-      ftab[j]++; 
-      quadrant[i-2] = 0; 
-      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 
-      ftab[j]++; 
-      quadrant[i-3] = 0; 
-      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 
-      ftab[j]++; 
-   } 
-   for (; i >= 0; i--) { 
-      quadrant[i] = 0; 
-      j = (j >> 8) | ( ((UInt16)block[i]) << 8); 
-      ftab[j]++; 
-   } 
- 
-   /*-- (emphasises close relationship of block & quadrant) --*/ 
-   for (i = 0; i < BZ_N_OVERSHOOT; i++) { 
-      block   [nblock+i] = block[i]; 
-      quadrant[nblock+i] = 0; 
-   } 
- 
-   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" ); 
- 
-   /*-- Complete the initial radix sort --*/ 
-   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 
- 
-   s = block[0] << 8; 
-   i = nblock-1; 
-   for (; i >= 3; i -= 4) { 
-      s = (s >> 8) | (block[i] << 8); 
-      j = ftab[s] -1; 
-      ftab[s] = j; 
-      ptr[j] = i; 
-      s = (s >> 8) | (block[i-1] << 8); 
-      j = ftab[s] -1; 
-      ftab[s] = j; 
-      ptr[j] = i-1; 
-      s = (s >> 8) | (block[i-2] << 8); 
-      j = ftab[s] -1; 
-      ftab[s] = j; 
-      ptr[j] = i-2; 
-      s = (s >> 8) | (block[i-3] << 8); 
-      j = ftab[s] -1; 
-      ftab[s] = j; 
-      ptr[j] = i-3; 
-   } 
-   for (; i >= 0; i--) { 
-      s = (s >> 8) | (block[i] << 8); 
-      j = ftab[s] -1; 
-      ftab[s] = j; 
-      ptr[j] = i; 
-   } 
- 
-   /*-- 
-      Now ftab contains the first loc of every small bucket. 
-      Calculate the running order, from smallest to largest 
-      big bucket. 
-   --*/ 
-   for (i = 0; i <= 255; i++) { 
-      bigDone     [i] = False; 
-      runningOrder[i] = i; 
-   } 
- 
-   { 
-      Int32 vv; 
-      Int32 h = 1; 
-      do h = 3 * h + 1; while (h <= 256); 
-      do { 
-         h = h / 3; 
-         for (i = h; i <= 255; i++) { 
-            vv = runningOrder[i]; 
-            j = i; 
-            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 
-               runningOrder[j] = runningOrder[j-h]; 
-               j = j - h; 
-               if (j <= (h - 1)) goto zero; 
-            } 
-            zero: 
-            runningOrder[j] = vv; 
-         } 
-      } while (h != 1); 
-   } 
- 
-   /*-- 
-      The main sorting loop. 
-   --*/ 
- 
-   numQSorted = 0; 
- 
-   for (i = 0; i <= 255; i++) { 
- 
-      /*-- 
-         Process big buckets, starting with the least full. 
-         Basically this is a 3-step process in which we call 
-         mainQSort3 to sort the small buckets [ss, j], but 
-         also make a big effort to avoid the calls if we can. 
-      --*/ 
-      ss = runningOrder[i]; 
- 
-      /*-- 
-         Step 1: 
-         Complete the big bucket [ss] by quicksorting 
-         any unsorted small buckets [ss, j], for j != ss.   
-         Hopefully previous pointer-scanning phases have already 
-         completed many of the small buckets [ss, j], so 
-         we don't have to sort them at all. 
-      --*/ 
-      for (j = 0; j <= 255; j++) { 
-         if (j != ss) { 
-            sb = (ss << 8) + j; 
-            if ( ! (ftab[sb] & SETMASK) ) { 
-               Int32 lo = ftab[sb]   & CLEARMASK; 
-               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 
-               if (hi > lo) { 
-                  if (verb >= 4) 
-                     VPrintf4 ( "        qsort [0x%x, 0x%x]   " 
-                                "done %d   this %d\n", 
-                                ss, j, numQSorted, hi - lo + 1 ); 
-                  mainQSort3 (  
-                     ptr, block, quadrant, nblock,  
-                     lo, hi, BZ_N_RADIX, budget  
-                  );    
-                  numQSorted += (hi - lo + 1); 
-                  if (*budget < 0) return; 
-               } 
-            } 
-            ftab[sb] |= SETMASK; 
-         } 
-      } 
- 
-      AssertH ( !bigDone[ss], 1006 ); 
- 
-      /*-- 
-         Step 2: 
-         Now scan this big bucket [ss] so as to synthesise the 
-         sorted order for small buckets [t, ss] for all t, 
-         including, magically, the bucket [ss,ss] too. 
-         This will avoid doing Real Work in subsequent Step 1's. 
-      --*/ 
-      { 
-         for (j = 0; j <= 255; j++) { 
-            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK; 
-            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 
-         } 
-         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 
-            k = ptr[j]-1; if (k < 0) k += nblock; 
-            c1 = block[k]; 
+__inline__
+UChar mmed3 ( UChar a, UChar b, UChar c )
+{
+   UChar t;
+   if (a > b) { t = a; a = b; b = t; };
+   if (b > c) { 
+      b = c;
+      if (a > b) b = a;
+   }
+   return b;
+}
+
+#define mmin(a,b) ((a) < (b)) ? (a) : (b)
+
+#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
+                          stackHi[sp] = hz; \
+                          stackD [sp] = dz; \
+                          sp++; }
+
+#define mpop(lz,hz,dz) { sp--;             \
+                         lz = stackLo[sp]; \
+                         hz = stackHi[sp]; \
+                         dz = stackD [sp]; }
+
+
+#define mnextsize(az) (nextHi[az]-nextLo[az])
+
+#define mnextswap(az,bz)                                        \
+   { Int32 tz;                                                  \
+     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
+     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
+     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
+
+
+#define MAIN_QSORT_SMALL_THRESH 20
+#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
+#define MAIN_QSORT_STACK_SIZE 100
+
+static
+void mainQSort3 ( UInt32* ptr,
+                  UChar*  block,
+                  UInt16* quadrant,
+                  Int32   nblock,
+                  Int32   loSt, 
+                  Int32   hiSt, 
+                  Int32   dSt,
+                  Int32*  budget )
+{
+   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
+   Int32 sp, lo, hi, d;
+
+   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
+   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
+   Int32 stackD [MAIN_QSORT_STACK_SIZE];
+
+   Int32 nextLo[3];
+   Int32 nextHi[3];
+   Int32 nextD [3];
+
+   sp = 0;
+   mpush ( loSt, hiSt, dSt );
+
+   while (sp > 0) {
+
+      AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
+
+      mpop ( lo, hi, d );
+      if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
+          d > MAIN_QSORT_DEPTH_THRESH) {
+         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
+         if (*budget < 0) return;
+         continue;
+      }
+
+      med = (Int32) 
+            mmed3 ( block[ptr[ lo         ]+d],
+                    block[ptr[ hi         ]+d],
+                    block[ptr[ (lo+hi)>>1 ]+d] );
+
+      unLo = ltLo = lo;
+      unHi = gtHi = hi;
+
+      while (True) {
+         while (True) {
+            if (unLo > unHi) break;
+            n = ((Int32)block[ptr[unLo]+d]) - med;
+            if (n == 0) { 
+               mswap(ptr[unLo], ptr[ltLo]); 
+               ltLo++; unLo++; continue; 
+            };
+            if (n >  0) break;
+            unLo++;
+         }
+         while (True) {
+            if (unLo > unHi) break;
+            n = ((Int32)block[ptr[unHi]+d]) - med;
+            if (n == 0) { 
+               mswap(ptr[unHi], ptr[gtHi]); 
+               gtHi--; unHi--; continue; 
+            };
+            if (n <  0) break;
+            unHi--;
+         }
+         if (unLo > unHi) break;
+         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
+      }
+
+      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
+
+      if (gtHi < ltLo) {
+         mpush(lo, hi, d+1 );
+         continue;
+      }
+
+      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
+      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
+
+      n = lo + unLo - ltLo - 1;
+      m = hi - (gtHi - unHi) + 1;
+
+      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
+      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
+      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
+
+      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
+      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+
+      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
+      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
+
+      mpush (nextLo[0], nextHi[0], nextD[0]);
+      mpush (nextLo[1], nextHi[1], nextD[1]);
+      mpush (nextLo[2], nextHi[2], nextD[2]);
+   }
+}
+
+#undef mswap
+#undef mvswap
+#undef mpush
+#undef mpop
+#undef mmin
+#undef mnextsize
+#undef mnextswap
+#undef MAIN_QSORT_SMALL_THRESH
+#undef MAIN_QSORT_DEPTH_THRESH
+#undef MAIN_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > N_OVERSHOOT
+      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
+      ((UChar*)block32) [0 .. nblock-1] holds block
+      ptr exists for [0 .. nblock-1]
+
+   Post:
+      ((UChar*)block32) [0 .. nblock-1] holds block
+      All other areas of block32 destroyed
+      ftab [0 .. 65536 ] destroyed
+      ptr [0 .. nblock-1] holds sorted order
+      if (*budget < 0), sorting was abandoned
+*/
+
+#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
+#define SETMASK (1 << 21)
+#define CLEARMASK (~(SETMASK))
+
+static
+void mainSort ( UInt32* ptr, 
+                UChar*  block,
+                UInt16* quadrant, 
+                UInt32* ftab,
+                Int32   nblock,
+                Int32   verb,
+                Int32*  budget )
+{
+   Int32  i, j, k, ss, sb;
+   Int32  runningOrder[256];
+   Bool   bigDone[256];
+   Int32  copyStart[256];
+   Int32  copyEnd  [256];
+   UChar  c1;
+   Int32  numQSorted;
+   UInt16 s;
+   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
+
+   /*-- set up the 2-byte frequency table --*/
+   for (i = 65536; i >= 0; i--) ftab[i] = 0;
+
+   j = block[0] << 8;
+   i = nblock-1;
+   for (; i >= 3; i -= 4) {
+      quadrant[i] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+      ftab[j]++;
+      quadrant[i-1] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
+      ftab[j]++;
+      quadrant[i-2] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
+      ftab[j]++;
+      quadrant[i-3] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
+      ftab[j]++;
+   }
+   for (; i >= 0; i--) {
+      quadrant[i] = 0;
+      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+      ftab[j]++;
+   }
+
+   /*-- (emphasises close relationship of block & quadrant) --*/
+   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
+      block   [nblock+i] = block[i];
+      quadrant[nblock+i] = 0;
+   }
+
+   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
+
+   /*-- Complete the initial radix sort --*/
+   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
+
+   s = block[0] << 8;
+   i = nblock-1;
+   for (; i >= 3; i -= 4) {
+      s = (s >> 8) | (block[i] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i;
+      s = (s >> 8) | (block[i-1] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i-1;
+      s = (s >> 8) | (block[i-2] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i-2;
+      s = (s >> 8) | (block[i-3] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i-3;
+   }
+   for (; i >= 0; i--) {
+      s = (s >> 8) | (block[i] << 8);
+      j = ftab[s] -1;
+      ftab[s] = j;
+      ptr[j] = i;
+   }
+
+   /*--
+      Now ftab contains the first loc of every small bucket.
+      Calculate the running order, from smallest to largest
+      big bucket.
+   --*/
+   for (i = 0; i <= 255; i++) {
+      bigDone     [i] = False;
+      runningOrder[i] = i;
+   }
+
+   {
+      Int32 vv;
+      Int32 h = 1;
+      do h = 3 * h + 1; while (h <= 256);
+      do {
+         h = h / 3;
+         for (i = h; i <= 255; i++) {
+            vv = runningOrder[i];
+            j = i;
+            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
+               runningOrder[j] = runningOrder[j-h];
+               j = j - h;
+               if (j <= (h - 1)) goto zero;
+            }
+            zero:
+            runningOrder[j] = vv;
+         }
+      } while (h != 1);
+   }
+
+   /*--
+      The main sorting loop.
+   --*/
+
+   numQSorted = 0;
+
+   for (i = 0; i <= 255; i++) {
+
+      /*--
+         Process big buckets, starting with the least full.
+         Basically this is a 3-step process in which we call
+         mainQSort3 to sort the small buckets [ss, j], but
+         also make a big effort to avoid the calls if we can.
+      --*/
+      ss = runningOrder[i];
+
+      /*--
+         Step 1:
+         Complete the big bucket [ss] by quicksorting
+         any unsorted small buckets [ss, j], for j != ss.  
+         Hopefully previous pointer-scanning phases have already
+         completed many of the small buckets [ss, j], so
+         we don't have to sort them at all.
+      --*/
+      for (j = 0; j <= 255; j++) {
+         if (j != ss) {
+            sb = (ss << 8) + j;
+            if ( ! (ftab[sb] & SETMASK) ) {
+               Int32 lo = ftab[sb]   & CLEARMASK;
+               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
+               if (hi > lo) {
+                  if (verb >= 4)
+                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
+                                "done %d   this %d\n",
+                                ss, j, numQSorted, hi - lo + 1 );
+                  mainQSort3 ( 
+                     ptr, block, quadrant, nblock, 
+                     lo, hi, BZ_N_RADIX, budget 
+                  );   
+                  numQSorted += (hi - lo + 1);
+                  if (*budget < 0) return;
+               }
+            }
+            ftab[sb] |= SETMASK;
+         }
+      }
+
+      AssertH ( !bigDone[ss], 1006 );
+
+      /*--
+         Step 2:
+         Now scan this big bucket [ss] so as to synthesise the
+         sorted order for small buckets [t, ss] for all t,
+         including, magically, the bucket [ss,ss] too.
+         This will avoid doing Real Work in subsequent Step 1's.
+      --*/
+      {
+         for (j = 0; j <= 255; j++) {
+            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
+            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
+         }
+         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
+            k = ptr[j]-1; if (k < 0) k += nblock;
+            c1 = block[k];
+            if (!bigDone[c1])
+               ptr[ copyStart[c1]++ ] = k;
+         }
+         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
+            k = ptr[j]-1; if (k < 0) k += nblock;
+            c1 = block[k];
             if (!bigDone[c1]) 
-               ptr[ copyStart[c1]++ ] = k; 
-         } 
-         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 
-            k = ptr[j]-1; if (k < 0) k += nblock; 
-            c1 = block[k]; 
-            if (!bigDone[c1])  
-               ptr[ copyEnd[c1]-- ] = k; 
-         } 
-      } 
- 
-      AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 
-                ||  
-                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 
-                   Necessity for this case is demonstrated by compressing  
-                   a sequence of approximately 48.5 million of character  
-                   251; 1.0.0/1.0.1 will then die here. */ 
-                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 
-                1007 ) 
- 
-      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 
- 
-      /*-- 
-         Step 3: 
-         The [ss] big bucket is now done.  Record this fact, 
-         and update the quadrant descriptors.  Remember to 
-         update quadrants in the overshoot area too, if 
-         necessary.  The "if (i < 255)" test merely skips 
-         this updating for the last bucket processed, since 
-         updating for the last bucket is pointless. 
- 
-         The quadrant array provides a way to incrementally 
-         cache sort orderings, as they appear, so as to  
-         make subsequent comparisons in fullGtU() complete 
-         faster.  For repetitive blocks this makes a big 
-         difference (but not big enough to be able to avoid 
-         the fallback sorting mechanism, exponential radix sort). 
- 
-         The precise meaning is: at all times: 
- 
-            for 0 <= i < nblock and 0 <= j <= nblock 
- 
-            if block[i] != block[j],  
- 
-               then the relative values of quadrant[i] and  
-                    quadrant[j] are meaningless. 
- 
-               else { 
-                  if quadrant[i] < quadrant[j] 
-                     then the string starting at i lexicographically 
-                     precedes the string starting at j 
- 
-                  else if quadrant[i] > quadrant[j] 
-                     then the string starting at j lexicographically 
-                     precedes the string starting at i 
- 
-                  else 
-                     the relative ordering of the strings starting 
-                     at i and j has not yet been determined. 
-               } 
-      --*/ 
-      bigDone[ss] = True; 
- 
-      if (i < 255) { 
-         Int32 bbStart  = ftab[ss << 8] & CLEARMASK; 
-         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 
-         Int32 shifts   = 0; 
- 
-         while ((bbSize >> shifts) > 65534) shifts++; 
- 
-         for (j = bbSize-1; j >= 0; j--) { 
-            Int32 a2update     = ptr[bbStart + j]; 
-            UInt16 qVal        = (UInt16)(j >> shifts); 
-            quadrant[a2update] = qVal; 
-            if (a2update < BZ_N_OVERSHOOT) 
-               quadrant[a2update + nblock] = qVal; 
-         } 
-         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 
-      } 
- 
-   } 
- 
-   if (verb >= 4) 
-      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n", 
-                 nblock, numQSorted, nblock - numQSorted ); 
-} 
- 
-#undef BIGFREQ 
-#undef SETMASK 
-#undef CLEARMASK 
- 
- 
-/*---------------------------------------------*/ 
-/* Pre: 
-      nblock > 0 
-      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 
-      ((UChar*)arr2)  [0 .. nblock-1] holds block 
-      arr1 exists for [0 .. nblock-1] 
- 
-   Post: 
-      ((UChar*)arr2) [0 .. nblock-1] holds block 
-      All other areas of block destroyed 
-      ftab [ 0 .. 65536 ] destroyed 
-      arr1 [0 .. nblock-1] holds sorted order 
-*/ 
-void BZ2_blockSort ( EState* s ) 
-{ 
-   UInt32* ptr    = s->ptr;  
-   UChar*  block  = s->block; 
-   UInt32* ftab   = s->ftab; 
-   Int32   nblock = s->nblock; 
-   Int32   verb   = s->verbosity; 
-   Int32   wfact  = s->workFactor; 
-   UInt16* quadrant; 
-   Int32   budget; 
-   Int32   budgetInit; 
-   Int32   i; 
- 
-   if (nblock < 10000) { 
-      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 
-   } else { 
-      /* Calculate the location for quadrant, remembering to get 
-         the alignment right.  Assumes that &(block[0]) is at least 
-         2-byte aligned -- this should be ok since block is really 
-         the first section of arr2. 
-      */ 
-      i = nblock+BZ_N_OVERSHOOT; 
-      if (i & 1) i++; 
-      quadrant = (UInt16*)(&(block[i])); 
- 
-      /* (wfact-1) / 3 puts the default-factor-30 
-         transition point at very roughly the same place as  
-         with v0.1 and v0.9.0.   
-         Not that it particularly matters any more, since the 
-         resulting compressed stream is now the same regardless 
-         of whether or not we use the main sort or fallback sort. 
-      */ 
-      if (wfact < 1  ) wfact = 1; 
-      if (wfact > 100) wfact = 100; 
-      budgetInit = nblock * ((wfact-1) / 3); 
-      budget = budgetInit; 
- 
-      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 
-      if (verb >= 3)  
-         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n", 
-                    budgetInit - budget, 
-                    nblock,  
-                    (float)(budgetInit - budget) / 
-                    (float)(nblock==0 ? 1 : nblock) );  
-      if (budget < 0) { 
-         if (verb >= 2)  
-            VPrintf0 ( "    too repetitive; using fallback" 
-                       " sorting algorithm\n" ); 
-         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 
-      } 
-   } 
- 
-   s->origPtr = -1; 
-   for (i = 0; i < s->nblock; i++) 
-      if (ptr[i] == 0) 
-         { s->origPtr = i; break; }; 
- 
-   AssertH( s->origPtr != -1, 1003 ); 
-} 
- 
- 
-/*-------------------------------------------------------------*/ 
-/*--- end                                       blocksort.c ---*/ 
-/*-------------------------------------------------------------*/ 
+               ptr[ copyEnd[c1]-- ] = k;
+         }
+      }
+
+      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
+                || 
+                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
+                   Necessity for this case is demonstrated by compressing 
+                   a sequence of approximately 48.5 million of character 
+                   251; 1.0.0/1.0.1 will then die here. */
+                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
+                1007 )
+
+      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
+
+      /*--
+         Step 3:
+         The [ss] big bucket is now done.  Record this fact,
+         and update the quadrant descriptors.  Remember to
+         update quadrants in the overshoot area too, if
+         necessary.  The "if (i < 255)" test merely skips
+         this updating for the last bucket processed, since
+         updating for the last bucket is pointless.
+
+         The quadrant array provides a way to incrementally
+         cache sort orderings, as they appear, so as to 
+         make subsequent comparisons in fullGtU() complete
+         faster.  For repetitive blocks this makes a big
+         difference (but not big enough to be able to avoid
+         the fallback sorting mechanism, exponential radix sort).
+
+         The precise meaning is: at all times:
+
+            for 0 <= i < nblock and 0 <= j <= nblock
+
+            if block[i] != block[j], 
+
+               then the relative values of quadrant[i] and 
+                    quadrant[j] are meaningless.
+
+               else {
+                  if quadrant[i] < quadrant[j]
+                     then the string starting at i lexicographically
+                     precedes the string starting at j
+
+                  else if quadrant[i] > quadrant[j]
+                     then the string starting at j lexicographically
+                     precedes the string starting at i
+
+                  else
+                     the relative ordering of the strings starting
+                     at i and j has not yet been determined.
+               }
+      --*/
+      bigDone[ss] = True;
+
+      if (i < 255) {
+         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
+         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
+         Int32 shifts   = 0;
+
+         while ((bbSize >> shifts) > 65534) shifts++;
+
+         for (j = bbSize-1; j >= 0; j--) {
+            Int32 a2update     = ptr[bbStart + j];
+            UInt16 qVal        = (UInt16)(j >> shifts);
+            quadrant[a2update] = qVal;
+            if (a2update < BZ_N_OVERSHOOT)
+               quadrant[a2update + nblock] = qVal;
+         }
+         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
+      }
+
+   }
+
+   if (verb >= 4)
+      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
+                 nblock, numQSorted, nblock - numQSorted );
+}
+
+#undef BIGFREQ
+#undef SETMASK
+#undef CLEARMASK
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > 0
+      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
+      ((UChar*)arr2)  [0 .. nblock-1] holds block
+      arr1 exists for [0 .. nblock-1]
+
+   Post:
+      ((UChar*)arr2) [0 .. nblock-1] holds block
+      All other areas of block destroyed
+      ftab [ 0 .. 65536 ] destroyed
+      arr1 [0 .. nblock-1] holds sorted order
+*/
+void BZ2_blockSort ( EState* s )
+{
+   UInt32* ptr    = s->ptr; 
+   UChar*  block  = s->block;
+   UInt32* ftab   = s->ftab;
+   Int32   nblock = s->nblock;
+   Int32   verb   = s->verbosity;
+   Int32   wfact  = s->workFactor;
+   UInt16* quadrant;
+   Int32   budget;
+   Int32   budgetInit;
+   Int32   i;
+
+   if (nblock < 10000) {
+      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+   } else {
+      /* Calculate the location for quadrant, remembering to get
+         the alignment right.  Assumes that &(block[0]) is at least
+         2-byte aligned -- this should be ok since block is really
+         the first section of arr2.
+      */
+      i = nblock+BZ_N_OVERSHOOT;
+      if (i & 1) i++;
+      quadrant = (UInt16*)(&(block[i]));
+
+      /* (wfact-1) / 3 puts the default-factor-30
+         transition point at very roughly the same place as 
+         with v0.1 and v0.9.0.  
+         Not that it particularly matters any more, since the
+         resulting compressed stream is now the same regardless
+         of whether or not we use the main sort or fallback sort.
+      */
+      if (wfact < 1  ) wfact = 1;
+      if (wfact > 100) wfact = 100;
+      budgetInit = nblock * ((wfact-1) / 3);
+      budget = budgetInit;
+
+      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
+      if (verb >= 3) 
+         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
+                    budgetInit - budget,
+                    nblock, 
+                    (float)(budgetInit - budget) /
+                    (float)(nblock==0 ? 1 : nblock) ); 
+      if (budget < 0) {
+         if (verb >= 2) 
+            VPrintf0 ( "    too repetitive; using fallback"
+                       " sorting algorithm\n" );
+         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+      }
+   }
+
+   s->origPtr = -1;
+   for (i = 0; i < s->nblock; i++)
+      if (ptr[i] == 0)
+         { s->origPtr = i; break; };
+
+   AssertH( s->origPtr != -1, 1003 );
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                       blocksort.c ---*/
+/*-------------------------------------------------------------*/
author	Anton Samokhvalov <pg83@yandex.ru>	2022-02-10 16:45:17 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:17 +0300
commit	d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree	dd4bd3ca0f36b817e96812825ffaf10d645803f2 /contrib/libs/libbz2/blocksort.c
parent	72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download	ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz