1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 #include "blocksplitter.h"
21 
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 #include "deflate.h"
27 #include "lz77.h"
28 #include "squeeze.h"
29 #include "tree.h"
30 #include "util.h"
31 
32 /*
33 The "f" for the FindMinimum function below.
34 i: the current parameter of f(i)
35 context: for your implementation
36 */
37 typedef double FindMinimumFun(size_t i, void* context);
38 
39 /*
40 Finds minimum of function f(i) where is is of type size_t, f(i) is of type
41 double, i is in range start-end (excluding end).
42 */
FindMinimum(FindMinimumFun f,void * context,size_t start,size_t end)43 static size_t FindMinimum(FindMinimumFun f, void* context,
44                           size_t start, size_t end) {
45   if (end - start < 1024) {
46     double best = ZOPFLI_LARGE_FLOAT;
47     size_t result = start;
48     size_t i;
49     for (i = start; i < end; i++) {
50       double v = f(i, context);
51       if (v < best) {
52         best = v;
53         result = i;
54       }
55     }
56     return result;
57   } else {
58     /* Try to find minimum faster by recursively checking multiple points. */
59 #define NUM 9  /* Good value: 9. */
60     size_t i;
61     size_t p[NUM];
62     double vp[NUM];
63     size_t besti;
64     double best;
65     double lastbest = ZOPFLI_LARGE_FLOAT;
66     size_t pos = start;
67 
68     for (;;) {
69       if (end - start <= NUM) break;
70 
71       for (i = 0; i < NUM; i++) {
72         p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
73         vp[i] = f(p[i], context);
74       }
75       besti = 0;
76       best = vp[0];
77       for (i = 1; i < NUM; i++) {
78         if (vp[i] < best) {
79           best = vp[i];
80           besti = i;
81         }
82       }
83       if (best > lastbest) break;
84 
85       start = besti == 0 ? start : p[besti - 1];
86       end = besti == NUM - 1 ? end : p[besti + 1];
87 
88       pos = p[besti];
89       lastbest = best;
90     }
91     return pos;
92 #undef NUM
93   }
94 }
95 
96 /*
97 Returns estimated cost of a block in bits.  It includes the size to encode the
98 tree and the size to encode all literal, length and distance symbols and their
99 extra bits.
100 
101 litlens: lz77 lit/lengths
102 dists: ll77 distances
103 lstart: start of block
104 lend: end of block (not inclusive)
105 */
EstimateCost(const unsigned short * litlens,const unsigned short * dists,size_t lstart,size_t lend)106 static double EstimateCost(const unsigned short* litlens,
107                            const unsigned short* dists,
108                            size_t lstart, size_t lend) {
109   return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
110 }
111 
112 typedef struct SplitCostContext {
113   const unsigned short* litlens;
114   const unsigned short* dists;
115   size_t llsize;
116   size_t start;
117   size_t end;
118 } SplitCostContext;
119 
120 
121 /*
122 Gets the cost which is the sum of the cost of the left and the right section
123 of the data.
124 type: FindMinimumFun
125 */
SplitCost(size_t i,void * context)126 static double SplitCost(size_t i, void* context) {
127   SplitCostContext* c = (SplitCostContext*)context;
128   return EstimateCost(c->litlens, c->dists, c->start, i) +
129       EstimateCost(c->litlens, c->dists, i, c->end);
130 }
131 
AddSorted(size_t value,size_t ** out,size_t * outsize)132 static void AddSorted(size_t value, size_t** out, size_t* outsize) {
133   size_t i;
134   ZOPFLI_APPEND_DATA(value, out, outsize);
135   for (i = 0; i + 1 < *outsize; i++) {
136     if ((*out)[i] > value) {
137       size_t j;
138       for (j = *outsize - 1; j > i; j--) {
139         (*out)[j] = (*out)[j - 1];
140       }
141       (*out)[i] = value;
142       break;
143     }
144   }
145 }
146 
147 /*
148 Prints the block split points as decimal and hex values in the terminal.
149 */
PrintBlockSplitPoints(const unsigned short * litlens,const unsigned short * dists,size_t llsize,const size_t * lz77splitpoints,size_t nlz77points)150 static void PrintBlockSplitPoints(const unsigned short* litlens,
151                                   const unsigned short* dists,
152                                   size_t llsize, const size_t* lz77splitpoints,
153                                   size_t nlz77points) {
154   size_t* splitpoints = 0;
155   size_t npoints = 0;
156   size_t i;
157   /* The input is given as lz77 indices, but we want to see the uncompressed
158   index values. */
159   size_t pos = 0;
160   if (nlz77points > 0) {
161     for (i = 0; i < llsize; i++) {
162       size_t length = dists[i] == 0 ? 1 : litlens[i];
163       if (lz77splitpoints[npoints] == i) {
164         ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
165         if (npoints == nlz77points) break;
166       }
167       pos += length;
168     }
169   }
170   assert(npoints == nlz77points);
171 
172   fprintf(stderr, "block split points: ");
173   for (i = 0; i < npoints; i++) {
174     fprintf(stderr, "%d ", (int)splitpoints[i]);
175   }
176   fprintf(stderr, "(hex:");
177   for (i = 0; i < npoints; i++) {
178     fprintf(stderr, " %x", (int)splitpoints[i]);
179   }
180   fprintf(stderr, ")\n");
181 
182   free(splitpoints);
183 }
184 
185 /*
186 Finds next block to try to split, the largest of the available ones.
187 The largest is chosen to make sure that if only a limited amount of blocks is
188 requested, their sizes are spread evenly.
189 llsize: the size of the LL77 data, which is the size of the done array here.
190 done: array indicating which blocks starting at that position are no longer
191     splittable (splitting them increases rather than decreases cost).
192 splitpoints: the splitpoints found so far.
193 npoints: the amount of splitpoints found so far.
194 lstart: output variable, giving start of block.
195 lend: output variable, giving end of block.
196 returns 1 if a block was found, 0 if no block found (all are done).
197 */
FindLargestSplittableBlock(size_t llsize,const unsigned char * done,const size_t * splitpoints,size_t npoints,size_t * lstart,size_t * lend)198 static int FindLargestSplittableBlock(
199     size_t llsize, const unsigned char* done,
200     const size_t* splitpoints, size_t npoints,
201     size_t* lstart, size_t* lend) {
202   size_t longest = 0;
203   int found = 0;
204   size_t i;
205   for (i = 0; i <= npoints; i++) {
206     size_t start = i == 0 ? 0 : splitpoints[i - 1];
207     size_t end = i == npoints ? llsize - 1 : splitpoints[i];
208     if (!done[start] && end - start > longest) {
209       *lstart = start;
210       *lend = end;
211       found = 1;
212       longest = end - start;
213     }
214   }
215   return found;
216 }
217 
ZopfliBlockSplitLZ77(const ZopfliOptions * options,const unsigned short * litlens,const unsigned short * dists,size_t llsize,size_t maxblocks,size_t ** splitpoints,size_t * npoints)218 void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
219                           const unsigned short* litlens,
220                           const unsigned short* dists,
221                           size_t llsize, size_t maxblocks,
222                           size_t** splitpoints, size_t* npoints) {
223   size_t lstart, lend;
224   size_t i;
225   size_t llpos = 0;
226   size_t numblocks = 1;
227   unsigned char* done;
228   double splitcost, origcost;
229 
230   if (llsize < 10) return;  /* This code fails on tiny files. */
231 
232   done = (unsigned char*)malloc(llsize);
233   if (!done) exit(-1); /* Allocation failed. */
234   for (i = 0; i < llsize; i++) done[i] = 0;
235 
236   lstart = 0;
237   lend = llsize;
238   for (;;) {
239     SplitCostContext c;
240 
241     if (maxblocks > 0 && numblocks >= maxblocks) {
242       break;
243     }
244 
245     c.litlens = litlens;
246     c.dists = dists;
247     c.llsize = llsize;
248     c.start = lstart;
249     c.end = lend;
250     assert(lstart < lend);
251     llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
252 
253     assert(llpos > lstart);
254     assert(llpos < lend);
255 
256     splitcost = EstimateCost(litlens, dists, lstart, llpos) +
257         EstimateCost(litlens, dists, llpos, lend);
258     origcost = EstimateCost(litlens, dists, lstart, lend);
259 
260     if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
261       done[lstart] = 1;
262     } else {
263       AddSorted(llpos, splitpoints, npoints);
264       numblocks++;
265     }
266 
267     if (!FindLargestSplittableBlock(
268         llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
269       break;  /* No further split will probably reduce compression. */
270     }
271 
272     if (lend - lstart < 10) {
273       break;
274     }
275   }
276 
277   if (options->verbose) {
278     PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
279   }
280 
281   free(done);
282 }
283 
ZopfliBlockSplit(const ZopfliOptions * options,const unsigned char * in,size_t instart,size_t inend,size_t maxblocks,size_t ** splitpoints,size_t * npoints)284 void ZopfliBlockSplit(const ZopfliOptions* options,
285                       const unsigned char* in, size_t instart, size_t inend,
286                       size_t maxblocks, size_t** splitpoints, size_t* npoints) {
287   size_t pos = 0;
288   size_t i;
289   ZopfliBlockState s;
290   size_t* lz77splitpoints = 0;
291   size_t nlz77points = 0;
292   ZopfliLZ77Store store;
293 
294   ZopfliInitLZ77Store(&store);
295 
296   s.options = options;
297   s.blockstart = instart;
298   s.blockend = inend;
299 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
300   s.lmc = 0;
301 #endif
302 
303   *npoints = 0;
304   *splitpoints = 0;
305 
306   /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
307   results in better blocks. */
308   ZopfliLZ77Greedy(&s, in, instart, inend, &store);
309 
310   ZopfliBlockSplitLZ77(options,
311                        store.litlens, store.dists, store.size, maxblocks,
312                        &lz77splitpoints, &nlz77points);
313 
314   /* Convert LZ77 positions to positions in the uncompressed input. */
315   pos = instart;
316   if (nlz77points > 0) {
317     for (i = 0; i < store.size; i++) {
318       size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
319       if (lz77splitpoints[*npoints] == i) {
320         ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
321         if (*npoints == nlz77points) break;
322       }
323       pos += length;
324     }
325   }
326   assert(*npoints == nlz77points);
327 
328   free(lz77splitpoints);
329   ZopfliCleanLZ77Store(&store);
330 }
331 
ZopfliBlockSplitSimple(const unsigned char * in,size_t instart,size_t inend,size_t blocksize,size_t ** splitpoints,size_t * npoints)332 void ZopfliBlockSplitSimple(const unsigned char* in,
333                             size_t instart, size_t inend,
334                             size_t blocksize,
335                             size_t** splitpoints, size_t* npoints) {
336   size_t i = instart;
337   while (i < inend) {
338     ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
339     i += blocksize;
340   }
341   (void)in;
342 }
343