1 /*
2  * Copyright 2018 The Chromium Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the Chromium source repository LICENSE file.
5  *
6  * A benchmark test harness for measuring decoding performance of gzip or zlib
7  * (deflate) encoded compressed data. Given a file containing any data, encode
8  * (compress) it into gzip or zlib format and then decode (uncompress). Output
9  * the median and maximum encoding and decoding rates in MB/s.
10  *
11  * Raw deflate (no gzip or zlib stream wrapper) mode is also supported. Select
12  * it with the [raw] argument. Use the [gzip] [zlib] arguments to select those
13  * stream wrappers.
14  *
15  * Note this code can be compiled outside of the Chromium build system against
16  * the system zlib (-lz) with g++ or clang++ as follows:
17  *
18  *   g++|clang++ -O3 -Wall -std=c++11 zlib_bench.cc -lstdc++ -lz
19  */
20 
21 #include <algorithm>
22 #include <chrono>
23 #include <fstream>
24 #include <memory>
25 #include <string>
26 #include <vector>
27 
28 #include <memory.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 
33 #include "zlib.h"
34 
// Report a fatal error: writes "|error| (|code|)" and a newline to stderr,
// then terminates the process with |code| as the exit status.
void error_exit(const char* error, int code) {
  const int exit_status = code;
  fprintf(stderr, "%s (%d)\n", error, exit_status);
  exit(exit_status);
}
39 
// Returns a writable pointer to the first character of |s|, or nullptr when
// |s| holds no characters.
inline char* string_data(std::string* s) {
  if (s->empty())
    return nullptr;
  return &s->front();
}
43 
// A heap buffer of |size| chars. The buffer is allocated with the
// non-throwing form of new[], so |data| is null when the allocation fails
// and callers must check it before use.
struct Data {
  Data(size_t s) : data(new (std::nothrow) char[s]), size(s) {}
  std::unique_ptr<char[]> data;  // owned buffer, or null on failure
  size_t size;                   // requested buffer size in bytes
};
49 
read_file_data_or_exit(const char * name)50 Data read_file_data_or_exit(const char* name) {
51   std::ifstream file(name, std::ios::in | std::ios::binary);
52   if (!file) {
53     perror(name);
54     exit(1);
55   }
56 
57   file.seekg(0, std::ios::end);
58   Data data(file.tellg());
59   file.seekg(0, std::ios::beg);
60 
61   if (file && data.data)
62     file.read(data.data.get(), data.size);
63 
64   if (!file || !data.data || !data.size) {
65     perror((std::string("failed: reading ") + name).c_str());
66     exit(1);
67   }
68 
69   return data;
70 }
71 
zlib_estimate_compressed_size(size_t input_size)72 size_t zlib_estimate_compressed_size(size_t input_size) {
73   return compressBound(input_size);
74 }
75 
// The stream wrapper placed around the DEFLATE data.
enum zlib_wrapper {
  kWrapperNONE = 0,  // no wrapper selected; rejected by the helpers below
  kWrapperZLIB = 1,  // zlib DEFLATE stream wrapper
  kWrapperGZIP = 2,  // gzip DEFLATE stream wrapper
  kWrapperZRAW = 3,  // no wrapper, raw DEFLATE
};
82 
zlib_stream_wrapper_type(zlib_wrapper type)83 inline int zlib_stream_wrapper_type(zlib_wrapper type) {
84   if (type == kWrapperZLIB) // zlib DEFLATE stream wrapper
85     return MAX_WBITS;
86   if (type == kWrapperGZIP) // gzip DEFLATE stream wrapper
87     return MAX_WBITS + 16;
88   if (type == kWrapperZRAW) // no wrapper, use raw DEFLATE
89     return -MAX_WBITS;
90   error_exit("bad wrapper type", int(type));
91   return 0;
92 }
93 
zlib_wrapper_name(zlib_wrapper type)94 const char* zlib_wrapper_name(zlib_wrapper type) {
95   if (type == kWrapperZLIB)
96     return "ZLIB";
97   if (type == kWrapperGZIP)
98     return "GZIP";
99   if (type == kWrapperZRAW)
100     return "RAW";
101   error_exit("bad wrapper type", int(type));
102   return nullptr;
103 }
104 
105 static int zlib_strategy = Z_DEFAULT_STRATEGY;
106 
zlib_level_strategy_name(int compression_level)107 const char* zlib_level_strategy_name(int compression_level) {
108   if (compression_level == 0)
109     return "";  // strategy is meaningless at level 0
110   if (zlib_strategy == Z_HUFFMAN_ONLY)
111     return "huffman ";
112   if (zlib_strategy == Z_RLE)
113     return "rle ";
114   if (zlib_strategy == Z_DEFAULT_STRATEGY)
115     return "";
116   error_exit("bad strategy", zlib_strategy);
117   return nullptr;
118 }
119 
120 static int zlib_compression_level = Z_DEFAULT_COMPRESSION;
121 
zlib_compress(const zlib_wrapper type,const char * input,const size_t input_size,std::string * output,bool resize_output=false)122 void zlib_compress(
123     const zlib_wrapper type,
124     const char* input,
125     const size_t input_size,
126     std::string* output,
127     bool resize_output = false)
128 {
129   if (resize_output)
130     output->resize(zlib_estimate_compressed_size(input_size));
131   size_t output_size = output->size();
132 
133   z_stream stream;
134   memset(&stream, 0, sizeof(stream));
135 
136   int result = deflateInit2(&stream, zlib_compression_level, Z_DEFLATED,
137       zlib_stream_wrapper_type(type), MAX_MEM_LEVEL, zlib_strategy);
138   if (result != Z_OK)
139     error_exit("deflateInit2 failed", result);
140 
141   stream.next_out = (Bytef*)string_data(output);
142   stream.avail_out = (uInt)output_size;
143   stream.next_in = (z_const Bytef*)input;
144   stream.avail_in = (uInt)input_size;
145 
146   result = deflate(&stream, Z_FINISH);
147   if (result == Z_STREAM_END)
148     output_size = stream.total_out;
149   result |= deflateEnd(&stream);
150   if (result != Z_STREAM_END)
151     error_exit("compress failed", result);
152 
153   if (resize_output)
154     output->resize(output_size);
155 }
156 
zlib_uncompress(const zlib_wrapper type,const std::string & input,const size_t output_size,std::string * output)157 void zlib_uncompress(
158     const zlib_wrapper type,
159     const std::string& input,
160     const size_t output_size,
161     std::string* output)
162 {
163   z_stream stream;
164   memset(&stream, 0, sizeof(stream));
165 
166   int result = inflateInit2(&stream, zlib_stream_wrapper_type(type));
167   if (result != Z_OK)
168     error_exit("inflateInit2 failed", result);
169 
170   stream.next_out = (Bytef*)string_data(output);
171   stream.avail_out = (uInt)output->size();
172   stream.next_in = (z_const Bytef*)input.data();
173   stream.avail_in = (uInt)input.size();
174 
175   result = inflate(&stream, Z_FINISH);
176   if (stream.total_out != output_size)
177     result = Z_DATA_ERROR;
178   result |= inflateEnd(&stream);
179   if (result == Z_STREAM_END)
180     return;
181 
182   std::string error("uncompress failed: ");
183   if (stream.msg)
184     error.append(stream.msg);
185   error_exit(error.c_str(), result);
186 }
187 
verify_equal(const char * input,size_t size,std::string * output)188 void verify_equal(const char* input, size_t size, std::string* output) {
189   const char* data = string_data(output);
190   if (output->size() == size && !memcmp(data, input, size))
191     return;
192   fprintf(stderr, "uncompressed data does not match the input data\n");
193   exit(3);
194 }
195 
zlib_file(const char * name,const zlib_wrapper type)196 void zlib_file(const char* name, const zlib_wrapper type) {
197   /*
198    * Read the file data.
199    */
200   const auto file = read_file_data_or_exit(name);
201   const int length = static_cast<int>(file.size);
202   const char* data = file.data.get();
203 
204   /*
205    * Report compression strategy and file name.
206    */
207   const char* strategy = zlib_level_strategy_name(zlib_compression_level);
208   printf("%s%-40s :\n", strategy, name);
209 
210   /*
211    * Chop the data into blocks.
212    */
213   const int block_size = 1 << 20;
214   const int blocks = (length + block_size - 1) / block_size;
215 
216   std::vector<const char*> input(blocks);
217   std::vector<size_t> input_length(blocks);
218   std::vector<std::string> compressed(blocks);
219   std::vector<std::string> output(blocks);
220 
221   for (int b = 0; b < blocks; ++b) {
222     int input_start = b * block_size;
223     int input_limit = std::min<int>((b + 1) * block_size, length);
224     input[b] = data + input_start;
225     input_length[b] = input_limit - input_start;
226   }
227 
228   /*
229    * Run the zlib compress/uncompress loop a few times with |repeats| to
230    * process about 10MB of data if the length is small relative to 10MB.
231    * If length is large relative to 10MB, process the data once.
232    */
233   const int mega_byte = 1024 * 1024;
234   const int repeats = (10 * mega_byte + length) / (length + 1);
235   const int runs = 5;
236   double ctime[runs];
237   double utime[runs];
238 
239   for (int run = 0; run < runs; ++run) {
240     const auto now = [] { return std::chrono::steady_clock::now(); };
241 
242     // Pre-grow the output buffer so we don't measure string resize time.
243     for (int b = 0; b < blocks; ++b)
244       compressed[b].resize(zlib_estimate_compressed_size(block_size));
245 
246     auto start = now();
247     for (int b = 0; b < blocks; ++b)
248       for (int r = 0; r < repeats; ++r)
249         zlib_compress(type, input[b], input_length[b], &compressed[b]);
250     ctime[run] = std::chrono::duration<double>(now() - start).count();
251 
252     // Compress again, resizing compressed, so we don't leave junk at the
253     // end of the compressed string that could confuse zlib_uncompress().
254     for (int b = 0; b < blocks; ++b)
255       zlib_compress(type, input[b], input_length[b], &compressed[b], true);
256 
257     for (int b = 0; b < blocks; ++b)
258       output[b].resize(input_length[b]);
259 
260     start = now();
261     for (int r = 0; r < repeats; ++r)
262       for (int b = 0; b < blocks; ++b)
263         zlib_uncompress(type, compressed[b], input_length[b], &output[b]);
264     utime[run] = std::chrono::duration<double>(now() - start).count();
265 
266     for (int b = 0; b < blocks; ++b)
267       verify_equal(input[b], input_length[b], &output[b]);
268   }
269 
270   /*
271    * Output the median/maximum compress/uncompress rates in MB/s.
272    */
273   size_t output_length = 0;
274   for (size_t i = 0; i < compressed.size(); ++i)
275     output_length += compressed[i].size();
276 
277   std::sort(ctime, ctime + runs);
278   std::sort(utime, utime + runs);
279 
280   double deflate_rate_med = length * repeats / mega_byte / ctime[runs / 2];
281   double inflate_rate_med = length * repeats / mega_byte / utime[runs / 2];
282   double deflate_rate_max = length * repeats / mega_byte / ctime[0];
283   double inflate_rate_max = length * repeats / mega_byte / utime[0];
284 
285   // type, block size, compression ratio, etc
286   printf("%s: [b %dM] bytes %6d -> %6u %4.1f%%",
287     zlib_wrapper_name(type), block_size / (1 << 20), length,
288     static_cast<unsigned>(output_length), output_length * 100.0 / length);
289 
290   // compress / uncompress median (max) rates
291   printf(" comp %5.1f (%5.1f) MB/s uncomp %5.1f (%5.1f) MB/s\n",
292     deflate_rate_med, deflate_rate_max, inflate_rate_med, inflate_rate_max);
293 }
294 
// Index into argv of the next command line argument to be parsed.
static int argn = 1;

// If the next unparsed argument is exactly |option|, consumes it and returns
// it. Otherwise returns nullptr and consumes nothing.
char* get_option(int argc, char* argv[], const char* option) {
  if (argn < argc)
    return !strcmp(argv[argn], option) ? argv[argn++] : nullptr;
  return nullptr;
}

// Parses the next unparsed argument as a compression level into |*value|.
// Returns true only if an argument is present, starts with a decimal digit,
// and its value is in the range 0..9; the argument is consumed when it
// starts with a digit. If the argument does not start with a digit, |*value|
// is set to -1. If there is no argument left, |*value| is not modified and
// false is returned, so a trailing "--compression" with no level is rejected
// even if |*value| already holds a valid level.
bool get_compression(int argc, char* argv[], int* value) {
  if (argn >= argc)
    return false;

  // Plain range check rather than isdigit(): passing a negative char to
  // isdigit() is undefined behavior.
  const char first = argv[argn][0];
  if (first >= '0' && first <= '9')
    *value = atoi(argv[argn++]);
  else
    *value = -1;
  return *value >= 0 && *value <= 9;
}
308 
// Summary of the accepted command line arguments, shown in the usage message.
const char* options = "gzip|zlib|raw [--compression 0:9] [--huffman|--rle]";

// Prints the usage message for |program| to stdout and exits with status 1.
void usage_exit(const char* program) {
  // End the message with a newline so the shell prompt does not end up on
  // the same line as the usage text.
  printf("usage: %s %s files...\n", program, options);
  exit(1);
}
315 
main(int argc,char * argv[])316 int main(int argc, char* argv[]) {
317   zlib_wrapper type;
318   if (get_option(argc, argv, "zlib"))
319     type = kWrapperZLIB;
320   else if (get_option(argc, argv, "gzip"))
321     type = kWrapperGZIP;
322   else if (get_option(argc, argv, "raw"))
323     type = kWrapperZRAW;
324   else
325     usage_exit(argv[0]);
326 
327   while (argn < argc && argv[argn][0] == '-') {
328     if (get_option(argc, argv, "--compression")) {
329       if (!get_compression(argc, argv, &zlib_compression_level))
330         usage_exit(argv[0]);
331     } else if (get_option(argc, argv, "--huffman")) {
332       zlib_strategy = Z_HUFFMAN_ONLY;
333     } else if (get_option(argc, argv, "--rle")) {
334       zlib_strategy = Z_RLE;
335     } else {
336       usage_exit(argv[0]);
337     }
338   }
339 
340   if (argn >= argc)
341     usage_exit(argv[0]);
342   while (argn < argc)
343     zlib_file(argv[argn++], type);
344 
345   return 0;
346 }
347