1 /*
2     datagen.c - compressible data generator test tool
3     Copyright (C) Yann Collet 2012-2015
4 
5     GPL v2 License
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License along
18     with this program; if not, write to the Free Software Foundation, Inc.,
19     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 
21     You can contact the author at :
22    - LZ4 source repository : http://code.google.com/p/lz4
23    - LZ4 source mirror : https://github.com/Cyan4973/lz4
24    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
25 */
26 
27 /**************************************
28  Remove Visual warning messages
29 **************************************/
30 #define _CRT_SECURE_NO_WARNINGS   // fgets
31 
32 
33 /**************************************
34  Includes
35 **************************************/
36 #include <stdio.h>      // fgets, sscanf
37 #include <string.h>     // strcmp
38 
39 
40 /**************************************
41    Basic Types
42 **************************************/
/* Fixed-width aliases used throughout the file.
   Pre-C99 compilers have no <stdint.h>, so the usual native types are
   assumed to have the expected widths there; C99 and later use the exact
   types from <stdint.h>. */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)   /* before C99 */
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
#else                                                            /* C99 and later */
# include <stdint.h>
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#endif
57 
58 
59 /**************************************
60  Constants
61 **************************************/
/* Version tag printed in verbose mode; a build may predefine it. */
#if !defined(LZ4_VERSION)
#  define LZ4_VERSION "r125"
#endif

/* Postfix size multipliers : `64 KB` expands to `64 *(1 <<10)`.
   Always wrap the use in parentheses when it is part of a larger expression. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

/* Multiplier and increment used by CDG_rand() */
#define PRIME1   2654435761U
#define PRIME2   2246822519U

/* Command-line defaults */
#define CDG_SIZE_DEFAULT (64 KB)
#define CDG_SEED_DEFAULT 0
#define CDG_COMPRESSIBILITY_DEFAULT 50
75 
76 
77 /**************************************
78   Macros
79 **************************************/
// DISPLAY : printf-style message sent to stderr (stdout carries the generated data).
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
// DISPLAYLEVEL : same as DISPLAY, but only when `displayLevel` (which must be in
// scope at the point of use) is at least `l`.
// Wrapped in do { } while (0) so that it behaves as a single statement :
// `if (x) DISPLAYLEVEL(...); else ...` compiles and the `else` binds to the outer `if`.
#define DISPLAYLEVEL(l, ...) do { if (displayLevel>=(l)) { DISPLAY(__VA_ARGS__); } } while (0)
82 
83 
84 /**************************************
85   Local Parameters
86 **************************************/
static unsigned displayLevel = 2;      // threshold tested by DISPLAYLEVEL ; `-v` sets it to 4
static unsigned no_prompt    = 0;      // set to 1 by `--no-prompt`
static char*    programName  = NULL;   // argv[0], printed by the usage text
90 
91 
92 /*********************************************************
93   functions
94 *********************************************************/
95 
// CDG_rotl32 : rotate the 32-bit unsigned value `x` left by `r` bits.
// `r` must be in the range 1..31 (a shift by 32 would be undefined).
// Arguments are fully parenthesized so that compound expressions such as
// `a | b` or `n + 1` can be passed safely ; note that each is evaluated twice.
#define CDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
// CDG_rand :
// Advances the generator state stored in `*src` (multiply by PRIME1, add PRIME2,
// rotate left by 13 bits) and returns the new state.
// The same starting state always yields the same sequence.
static unsigned int CDG_rand(U32* src)
{
    U32 state = (*src * PRIME1) + PRIME2;
    state = CDG_rotl32(state, 13);
    *src = state;
    return state;
}
106 
107 
108 #define CDG_RAND15BITS  ((CDG_rand(seed) >> 3) & 32767)
109 #define CDG_RANDLENGTH  ( ((CDG_rand(seed) >> 7) & 3) ? (CDG_rand(seed) % 14) : (CDG_rand(seed) & 511) + 15)
110 #define CDG_RANDCHAR    (((CDG_rand(seed) >> 9) & 63) + '0')
CDG_generate(U64 size,U32 * seed,double proba)111 static void CDG_generate(U64 size, U32* seed, double proba)
112 {
113     BYTE fullbuff[32 KB + 128 KB + 1];
114     BYTE* buff = fullbuff + 32 KB;
115     U64 total=0;
116     U32 P32 = (U32)(32768 * proba);
117     U32 pos=1;
118     U32 genBlockSize = 128 KB;
119 
120     // Build initial prefix
121     fullbuff[0] = CDG_RANDCHAR;
122     while (pos<32 KB)
123     {
124         // Select : Literal (char) or Match (within 32K)
125         if (CDG_RAND15BITS < P32)
126         {
127             // Copy (within 64K)
128             U32 d;
129             int ref;
130             int length = CDG_RANDLENGTH + 4;
131             U32 offset = CDG_RAND15BITS + 1;
132             if (offset > pos) offset = pos;
133             ref = pos - offset;
134             d = pos + length;
135             while (pos < d) fullbuff[pos++] = fullbuff[ref++];
136         }
137         else
138         {
139             // Literal (noise)
140             U32 d = pos + CDG_RANDLENGTH;
141             while (pos < d) fullbuff[pos++] = CDG_RANDCHAR;
142         }
143     }
144 
145     // Generate compressible data
146     pos = 0;
147     while (total < size)
148     {
149         if (size-total < 128 KB) genBlockSize = (U32)(size-total);
150         total += genBlockSize;
151         buff[genBlockSize] = 0;
152         pos = 0;
153         while (pos<genBlockSize)
154         {
155             // Select : Literal (char) or Match (within 32K)
156             if (CDG_RAND15BITS < P32)
157             {
158                 // Copy (within 64K)
159                 int ref;
160                 U32 d;
161                 int length = CDG_RANDLENGTH + 4;
162                 U32 offset = CDG_RAND15BITS + 1;
163                 if (pos + length > genBlockSize ) length = genBlockSize - pos;
164                 ref = pos - offset;
165                 d = pos + length;
166                 while (pos < d) buff[pos++] = buff[ref++];
167             }
168             else
169             {
170                 // Literal (noise)
171                 U32 d;
172                 int length = CDG_RANDLENGTH;
173                 if (pos + length > genBlockSize) length = genBlockSize - pos;
174                 d = pos + length;
175                 while (pos < d) buff[pos++] = CDG_RANDCHAR;
176             }
177         }
178         // output datagen
179         pos=0;
180         for (;pos+512<=genBlockSize;pos+=512)
181             printf("%512.512s", buff+pos);
182         for (;pos<genBlockSize;pos++) printf("%c", buff[pos]);
183         // Regenerate prefix
184         memcpy(fullbuff, buff + 96 KB, 32 KB);
185     }
186 }
187 
188 
CDG_usage(void)189 int CDG_usage(void)
190 {
191     DISPLAY( "Compressible data generator\n");
192     DISPLAY( "Usage :\n");
193     DISPLAY( "      %s [size] [args]\n", programName);
194     DISPLAY( "\n");
195     DISPLAY( "Arguments :\n");
196     DISPLAY( " -g#    : generate # data (default:%i)\n", CDG_SIZE_DEFAULT);
197     DISPLAY( " -s#    : Select seed (default:%i)\n", CDG_SEED_DEFAULT);
198     DISPLAY( " -p#    : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT);
199     DISPLAY( " -h     : display help and exit\n");
200     return 0;
201 }
202 
203 
main(int argc,char ** argv)204 int main(int argc, char** argv)
205 {
206     int argNb;
207     int proba = CDG_COMPRESSIBILITY_DEFAULT;
208     U64 size = CDG_SIZE_DEFAULT;
209     U32 seed = CDG_SEED_DEFAULT;
210 
211     // Check command line
212     programName = argv[0];
213     for(argNb=1; argNb<argc; argNb++)
214     {
215         char* argument = argv[argNb];
216 
217         if(!argument) continue;   // Protection if argument empty
218 
219         // Decode command (note : aggregated commands are allowed)
220         if (*argument=='-')
221         {
222             if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; }
223 
224             argument++;
225             while (*argument!=0)
226             {
227                 switch(*argument)
228                 {
229                 case 'h':
230                     return CDG_usage();
231                 case 'g':
232                     argument++;
233                     size=0;
234                     while ((*argument>='0') && (*argument<='9'))
235                     {
236                         size *= 10;
237                         size += *argument - '0';
238                         argument++;
239                     }
240                     if (*argument=='K') { size <<= 10; argument++; }
241                     if (*argument=='M') { size <<= 20; argument++; }
242                     if (*argument=='G') { size <<= 30; argument++; }
243                     if (*argument=='B') { argument++; }
244                     break;
245                 case 's':
246                     argument++;
247                     seed=0;
248                     while ((*argument>='0') && (*argument<='9'))
249                     {
250                         seed *= 10;
251                         seed += *argument - '0';
252                         argument++;
253                     }
254                     break;
255                 case 'p':
256                     argument++;
257                     proba=0;
258                     while ((*argument>='0') && (*argument<='9'))
259                     {
260                         proba *= 10;
261                         proba += *argument - '0';
262                         argument++;
263                     }
264                     if (proba<0) proba=0;
265                     if (proba>100) proba=100;
266                     break;
267                 case 'v':
268                     displayLevel = 4;
269                     argument++;
270                     break;
271                 default: ;
272                 }
273             }
274 
275         }
276     }
277 
278     // Get Seed
279     DISPLAYLEVEL(4, "Data Generator %s \n", LZ4_VERSION);
280     DISPLAYLEVEL(3, "Seed = %u \n", seed);
281     if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba);
282 
283     CDG_generate(size, &seed, ((double)proba) / 100);
284 
285     return 0;
286 }
287