1 // Bench.cpp
2 
3 #include "StdAfx.h"
4 
5 #include "Bench.h"
6 
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif
11 
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #ifdef USE_POSIX_TIME2
15 #include <sys/time.h>
16 #endif
17 #endif
18 
19 #ifdef _WIN32
20 #define USE_ALLOCA
21 #endif
22 
23 #ifdef USE_ALLOCA
24 #ifdef _WIN32
25 #include <malloc.h>
26 #else
27 #include <stdlib.h>
28 #endif
29 #endif
30 
31 #include "../../../../C/7zCrc.h"
32 #include "../../../../C/Alloc.h"
33 
34 #ifndef _7ZIP_ST
35 #include "../../../Windows/Synchronization.h"
36 #include "../../../Windows/Thread.h"
37 #endif
38 
39 #include "../../../Windows/PropVariant.h"
40 
41 static const UInt32 kUncompressMinBlockSize =
42 #ifdef UNDER_CE
43 1 << 24;
44 #else
45 1 << 26;
46 #endif
47 
48 static const UInt32 kCrcBlockSize =
49 #ifdef UNDER_CE
50 1 << 25;
51 #else
52 1 << 30;
53 #endif
54 
55 static const UInt32 kAdditionalSize = (1 << 16);
56 static const UInt32 kCompressedAdditionalSize = (1 << 10);
57 static const UInt32 kMaxLzmaPropSize = 5;
58 
59 class CBaseRandomGenerator
60 {
61   UInt32 A1;
62   UInt32 A2;
63 public:
CBaseRandomGenerator()64   CBaseRandomGenerator() { Init(); }
Init()65   void Init() { A1 = 362436069; A2 = 521288629;}
GetRnd()66   UInt32 GetRnd()
67   {
68     return
69       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
70       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
71   }
72 };
73 
74 class CBenchBuffer
75 {
76 public:
77   size_t BufferSize;
78   Byte *Buffer;
CBenchBuffer()79   CBenchBuffer(): Buffer(0) {}
~CBenchBuffer()80   virtual ~CBenchBuffer() { Free(); }
Free()81   void Free()
82   {
83     ::MidFree(Buffer);
84     Buffer = 0;
85   }
Alloc(size_t bufferSize)86   bool Alloc(size_t bufferSize)
87   {
88     if (Buffer != 0 && BufferSize == bufferSize)
89       return true;
90     Free();
91     Buffer = (Byte *)::MidAlloc(bufferSize);
92     BufferSize = bufferSize;
93     return (Buffer != 0);
94   }
95 };
96 
97 class CBenchRandomGenerator: public CBenchBuffer
98 {
99   CBaseRandomGenerator *RG;
100 public:
Set(CBaseRandomGenerator * rg)101   void Set(CBaseRandomGenerator *rg) { RG = rg; }
GetVal(UInt32 & res,int numBits)102   UInt32 GetVal(UInt32 &res, int numBits)
103   {
104     UInt32 val = res & (((UInt32)1 << numBits) - 1);
105     res >>= numBits;
106     return val;
107   }
GetLen(UInt32 & res)108   UInt32 GetLen(UInt32 &res)
109   {
110     UInt32 len = GetVal(res, 2);
111     return GetVal(res, 1 + len);
112   }
Generate()113   void Generate()
114   {
115     UInt32 pos = 0;
116     UInt32 rep0 = 1;
117     while (pos < BufferSize)
118     {
119       UInt32 res = RG->GetRnd();
120       res >>= 1;
121       if (GetVal(res, 1) == 0 || pos < 1024)
122         Buffer[pos++] = (Byte)(res & 0xFF);
123       else
124       {
125         UInt32 len;
126         len = 1 + GetLen(res);
127         if (GetVal(res, 3) != 0)
128         {
129           len += GetLen(res);
130           do
131           {
132             UInt32 ppp = GetVal(res, 5) + 6;
133             res = RG->GetRnd();
134             if (ppp > 30)
135               continue;
136             rep0 = /* (1 << ppp) +*/  GetVal(res, ppp);
137             res = RG->GetRnd();
138           }
139           while (rep0 >= pos);
140           rep0++;
141         }
142 
143         for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++)
144           Buffer[pos] = Buffer[pos - rep0];
145       }
146     }
147   }
148 };
149 
150 
151 class CBenchmarkInStream:
152   public ISequentialInStream,
153   public CMyUnknownImp
154 {
155   const Byte *Data;
156   size_t Pos;
157   size_t Size;
158 public:
159   MY_UNKNOWN_IMP
Init(const Byte * data,size_t size)160   void Init(const Byte *data, size_t size)
161   {
162     Data = data;
163     Size = size;
164     Pos = 0;
165   }
166   STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
167 };
168 
Read(void * data,UInt32 size,UInt32 * processedSize)169 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
170 {
171   size_t remain = Size - Pos;
172   UInt32 kMaxBlockSize = (1 << 20);
173   if (size > kMaxBlockSize)
174     size = kMaxBlockSize;
175   if (size > remain)
176     size = (UInt32)remain;
177   for (UInt32 i = 0; i < size; i++)
178     ((Byte *)data)[i] = Data[Pos + i];
179   Pos += size;
180   if(processedSize != NULL)
181     *processedSize = size;
182   return S_OK;
183 }
184 
185 class CBenchmarkOutStream:
186   public ISequentialOutStream,
187   public CBenchBuffer,
188   public CMyUnknownImp
189 {
190   // bool _overflow;
191 public:
192   UInt32 Pos;
193   // CBenchmarkOutStream(): _overflow(false) {}
Init()194   void Init()
195   {
196     // _overflow = false;
197     Pos = 0;
198   }
199   MY_UNKNOWN_IMP
200   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
201 };
202 
Write(const void * data,UInt32 size,UInt32 * processedSize)203 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
204 {
205   size_t curSize = BufferSize - Pos;
206   if (curSize > size)
207     curSize = size;
208   memcpy(Buffer + Pos, data, curSize);
209   Pos += (UInt32)curSize;
210   if(processedSize != NULL)
211     *processedSize = (UInt32)curSize;
212   if (curSize != size)
213   {
214     // _overflow = true;
215     return E_FAIL;
216   }
217   return S_OK;
218 }
219 
220 class CCrcOutStream:
221   public ISequentialOutStream,
222   public CMyUnknownImp
223 {
224 public:
225   UInt32 Crc;
226   MY_UNKNOWN_IMP
Init()227   void Init() { Crc = CRC_INIT_VAL; }
228   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
229 };
230 
Write(const void * data,UInt32 size,UInt32 * processedSize)231 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
232 {
233   Crc = CrcUpdate(Crc, data, size);
234   if (processedSize != NULL)
235     *processedSize = size;
236   return S_OK;
237 }
238 
GetTimeCount()239 static UInt64 GetTimeCount()
240 {
241   #ifdef USE_POSIX_TIME
242   #ifdef USE_POSIX_TIME2
243   timeval v;
244   if (gettimeofday(&v, 0) == 0)
245     return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
246   return (UInt64)time(NULL) * 1000000;
247   #else
248   return time(NULL);
249   #endif
250   #else
251   /*
252   LARGE_INTEGER value;
253   if (::QueryPerformanceCounter(&value))
254     return value.QuadPart;
255   */
256   return GetTickCount();
257   #endif
258 }
259 
GetFreq()260 static UInt64 GetFreq()
261 {
262   #ifdef USE_POSIX_TIME
263   #ifdef USE_POSIX_TIME2
264   return 1000000;
265   #else
266   return 1;
267   #endif
268   #else
269   /*
270   LARGE_INTEGER value;
271   if (::QueryPerformanceFrequency(&value))
272     return value.QuadPart;
273   */
274   return 1000;
275   #endif
276 }
277 
278 #ifndef USE_POSIX_TIME
GetTime64(const FILETIME & t)279 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
280 #endif
281 
GetUserTime()282 static UInt64 GetUserTime()
283 {
284   #ifdef USE_POSIX_TIME
285   return clock();
286   #else
287   FILETIME creationTime, exitTime, kernelTime, userTime;
288   if (
289   #ifdef UNDER_CE
290     ::GetThreadTimes(::GetCurrentThread()
291   #else
292     ::GetProcessTimes(::GetCurrentProcess()
293   #endif
294     , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
295     return GetTime64(userTime) + GetTime64(kernelTime);
296   return (UInt64)GetTickCount() * 10000;
297   #endif
298 }
299 
GetUserFreq()300 static UInt64 GetUserFreq()
301 {
302   #ifdef USE_POSIX_TIME
303   return CLOCKS_PER_SEC;
304   #else
305   return 10000000;
306   #endif
307 }
308 
309 class CBenchProgressStatus
310 {
311   #ifndef _7ZIP_ST
312   NWindows::NSynchronization::CCriticalSection CS;
313   #endif
314 public:
315   HRESULT Res;
316   bool EncodeMode;
SetResult(HRESULT res)317   void SetResult(HRESULT res)
318   {
319     #ifndef _7ZIP_ST
320     NWindows::NSynchronization::CCriticalSectionLock lock(CS);
321     #endif
322     Res = res;
323   }
GetResult()324   HRESULT GetResult()
325   {
326     #ifndef _7ZIP_ST
327     NWindows::NSynchronization::CCriticalSectionLock lock(CS);
328     #endif
329     return Res;
330   }
331 };
332 
333 class CBenchProgressInfo:
334   public ICompressProgressInfo,
335   public CMyUnknownImp
336 {
337 public:
338   CBenchProgressStatus *Status;
339   CBenchInfo BenchInfo;
340   HRESULT Res;
341   IBenchCallback *callback;
CBenchProgressInfo()342   CBenchProgressInfo(): callback(0) {}
343   MY_UNKNOWN_IMP
344   STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
345 };
346 
SetStartTime(CBenchInfo & bi)347 static void SetStartTime(CBenchInfo &bi)
348 {
349   bi.GlobalFreq = GetFreq();
350   bi.UserFreq = GetUserFreq();
351   bi.GlobalTime = ::GetTimeCount();
352   bi.UserTime = ::GetUserTime();
353 }
354 
SetFinishTime(const CBenchInfo & biStart,CBenchInfo & dest)355 static void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest)
356 {
357   dest.GlobalFreq = GetFreq();
358   dest.UserFreq = GetUserFreq();
359   dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime;
360   dest.UserTime = ::GetUserTime() - biStart.UserTime;
361 }
362 
SetRatioInfo(const UInt64 * inSize,const UInt64 * outSize)363 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
364 {
365   HRESULT res = Status->GetResult();
366   if (res != S_OK)
367     return res;
368   if (!callback)
369     return res;
370   CBenchInfo info = BenchInfo;
371   SetFinishTime(BenchInfo, info);
372   if (Status->EncodeMode)
373   {
374     info.UnpackSize = *inSize;
375     info.PackSize = *outSize;
376     res = callback->SetEncodeResult(info, false);
377   }
378   else
379   {
380     info.PackSize = BenchInfo.PackSize + *inSize;
381     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
382     res = callback->SetDecodeResult(info, false);
383   }
384   if (res != S_OK)
385     Status->SetResult(res);
386   return res;
387 }
388 
389 static const int kSubBits = 8;
390 
GetLogSize(UInt32 size)391 static UInt32 GetLogSize(UInt32 size)
392 {
393   for (int i = kSubBits; i < 32; i++)
394     for (UInt32 j = 0; j < (1 << kSubBits); j++)
395       if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
396         return (i << kSubBits) + j;
397   return (32 << kSubBits);
398 }
399 
NormalizeVals(UInt64 & v1,UInt64 & v2)400 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
401 {
402   while (v1 > 1000000)
403   {
404     v1 >>= 1;
405     v2 >>= 1;
406   }
407 }
408 
GetUsage(const CBenchInfo & info)409 UInt64 GetUsage(const CBenchInfo &info)
410 {
411   UInt64 userTime = info.UserTime;
412   UInt64 userFreq = info.UserFreq;
413   UInt64 globalTime = info.GlobalTime;
414   UInt64 globalFreq = info.GlobalFreq;
415   NormalizeVals(userTime, userFreq);
416   NormalizeVals(globalFreq, globalTime);
417   if (userFreq == 0)
418     userFreq = 1;
419   if (globalTime == 0)
420     globalTime = 1;
421   return userTime * globalFreq * 1000000 / userFreq / globalTime;
422 }
423 
GetRatingPerUsage(const CBenchInfo & info,UInt64 rating)424 UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating)
425 {
426   UInt64 userTime = info.UserTime;
427   UInt64 userFreq = info.UserFreq;
428   UInt64 globalTime = info.GlobalTime;
429   UInt64 globalFreq = info.GlobalFreq;
430   NormalizeVals(userFreq, userTime);
431   NormalizeVals(globalTime, globalFreq);
432   if (globalFreq == 0)
433     globalFreq = 1;
434   if (userTime == 0)
435     userTime = 1;
436   return userFreq * globalTime / globalFreq *  rating / userTime;
437 }
438 
MyMultDiv64(UInt64 value,UInt64 elapsedTime,UInt64 freq)439 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
440 {
441   UInt64 elTime = elapsedTime;
442   NormalizeVals(freq, elTime);
443   if (elTime == 0)
444     elTime = 1;
445   return value * freq / elTime;
446 }
447 
GetCompressRating(UInt32 dictionarySize,UInt64 elapsedTime,UInt64 freq,UInt64 size)448 UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
449 {
450   UInt64 t = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits);
451   UInt64 numCommandsForOne = 870 + ((t * t * 5) >> (2 * kSubBits));
452   UInt64 numCommands = (UInt64)(size) * numCommandsForOne;
453   return MyMultDiv64(numCommands, elapsedTime, freq);
454 }
455 
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt32 numIterations)456 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations)
457 {
458   UInt64 numCommands = (inSize * 200 + outSize * 4) * numIterations;
459   return MyMultDiv64(numCommands, elapsedTime, freq);
460 }
461 
462 struct CEncoderInfo;
463 
464 struct CEncoderInfo
465 {
466   #ifndef _7ZIP_ST
467   NWindows::CThread thread[2];
468   #endif
469   CMyComPtr<ICompressCoder> encoder;
470   CBenchProgressInfo *progressInfoSpec[2];
471   CMyComPtr<ICompressProgressInfo> progressInfo[2];
472   UInt32 NumIterations;
473   #ifdef USE_ALLOCA
474   size_t AllocaSize;
475   #endif
476 
477   struct CDecoderInfo
478   {
479     CEncoderInfo *Encoder;
480     UInt32 DecoderIndex;
481     #ifdef USE_ALLOCA
482     size_t AllocaSize;
483     #endif
484     bool CallbackMode;
485   };
486   CDecoderInfo decodersInfo[2];
487 
488   CMyComPtr<ICompressCoder> decoders[2];
489   HRESULT Results[2];
490   CBenchmarkOutStream *outStreamSpec;
491   CMyComPtr<ISequentialOutStream> outStream;
492   IBenchCallback *callback;
493   UInt32 crc;
494   UInt32 kBufferSize;
495   UInt32 compressedSize;
496   CBenchRandomGenerator rg;
497   CBenchmarkOutStream *propStreamSpec;
498   CMyComPtr<ISequentialOutStream> propStream;
499   HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg);
500   HRESULT Encode();
501   HRESULT Decode(UInt32 decoderIndex);
502 
CEncoderInfoCEncoderInfo503   CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {}
504 
505   #ifndef _7ZIP_ST
EncodeThreadFunctionCEncoderInfo506   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
507   {
508     CEncoderInfo *encoder = (CEncoderInfo *)param;
509     #ifdef USE_ALLOCA
510     alloca(encoder->AllocaSize);
511     #endif
512     HRESULT res = encoder->Encode();
513     encoder->Results[0] = res;
514     if (res != S_OK)
515       encoder->progressInfoSpec[0]->Status->SetResult(res);
516 
517     return 0;
518   }
DecodeThreadFunctionCEncoderInfo519   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
520   {
521     CDecoderInfo *decoder = (CDecoderInfo *)param;
522     #ifdef USE_ALLOCA
523     alloca(decoder->AllocaSize);
524     #endif
525     CEncoderInfo *encoder = decoder->Encoder;
526     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
527     return 0;
528   }
529 
CreateEncoderThreadCEncoderInfo530   HRESULT CreateEncoderThread()
531   {
532     return thread[0].Create(EncodeThreadFunction, this);
533   }
534 
CreateDecoderThreadCEncoderInfo535   HRESULT CreateDecoderThread(int index, bool callbackMode
536       #ifdef USE_ALLOCA
537       , size_t allocaSize
538       #endif
539       )
540   {
541     CDecoderInfo &decoder = decodersInfo[index];
542     decoder.DecoderIndex = index;
543     decoder.Encoder = this;
544     #ifdef USE_ALLOCA
545     decoder.AllocaSize = allocaSize;
546     #endif
547     decoder.CallbackMode = callbackMode;
548     return thread[index].Create(DecodeThreadFunction, &decoder);
549   }
550   #endif
551 };
552 
Init(UInt32 dictionarySize,UInt32 numThreads,CBaseRandomGenerator * rgLoc)553 HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc)
554 {
555   rg.Set(rgLoc);
556   kBufferSize = dictionarySize + kAdditionalSize;
557   UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize;
558   if (!rg.Alloc(kBufferSize))
559     return E_OUTOFMEMORY;
560   rg.Generate();
561   crc = CrcCalc(rg.Buffer, rg.BufferSize);
562 
563   outStreamSpec = new CBenchmarkOutStream;
564   if (!outStreamSpec->Alloc(kCompressedBufferSize))
565     return E_OUTOFMEMORY;
566 
567   outStream = outStreamSpec;
568 
569   propStreamSpec = 0;
570   if (!propStream)
571   {
572     propStreamSpec = new CBenchmarkOutStream;
573     propStream = propStreamSpec;
574   }
575   if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
576     return E_OUTOFMEMORY;
577   propStreamSpec->Init();
578 
579   PROPID propIDs[] =
580   {
581     NCoderPropID::kDictionarySize,
582     NCoderPropID::kNumThreads
583   };
584   const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]);
585   PROPVARIANT props[kNumProps];
586   props[0].vt = VT_UI4;
587   props[0].ulVal = dictionarySize;
588 
589   props[1].vt = VT_UI4;
590   props[1].ulVal = numThreads;
591 
592   {
593     CMyComPtr<ICompressSetCoderProperties> setCoderProperties;
594     RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties));
595     if (!setCoderProperties)
596       return E_FAIL;
597     RINOK(setCoderProperties->SetCoderProperties(propIDs, props, kNumProps));
598 
599     CMyComPtr<ICompressWriteCoderProperties> writeCoderProperties;
600     encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties);
601     if (writeCoderProperties)
602     {
603       RINOK(writeCoderProperties->WriteCoderProperties(propStream));
604     }
605   }
606   return S_OK;
607 }
608 
Encode()609 HRESULT CEncoderInfo::Encode()
610 {
611   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
612   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
613   inStreamSpec->Init(rg.Buffer, rg.BufferSize);
614   outStreamSpec->Init();
615 
616   RINOK(encoder->Code(inStream, outStream, 0, 0, progressInfo[0]));
617   compressedSize = outStreamSpec->Pos;
618   encoder.Release();
619   return S_OK;
620 }
621 
Decode(UInt32 decoderIndex)622 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
623 {
624   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
625   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
626   CMyComPtr<ICompressCoder> &decoder = decoders[decoderIndex];
627 
628   CMyComPtr<ICompressSetDecoderProperties2> compressSetDecoderProperties;
629   decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &compressSetDecoderProperties);
630   if (!compressSetDecoderProperties)
631     return E_FAIL;
632 
633   CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
634   CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
635 
636   CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
637   pi->BenchInfo.UnpackSize = 0;
638   pi->BenchInfo.PackSize = 0;
639 
640   for (UInt32 j = 0; j < NumIterations; j++)
641   {
642     inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
643     crcOutStreamSpec->Init();
644 
645     RINOK(compressSetDecoderProperties->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos));
646     UInt64 outSize = kBufferSize;
647     RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
648     if (CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
649       return S_FALSE;
650     pi->BenchInfo.UnpackSize += kBufferSize;
651     pi->BenchInfo.PackSize += compressedSize;
652   }
653   decoder.Release();
654   return S_OK;
655 }
656 
657 static const UInt32 kNumThreadsMax = (1 << 16);
658 
659 struct CBenchEncoders
660 {
661   CEncoderInfo *encoders;
CBenchEncodersCBenchEncoders662   CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
~CBenchEncodersCBenchEncoders663   ~CBenchEncoders() { delete []encoders; }
664 };
665 
LzmaBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt32 numThreads,UInt32 dictionarySize,IBenchCallback * callback)666 HRESULT LzmaBench(
667   DECL_EXTERNAL_CODECS_LOC_VARS
668   UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback)
669 {
670   UInt32 numEncoderThreads =
671     #ifndef _7ZIP_ST
672     (numThreads > 1 ? numThreads / 2 : 1);
673     #else
674     1;
675     #endif
676   UInt32 numSubDecoderThreads =
677     #ifndef _7ZIP_ST
678     (numThreads > 1 ? 2 : 1);
679     #else
680     1;
681     #endif
682   if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax)
683   {
684     return E_INVALIDARG;
685   }
686 
687   CBenchEncoders encodersSpec(numEncoderThreads);
688   CEncoderInfo *encoders = encodersSpec.encoders;
689 
690 
691   UInt32 i;
692   for (i = 0; i < numEncoderThreads; i++)
693   {
694     CEncoderInfo &encoder = encoders[i];
695     encoder.callback = (i == 0) ? callback : 0;
696 
697     const UInt32 kLzmaId = 0x030101;
698     RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS kLzmaId, encoder.encoder, true));
699     if (!encoder.encoder)
700       return E_NOTIMPL;
701     for (UInt32 j = 0; j < numSubDecoderThreads; j++)
702     {
703       RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS kLzmaId, encoder.decoders[j], false));
704       if (!encoder.decoders[j])
705         return E_NOTIMPL;
706     }
707   }
708 
709   CBaseRandomGenerator rg;
710   rg.Init();
711   for (i = 0; i < numEncoderThreads; i++)
712   {
713     RINOK(encoders[i].Init(dictionarySize, numThreads, &rg));
714   }
715 
716   CBenchProgressStatus status;
717   status.Res = S_OK;
718   status.EncodeMode = true;
719 
720   for (i = 0; i < numEncoderThreads; i++)
721   {
722     CEncoderInfo &encoder = encoders[i];
723     for (int j = 0; j < 2; j++)
724     {
725       encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo;
726       encoder.progressInfoSpec[j]->Status = &status;
727     }
728     if (i == 0)
729     {
730       encoder.progressInfoSpec[0]->callback = callback;
731       encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads;
732       SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
733     }
734 
735     #ifndef _7ZIP_ST
736     if (numEncoderThreads > 1)
737     {
738       #ifdef USE_ALLOCA
739       encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
740       #endif
741       RINOK(encoder.CreateEncoderThread())
742     }
743     else
744     #endif
745     {
746       RINOK(encoder.Encode());
747     }
748   }
749   #ifndef _7ZIP_ST
750   if (numEncoderThreads > 1)
751     for (i = 0; i < numEncoderThreads; i++)
752       encoders[i].thread[0].Wait();
753   #endif
754 
755   RINOK(status.Res);
756 
757   CBenchInfo info;
758 
759   SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
760   info.UnpackSize = 0;
761   info.PackSize = 0;
762   info.NumIterations = 1; // progressInfoSpec->NumIterations;
763   for (i = 0; i < numEncoderThreads; i++)
764   {
765     CEncoderInfo &encoder = encoders[i];
766     info.UnpackSize += encoder.kBufferSize;
767     info.PackSize += encoder.compressedSize;
768   }
769   RINOK(callback->SetEncodeResult(info, true));
770 
771 
772   status.Res = S_OK;
773   status.EncodeMode = false;
774 
775   UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
776   for (i = 0; i < numEncoderThreads; i++)
777   {
778     CEncoderInfo &encoder = encoders[i];
779     encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize;
780 
781     if (i == 0)
782     {
783       encoder.progressInfoSpec[0]->callback = callback;
784       encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads;
785       SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
786     }
787 
788     #ifndef _7ZIP_ST
789     if (numDecoderThreads > 1)
790     {
791       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
792       {
793         HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
794             #ifdef USE_ALLOCA
795             , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
796             #endif
797             );
798         RINOK(res);
799       }
800     }
801     else
802     #endif
803     {
804       RINOK(encoder.Decode(0));
805     }
806   }
807   #ifndef _7ZIP_ST
808   HRESULT res = S_OK;
809   if (numDecoderThreads > 1)
810     for (i = 0; i < numEncoderThreads; i++)
811       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
812       {
813         CEncoderInfo &encoder = encoders[i];
814         encoder.thread[j].Wait();
815         if (encoder.Results[j] != S_OK)
816           res = encoder.Results[j];
817       }
818   RINOK(res);
819   #endif
820   RINOK(status.Res);
821   SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
822   #ifndef _7ZIP_ST
823   #ifdef UNDER_CE
824   if (numDecoderThreads > 1)
825     for (i = 0; i < numEncoderThreads; i++)
826       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
827       {
828         FILETIME creationTime, exitTime, kernelTime, userTime;
829         if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
830           info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
831       }
832   #endif
833   #endif
834   info.UnpackSize = 0;
835   info.PackSize = 0;
836   info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
837   for (i = 0; i < numEncoderThreads; i++)
838   {
839     CEncoderInfo &encoder = encoders[i];
840     info.UnpackSize += encoder.kBufferSize;
841     info.PackSize += encoder.compressedSize;
842   }
843   RINOK(callback->SetDecodeResult(info, false));
844   RINOK(callback->SetDecodeResult(info, true));
845   return S_OK;
846 }
847 
848 
GetLZMAUsage(bool multiThread,UInt32 dictionary)849 inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
850 {
851   UInt32 hs = dictionary - 1;
852   hs |= (hs >> 1);
853   hs |= (hs >> 2);
854   hs |= (hs >> 4);
855   hs |= (hs >> 8);
856   hs >>= 1;
857   hs |= 0xFFFF;
858   if (hs > (1 << 24))
859     hs >>= 1;
860   hs++;
861   return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 +
862       (1 << 20) + (multiThread ? (6 << 20) : 0);
863 }
864 
GetBenchMemoryUsage(UInt32 numThreads,UInt32 dictionary)865 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary)
866 {
867   const UInt32 kBufferSize = dictionary;
868   const UInt32 kCompressedBufferSize = (kBufferSize / 2);
869   UInt32 numSubThreads = (numThreads > 1) ? 2 : 1;
870   UInt32 numBigThreads = numThreads / numSubThreads;
871   return (kBufferSize + kCompressedBufferSize +
872     GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads;
873 }
874 
CrcBig(const void * data,UInt32 size,UInt32 numCycles,UInt32 crcBase)875 static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase)
876 {
877   for (UInt32 i = 0; i < numCycles; i++)
878     if (CrcCalc(data, size) != crcBase)
879       return false;
880   return true;
881 }
882 
883 #ifndef _7ZIP_ST
884 struct CCrcInfo
885 {
886   NWindows::CThread Thread;
887   const Byte *Data;
888   UInt32 Size;
889   UInt32 NumCycles;
890   UInt32 Crc;
891   bool Res;
WaitCCrcInfo892   void Wait()
893   {
894     Thread.Wait();
895     Thread.Close();
896   }
897 };
898 
CrcThreadFunction(void * param)899 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
900 {
901   CCrcInfo *p = (CCrcInfo *)param;
902   p->Res = CrcBig(p->Data, p->Size, p->NumCycles, p->Crc);
903   return 0;
904 }
905 
906 struct CCrcThreads
907 {
908   UInt32 NumThreads;
909   CCrcInfo *Items;
CCrcThreadsCCrcThreads910   CCrcThreads(): Items(0), NumThreads(0) {}
WaitAllCCrcThreads911   void WaitAll()
912   {
913     for (UInt32 i = 0; i < NumThreads; i++)
914       Items[i].Wait();
915     NumThreads = 0;
916   }
~CCrcThreadsCCrcThreads917   ~CCrcThreads()
918   {
919     WaitAll();
920     delete []Items;
921   }
922 };
923 #endif
924 
CrcCalc1(const Byte * buf,UInt32 size)925 static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
926 {
927   UInt32 crc = CRC_INIT_VAL;;
928   for (UInt32 i = 0; i < size; i++)
929     crc = CRC_UPDATE_BYTE(crc, buf[i]);
930   return CRC_GET_DIGEST(crc);
931 }
932 
RandGen(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)933 static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
934 {
935   for (UInt32 i = 0; i < size; i++)
936     buf[i] = (Byte)RG.GetRnd();
937 }
938 
RandGenCrc(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)939 static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
940 {
941   RandGen(buf, size, RG);
942   return CrcCalc1(buf, size);
943 }
944 
CrcInternalTest()945 bool CrcInternalTest()
946 {
947   CBenchBuffer buffer;
948   const UInt32 kBufferSize0 = (1 << 8);
949   const UInt32 kBufferSize1 = (1 << 10);
950   const UInt32 kCheckSize = (1 << 5);
951   if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
952     return false;
953   Byte *buf = buffer.Buffer;
954   UInt32 i;
955   for (i = 0; i < kBufferSize0; i++)
956     buf[i] = (Byte)i;
957   UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
958   if (crc1 != 0x29058C73)
959     return false;
960   CBaseRandomGenerator RG;
961   RandGen(buf + kBufferSize0, kBufferSize1, RG);
962   for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
963     for (UInt32 j = 0; j < kCheckSize; j++)
964       if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
965         return false;
966   return true;
967 }
968 
CrcBench(UInt32 numThreads,UInt32 bufferSize,UInt64 & speed)969 HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed)
970 {
971   if (numThreads == 0)
972     numThreads = 1;
973 
974   CBenchBuffer buffer;
975   size_t totalSize = (size_t)bufferSize * numThreads;
976   if (totalSize / numThreads != bufferSize)
977     return E_OUTOFMEMORY;
978   if (!buffer.Alloc(totalSize))
979     return E_OUTOFMEMORY;
980 
981   Byte *buf = buffer.Buffer;
982   CBaseRandomGenerator RG;
983   UInt32 numCycles = (kCrcBlockSize) / ((bufferSize >> 2) + 1) + 1;
984 
985   UInt64 timeVal;
986   #ifndef _7ZIP_ST
987   CCrcThreads threads;
988   if (numThreads > 1)
989   {
990     threads.Items = new CCrcInfo[numThreads];
991     UInt32 i;
992     for (i = 0; i < numThreads; i++)
993     {
994       CCrcInfo &info = threads.Items[i];
995       Byte *data = buf + (size_t)bufferSize * i;
996       info.Data = data;
997       info.NumCycles = numCycles;
998       info.Size = bufferSize;
999       info.Crc = RandGenCrc(data, bufferSize, RG);
1000     }
1001     timeVal = GetTimeCount();
1002     for (i = 0; i < numThreads; i++)
1003     {
1004       CCrcInfo &info = threads.Items[i];
1005       RINOK(info.Thread.Create(CrcThreadFunction, &info));
1006       threads.NumThreads++;
1007     }
1008     threads.WaitAll();
1009     for (i = 0; i < numThreads; i++)
1010       if (!threads.Items[i].Res)
1011         return S_FALSE;
1012   }
1013   else
1014   #endif
1015   {
1016     UInt32 crc = RandGenCrc(buf, bufferSize, RG);
1017     timeVal = GetTimeCount();
1018     if (!CrcBig(buf, bufferSize, numCycles, crc))
1019       return S_FALSE;
1020   }
1021   timeVal = GetTimeCount() - timeVal;
1022   if (timeVal == 0)
1023     timeVal = 1;
1024 
1025   UInt64 size = (UInt64)numCycles * totalSize;
1026   speed = MyMultDiv64(size, timeVal, GetFreq());
1027   return S_OK;
1028 }
1029