1 // Bench.cpp
2 
3 #include "StdAfx.h"
4 
5 #include <stdio.h>
6 
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif
11 
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #ifdef USE_POSIX_TIME2
15 #include <sys/time.h>
16 #endif
17 #endif
18 
19 #ifdef _WIN32
20 #define USE_ALLOCA
21 #endif
22 
23 #ifdef USE_ALLOCA
24 #ifdef _WIN32
25 #include <malloc.h>
26 #else
27 #include <stdlib.h>
28 #endif
29 #endif
30 
31 #include "../../../../C/7zCrc.h"
32 #include "../../../../C/Alloc.h"
33 #include "../../../../C/CpuArch.h"
34 
35 #ifndef _7ZIP_ST
36 #include "../../../Windows/Synchronization.h"
37 #include "../../../Windows/Thread.h"
38 #endif
39 
40 #if defined(_WIN32) || defined(UNIX_USE_WIN_FILE)
41 #define USE_WIN_FILE
42 #endif
43 
44 #ifdef USE_WIN_FILE
45 #include "../../../Windows/FileIO.h"
46 #endif
47 
48 
49 #include "../../../Common/IntToString.h"
50 #include "../../../Common/StringConvert.h"
51 #include "../../../Common/StringToInt.h"
52 
53 #include "../../Common/MethodProps.h"
54 #include "../../Common/StreamUtils.h"
55 
56 #include "Bench.h"
57 
58 using namespace NWindows;
59 
// NOTE(review): presumably the codec method ID of LZMA (guess from the name;
// its use is outside the visible part of the file) - confirm.
static const UInt32 k_LZMA = 0x030101;

// Default work budget, in abstract "commands", that SetComplexCommands()
// falls back to: 2^31 when UNDER_CE is defined, 2^34 otherwise.
static const UInt64 kComplexInCommands = (UInt64)1 <<
  #ifdef UNDER_CE
    31;
  #else
    34;
  #endif

// NOTE(review): looks like the default benchmark duration in seconds;
// not referenced in the visible part of the file - confirm.
static const UInt32 kComplexInSeconds = 4;
70 
SetComplexCommands(UInt32 complexInSeconds,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)71 static void SetComplexCommands(UInt32 complexInSeconds,
72     bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
73 {
74   complexInCommands = kComplexInCommands;
75   const UInt64 kMinFreq = (UInt64)1000000 * 4;
76   const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
77   if (cpuFreq < kMinFreq && !isSpecifiedFreq)
78     cpuFreq = kMinFreq;
79   if (cpuFreq < kMaxFreq || isSpecifiedFreq)
80   {
81     if (complexInSeconds != 0)
82       complexInCommands = complexInSeconds * cpuFreq;
83     else
84       complexInCommands = cpuFreq >> 2;
85   }
86 }
87 
// NOTE(review): the next three constants are not used in the visible part of
// the file; their meaning is presumably what the names say - confirm at the use sites.
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (48 << 10);

static const unsigned kOldLzmaDictBits = 30;

// NOTE(review): not used in the visible part of the file.
static const UInt32 kAdditionalSize = (1 << 16);
// Fixed number of extra bytes added to the compressed-output buffer size in CEncoderInfo::Init().
static const UInt32 kCompressedAdditionalSize = (1 << 10);
// Size of the buffer that CEncoderInfo::Init() allocates for written coder properties.
static const UInt32 kMaxLzmaPropSize = 5;
96 
97 class CBaseRandomGenerator
98 {
99   UInt32 A1;
100   UInt32 A2;
101 public:
CBaseRandomGenerator()102   CBaseRandomGenerator() { Init(); }
Init()103   void Init() { A1 = 362436069; A2 = 521288629;}
GetRnd()104   UInt32 GetRnd()
105   {
106     return
107       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
108       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
109   }
110 };
111 
112 
// Buffer alignment in bytes; CBenchBuffer::Alloc() passes (kBufferAlignment - 1) as the alignment mask.
static const unsigned kBufferAlignment = 1 << 4;
114 
115 struct CBenchBuffer
116 {
117   size_t BufferSize;
118 
119   #ifdef _WIN32
120 
121   Byte *Buffer;
122 
CBenchBufferCBenchBuffer123   CBenchBuffer(): BufferSize(0), Buffer(NULL) {}
~CBenchBufferCBenchBuffer124   ~CBenchBuffer() { ::MidFree(Buffer); }
125 
AllocAlignedMaskCBenchBuffer126   void AllocAlignedMask(size_t size, size_t)
127   {
128     ::MidFree(Buffer);
129     BufferSize = 0;
130     Buffer = (Byte *)::MidAlloc(size);
131     if (Buffer)
132       BufferSize = size;
133   }
134 
135   #else
136 
137   Byte *Buffer;
138   Byte *_bufBase;
139 
CBenchBufferCBenchBuffer140   CBenchBuffer(): BufferSize(0), Buffer(NULL), _bufBase(NULL){}
~CBenchBufferCBenchBuffer141   ~CBenchBuffer() { ::MidFree(_bufBase); }
142 
AllocAlignedMaskCBenchBuffer143   void AllocAlignedMask(size_t size, size_t alignMask)
144   {
145     ::MidFree(_bufBase);
146     Buffer = NULL;
147     BufferSize = 0;
148     _bufBase = (Byte *)::MidAlloc(size + alignMask);
149 
150     if (_bufBase)
151     {
152       // Buffer = (Byte *)(((uintptr_t)_bufBase + alignMask) & ~(uintptr_t)alignMask);
153          Buffer = (Byte *)(((ptrdiff_t)_bufBase + alignMask) & ~(ptrdiff_t)alignMask);
154       BufferSize = size;
155     }
156   }
157 
158   #endif
159 
AllocCBenchBuffer160   bool Alloc(size_t size)
161   {
162     if (Buffer && BufferSize == size)
163       return true;
164     AllocAlignedMask(size, kBufferAlignment - 1);
165     return (Buffer != NULL || size == 0);
166   }
167 };
168 
169 
// CBenchBuffer that can fill itself with deterministic pseudo-random test data:
// either plain random bytes or data with LZ-style repetitions.
class CBenchRandomGenerator: public CBenchBuffer
{
  // Returns the low (numBits) bits of (res) and shifts them out of (res).
  static UInt32 GetVal(UInt32 &res, unsigned numBits)
  {
    UInt32 val = res & (((UInt32)1 << numBits) - 1);
    res >>= numBits;
    return val;
  }

  // Consumes 2 bits of (r) as a width selector (len), then consumes and
  // returns the next (1 + len) bits of (r).
  static UInt32 GetLen(UInt32 &r)
  {
    UInt32 len = GetVal(r, 2);
    return GetVal(r, 1 + len);
  }

public:

  // Fills the whole buffer with the low bytes of successive rg.GetRnd() values.
  // Works on a local copy of the generator and writes the advanced state
  // back to *_RG_ at the end.
  void GenerateSimpleRandom(CBaseRandomGenerator *_RG_)
  {
    CBaseRandomGenerator rg = *_RG_;
    const size_t bufSize = BufferSize;
    Byte *buf = Buffer;
    for (size_t i = 0; i < bufSize; i++)
      buf[i] = (Byte)rg.GetRnd();
    *_RG_ = rg;
  }

  // Fills the whole buffer with a mix of random literal bytes and copies of
  // earlier data ("matches").
  //   dictBits - upper limit for the number of bits used for a match distance.
  //   _RG_     - generator; its advanced state is written back at the end.
  // NOTE(review): (pos) is UInt32 while the buffer size is size_t; this
  // presumably assumes buffers smaller than 4 GiB - confirm against callers.
  void GenerateLz(unsigned dictBits, CBaseRandomGenerator *_RG_)
  {
    CBaseRandomGenerator rg = *_RG_;
    UInt32 pos = 0;
    UInt32 rep0 = 1;            // current match distance (bytes back from pos)
    const size_t bufSize = BufferSize;
    Byte *buf = Buffer;
    unsigned posBits = 1;       // grows so that (1 << posBits) >= pos

    while (pos < bufSize)
    {
      UInt32 r = rg.GetRnd();
      // literal byte: when the first bit is 0, and always for the first 1024 bytes
      if (GetVal(r, 1) == 0 || pos < 1024)
        buf[pos++] = (Byte)(r & 0xFF);
      else
      {
        UInt32 len;
        len = 1 + GetLen(r);

        // if the next 3 bits are non-zero, a new distance is drawn;
        // otherwise the previous rep0 is reused
        if (GetVal(r, 3) != 0)
        {
          len += GetLen(r);

          while (((UInt32)1 << posBits) < pos)
            posBits++;

          // the distance uses at most min(dictBits, posBits) bits
          unsigned numBitsMax = dictBits;
          if (numBitsMax > posBits)
            numBitsMax = posBits;

          const unsigned kAddBits = 6;
          unsigned numLogBits = 5;
          if (numBitsMax <= (1 << 4) - 1 + kAddBits)
            numLogBits = 4;

          // rejection loop: pick a bit width (ppp), then a distance of that
          // width, until the distance is smaller than pos
          for (;;)
          {
            UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
            r = rg.GetRnd();
            if (ppp > numBitsMax)
              continue;
            rep0 = GetVal(r, ppp);
            if (rep0 < pos)
              break;
            r = rg.GetRnd();
          }
          rep0++;
        }

        // clip the match to the end of the buffer
        {
          UInt32 rem = (UInt32)bufSize - pos;
          if (len > rem)
            len = rem;
        }
        Byte *dest = buf + pos;
        const Byte *src = dest - rep0;
        pos += len;
        // byte-by-byte forward copy: source and destination may overlap
        for (UInt32 i = 0; i < len; i++)
          *dest++ = *src++;
      }
    }

    *_RG_ = rg;
  }
};
262 
263 
264 class CBenchmarkInStream:
265   public ISequentialInStream,
266   public CMyUnknownImp
267 {
268   const Byte *Data;
269   size_t Pos;
270   size_t Size;
271 public:
272   MY_UNKNOWN_IMP
Init(const Byte * data,size_t size)273   void Init(const Byte *data, size_t size)
274   {
275     Data = data;
276     Size = size;
277     Pos = 0;
278   }
279   STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
280 };
281 
Read(void * data,UInt32 size,UInt32 * processedSize)282 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
283 {
284   size_t remain = Size - Pos;
285   UInt32 kMaxBlockSize = (1 << 20);
286   if (size > kMaxBlockSize)
287     size = kMaxBlockSize;
288   if (size > remain)
289     size = (UInt32)remain;
290   for (UInt32 i = 0; i < size; i++)
291     ((Byte *)data)[i] = Data[Pos + i];
292   Pos += size;
293   if (processedSize)
294     *processedSize = size;
295   return S_OK;
296 }
297 
298 class CBenchmarkOutStream:
299   public ISequentialOutStream,
300   public CBenchBuffer,
301   public CMyUnknownImp
302 {
303   // bool _overflow;
304 public:
305   size_t Pos;
306   bool RealCopy;
307   bool CalcCrc;
308   UInt32 Crc;
309 
310   // CBenchmarkOutStream(): _overflow(false) {}
Init(bool realCopy,bool calcCrc)311   void Init(bool realCopy, bool calcCrc)
312   {
313     Crc = CRC_INIT_VAL;
314     RealCopy = realCopy;
315     CalcCrc = calcCrc;
316     // _overflow = false;
317     Pos = 0;
318   }
319 
320   // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
321 
322   MY_UNKNOWN_IMP
323   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
324 };
325 
Write(const void * data,UInt32 size,UInt32 * processedSize)326 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
327 {
328   size_t curSize = BufferSize - Pos;
329   if (curSize > size)
330     curSize = size;
331   if (curSize != 0)
332   {
333     if (RealCopy)
334       memcpy(Buffer + Pos, data, curSize);
335     if (CalcCrc)
336       Crc = CrcUpdate(Crc, data, curSize);
337     Pos += curSize;
338   }
339   if (processedSize)
340     *processedSize = (UInt32)curSize;
341   if (curSize != size)
342   {
343     // _overflow = true;
344     return E_FAIL;
345   }
346   return S_OK;
347 }
348 
349 class CCrcOutStream:
350   public ISequentialOutStream,
351   public CMyUnknownImp
352 {
353 public:
354   bool CalcCrc;
355   UInt32 Crc;
356   MY_UNKNOWN_IMP
357 
CCrcOutStream()358   CCrcOutStream(): CalcCrc(true) {};
Init()359   void Init() { Crc = CRC_INIT_VAL; }
360   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
361 };
362 
Write(const void * data,UInt32 size,UInt32 * processedSize)363 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
364 {
365   if (CalcCrc)
366     Crc = CrcUpdate(Crc, data, size);
367   if (processedSize)
368     *processedSize = size;
369   return S_OK;
370 }
371 
// Returns the current wall-clock time as a tick count. The tick rate depends
// on the build configuration and is reported by GetFreq():
//   USE_POSIX_TIME + USE_POSIX_TIME2 : microseconds from gettimeofday(),
//                                      or time() * 1000000 if that call fails;
//   USE_POSIX_TIME only              : seconds from time();
//   otherwise (Windows)              : milliseconds from GetTickCount().
static UInt64 GetTimeCount()
{
  #ifdef USE_POSIX_TIME
  #ifdef USE_POSIX_TIME2
  timeval v;
  if (gettimeofday(&v, 0) == 0)
    return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
  return (UInt64)time(NULL) * 1000000;
  #else
  return time(NULL);
  #endif
  #else
  /*
  LARGE_INTEGER value;
  if (::QueryPerformanceCounter(&value))
    return value.QuadPart;
  */
  // the value is a 32-bit DWORD, so it wraps around periodically
  return GetTickCount();
  #endif
}
392 
// Returns the number of GetTimeCount() ticks per second for the current
// build configuration: 1000000 (gettimeofday), 1 (time) or 1000 (GetTickCount).
static UInt64 GetFreq()
{
  #ifdef USE_POSIX_TIME
  #ifdef USE_POSIX_TIME2
  return 1000000;
  #else
  return 1;
  #endif
  #else
  /*
  LARGE_INTEGER value;
  if (::QueryPerformanceFrequency(&value))
    return value.QuadPart;
  */
  return 1000;
  #endif
}
410 
411 #ifdef USE_POSIX_TIME
412 
413 struct CUserTime
414 {
415   UInt64 Sum;
416   clock_t Prev;
417 
InitCUserTime418   void Init()
419   {
420     Prev = clock();
421     Sum = 0;
422   }
423 
GetUserTimeCUserTime424   UInt64 GetUserTime()
425   {
426     clock_t v = clock();
427     Sum += v - Prev;
428     Prev = v;
429     return Sum;
430   }
431 };
432 
433 #else
434 
GetTime64(const FILETIME & t)435 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
// Returns the CPU time (user + kernel) consumed so far, in FILETIME units:
// of the current thread when UNDER_CE is defined, of the whole process otherwise.
// If the system call fails, falls back to GetTickCount() * 10000
// (wall-clock milliseconds scaled by 10000).
UInt64 GetWinUserTime()
{
  FILETIME creationTime, exitTime, kernelTime, userTime;
  if (
  #ifdef UNDER_CE
    ::GetThreadTimes(::GetCurrentThread()
  #else
    ::GetProcessTimes(::GetCurrentProcess()
  #endif
    , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
    return GetTime64(userTime) + GetTime64(kernelTime);
  return (UInt64)GetTickCount() * 10000;
}
449 
450 struct CUserTime
451 {
452   UInt64 StartTime;
453 
InitCUserTime454   void Init() { StartTime = GetWinUserTime(); }
GetUserTimeCUserTime455   UInt64 GetUserTime() { return GetWinUserTime() - StartTime; }
456 };
457 
458 #endif
459 
// Returns the number of CUserTime ticks per second:
// CLOCKS_PER_SEC in the POSIX build, 10000000 otherwise
// (the Windows build takes its values from FILETIME fields).
static UInt64 GetUserFreq()
{
  #ifdef USE_POSIX_TIME
  return CLOCKS_PER_SEC;
  #else
  return 10000000;
  #endif
}
468 
469 class CBenchProgressStatus
470 {
471   #ifndef _7ZIP_ST
472   NSynchronization::CCriticalSection CS;
473   #endif
474 public:
475   HRESULT Res;
476   bool EncodeMode;
SetResult(HRESULT res)477   void SetResult(HRESULT res)
478   {
479     #ifndef _7ZIP_ST
480     NSynchronization::CCriticalSectionLock lock(CS);
481     #endif
482     Res = res;
483   }
GetResult()484   HRESULT GetResult()
485   {
486     #ifndef _7ZIP_ST
487     NSynchronization::CCriticalSectionLock lock(CS);
488     #endif
489     return Res;
490   }
491 };
492 
// Keeps the timing state of one benchmark measurement.
struct CBenchInfoCalc
{
  // Clock frequencies and the start value of the global clock
  // (filled by SetStartTime()); the size fields are maintained by the users of this struct.
  CBenchInfo BenchInfo;
  // CPU-time counter, restarted by SetStartTime().
  CUserTime UserTime;

  // Records clock frequencies and the current time as the start of a measurement.
  void SetStartTime();
  // Copies BenchInfo to (dest) and replaces its time fields with the
  // time elapsed since SetStartTime().
  void SetFinishTime(CBenchInfo &dest);
};
501 
SetStartTime()502 void CBenchInfoCalc::SetStartTime()
503 {
504   BenchInfo.GlobalFreq = GetFreq();
505   BenchInfo.UserFreq = GetUserFreq();
506   BenchInfo.GlobalTime = ::GetTimeCount();
507   BenchInfo.UserTime = 0;
508   UserTime.Init();
509 }
510 
SetFinishTime(CBenchInfo & dest)511 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
512 {
513   dest = BenchInfo;
514   dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
515   dest.UserTime = UserTime.GetUserTime();
516 }
517 
// Progress sink passed to coders. It turns progress notifications into
// intermediate benchmark results for Callback and propagates a failure code
// through the shared Status object.
class CBenchProgressInfo:
  public ICompressProgressInfo,
  public CMyUnknownImp,
  public CBenchInfoCalc
{
public:
  // Shared run status. Not initialized by the constructor:
  // it must be set before SetRatioInfo() is called.
  CBenchProgressStatus *Status;
  // Receiver of intermediate results; may be NULL (then nothing is reported).
  IBenchCallback *Callback;

  CBenchProgressInfo(): Callback(NULL) {}
  MY_UNKNOWN_IMP
  STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
};
531 
SetRatioInfo(const UInt64 * inSize,const UInt64 * outSize)532 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
533 {
534   HRESULT res = Status->GetResult();
535   if (res != S_OK)
536     return res;
537   if (!Callback)
538     return res;
539   CBenchInfo info;
540   SetFinishTime(info);
541   if (Status->EncodeMode)
542   {
543     info.UnpackSize = BenchInfo.UnpackSize + *inSize;
544     info.PackSize = BenchInfo.PackSize + *outSize;
545     res = Callback->SetEncodeResult(info, false);
546   }
547   else
548   {
549     info.PackSize = BenchInfo.PackSize + *inSize;
550     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
551     res = Callback->SetDecodeResult(info, false);
552   }
553   if (res != S_OK)
554     Status->SetResult(res);
555   return res;
556 }
557 
// Number of fractional bits in the fixed-point logarithm returned by GetLogSize().
static const unsigned kSubBits = 8;
559 
GetLogSize(UInt32 size)560 static UInt32 GetLogSize(UInt32 size)
561 {
562   for (unsigned i = kSubBits; i < 32; i++)
563     for (UInt32 j = 0; j < (1 << kSubBits); j++)
564       if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
565         return (i << kSubBits) + j;
566   return (32 << kSubBits);
567 }
568 
NormalizeVals(UInt64 & v1,UInt64 & v2)569 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
570 {
571   while (v1 > 1000000)
572   {
573     v1 >>= 1;
574     v2 >>= 1;
575   }
576 }
577 
GetUsage() const578 UInt64 CBenchInfo::GetUsage() const
579 {
580   UInt64 userTime = UserTime;
581   UInt64 userFreq = UserFreq;
582   UInt64 globalTime = GlobalTime;
583   UInt64 globalFreq = GlobalFreq;
584   NormalizeVals(userTime, userFreq);
585   NormalizeVals(globalFreq, globalTime);
586   if (userFreq == 0)
587     userFreq = 1;
588   if (globalTime == 0)
589     globalTime = 1;
590   return userTime * globalFreq * 1000000 / userFreq / globalTime;
591 }
592 
GetRatingPerUsage(UInt64 rating) const593 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
594 {
595   UInt64 userTime = UserTime;
596   UInt64 userFreq = UserFreq;
597   UInt64 globalTime = GlobalTime;
598   UInt64 globalFreq = GlobalFreq;
599   NormalizeVals(userFreq, userTime);
600   NormalizeVals(globalTime, globalFreq);
601   if (globalFreq == 0)
602     globalFreq = 1;
603   if (userTime == 0)
604     userTime = 1;
605   return userFreq * globalTime / globalFreq * rating / userTime;
606 }
607 
MyMultDiv64(UInt64 value,UInt64 elapsedTime,UInt64 freq)608 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
609 {
610   UInt64 elTime = elapsedTime;
611   NormalizeVals(freq, elTime);
612   if (elTime == 0)
613     elTime = 1;
614   return value * freq / elTime;
615 }
616 
GetSpeed(UInt64 numCommands) const617 UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const
618 {
619   return MyMultDiv64(numCommands, GlobalTime, GlobalFreq);
620 }
621 
622 struct CBenchProps
623 {
624   bool LzmaRatingMode;
625 
626   UInt32 EncComplex;
627   UInt32 DecComplexCompr;
628   UInt32 DecComplexUnc;
629 
CBenchPropsCBenchProps630   CBenchProps(): LzmaRatingMode(false) {}
631   void SetLzmaCompexity();
632 
GeComprCommandsCBenchProps633   UInt64 GeComprCommands(UInt64 unpackSize)
634   {
635     return unpackSize * EncComplex;
636   }
637 
GeDecomprCommandsCBenchProps638   UInt64 GeDecomprCommands(UInt64 packSize, UInt64 unpackSize)
639   {
640     return (packSize * DecComplexCompr + unpackSize * DecComplexUnc);
641   }
642 
643   UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
644   UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
645 };
646 
SetLzmaCompexity()647 void CBenchProps::SetLzmaCompexity()
648 {
649   EncComplex = 1200;
650   DecComplexUnc = 4;
651   DecComplexCompr = 190;
652   LzmaRatingMode = true;
653 }
654 
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)655 UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
656 {
657   if (dictSize < (1 << kBenchMinDicLogSize))
658     dictSize = (1 << kBenchMinDicLogSize);
659   UInt64 encComplex = EncComplex;
660   if (LzmaRatingMode)
661   {
662     UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
663     encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
664   }
665   UInt64 numCommands = (UInt64)size * encComplex;
666   return MyMultDiv64(numCommands, elapsedTime, freq);
667 }
668 
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)669 UInt64 CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
670 {
671   UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
672   return MyMultDiv64(numCommands, elapsedTime, freq);
673 }
674 
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)675 UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
676 {
677   CBenchProps props;
678   props.SetLzmaCompexity();
679   return props.GetCompressRating(dictSize, elapsedTime, freq, size);
680 }
681 
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)682 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
683 {
684   CBenchProps props;
685   props.SetLzmaCompexity();
686   return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations);
687 }
688 
689 struct CEncoderInfo;
690 
691 struct CEncoderInfo
692 {
693   #ifndef _7ZIP_ST
694   NWindows::CThread thread[2];
695   UInt32 NumDecoderSubThreads;
696   #endif
697   CMyComPtr<ICompressCoder> _encoder;
698   CMyComPtr<ICompressFilter> _encoderFilter;
699   CBenchProgressInfo *progressInfoSpec[2];
700   CMyComPtr<ICompressProgressInfo> progressInfo[2];
701   UInt64 NumIterations;
702 
703   #ifdef USE_ALLOCA
704   size_t AllocaSize;
705   #endif
706 
707   Byte _key[32];
708   Byte _iv[16];
709   Byte _psw[16];
710   bool CheckCrc_Enc;
711   bool CheckCrc_Dec;
712 
713   struct CDecoderInfo
714   {
715     CEncoderInfo *Encoder;
716     UInt32 DecoderIndex;
717     bool CallbackMode;
718 
719     #ifdef USE_ALLOCA
720     size_t AllocaSize;
721     #endif
722   };
723   CDecoderInfo decodersInfo[2];
724 
725   CMyComPtr<ICompressCoder> _decoders[2];
726   CMyComPtr<ICompressFilter> _decoderFilter;
727 
728   HRESULT Results[2];
729   CBenchmarkOutStream *outStreamSpec;
730   CMyComPtr<ISequentialOutStream> outStream;
731   IBenchCallback *callback;
732   IBenchPrintCallback *printCallback;
733   UInt32 crc;
734   size_t kBufferSize;
735   size_t compressedSize;
736   const Byte *uncompressedDataPtr;
737 
738   const Byte *fileData;
739   CBenchRandomGenerator rg;
740 
741   CBenchBuffer rgCopy; // it must be 16-byte aligned !!!
742   CBenchmarkOutStream *propStreamSpec;
743   CMyComPtr<ISequentialOutStream> propStream;
744 
745   // for decode
746   COneMethodInfo _method;
747   size_t _uncompressedDataSize;
748 
749   HRESULT Init(
750       const COneMethodInfo &method,
751       unsigned generateDictBits,
752       CBaseRandomGenerator *rg);
753   HRESULT Encode();
754   HRESULT Decode(UInt32 decoderIndex);
755 
CEncoderInfoCEncoderInfo756   CEncoderInfo():
757     fileData(NULL),
758     CheckCrc_Enc(true),
759     CheckCrc_Dec(true),
760     outStreamSpec(NULL), callback(NULL), printCallback(NULL), propStreamSpec(NULL) {}
761 
762   #ifndef _7ZIP_ST
763 
EncodeThreadFunctionCEncoderInfo764   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
765   {
766     HRESULT res;
767     CEncoderInfo *encoder = (CEncoderInfo *)param;
768     try
769     {
770       #ifdef USE_ALLOCA
771       alloca(encoder->AllocaSize);
772       #endif
773 
774       res = encoder->Encode();
775       encoder->Results[0] = res;
776     }
777     catch(...)
778     {
779       res = E_FAIL;
780     }
781     if (res != S_OK)
782       encoder->progressInfoSpec[0]->Status->SetResult(res);
783     return 0;
784   }
785 
DecodeThreadFunctionCEncoderInfo786   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
787   {
788     CDecoderInfo *decoder = (CDecoderInfo *)param;
789 
790     #ifdef USE_ALLOCA
791     alloca(decoder->AllocaSize);
792     #endif
793 
794     CEncoderInfo *encoder = decoder->Encoder;
795     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
796     return 0;
797   }
798 
CreateEncoderThreadCEncoderInfo799   HRESULT CreateEncoderThread()
800   {
801     return thread[0].Create(EncodeThreadFunction, this);
802   }
803 
CreateDecoderThreadCEncoderInfo804   HRESULT CreateDecoderThread(unsigned index, bool callbackMode
805       #ifdef USE_ALLOCA
806       , size_t allocaSize
807       #endif
808       )
809   {
810     CDecoderInfo &decoder = decodersInfo[index];
811     decoder.DecoderIndex = index;
812     decoder.Encoder = this;
813 
814     #ifdef USE_ALLOCA
815     decoder.AllocaSize = allocaSize;
816     #endif
817 
818     decoder.CallbackMode = callbackMode;
819     return thread[index].Create(DecodeThreadFunction, &decoder);
820   }
821 
822   #endif
823 };
824 
825 
Init(const COneMethodInfo & method,unsigned generateDictBits,CBaseRandomGenerator * rgLoc)826 HRESULT CEncoderInfo::Init(
827     const COneMethodInfo &method,
828     unsigned generateDictBits,
829     CBaseRandomGenerator *rgLoc)
830 {
831   // we need extra space, if input data is already compressed
832   const size_t kCompressedBufferSize =
833       kCompressedAdditionalSize +
834       kBufferSize + kBufferSize / 16;
835       // kBufferSize / 2;
836 
837   if (kCompressedBufferSize < kBufferSize)
838     return E_FAIL;
839 
840   uncompressedDataPtr = fileData;
841 
842   if (!fileData)
843   {
844     if (!rg.Alloc(kBufferSize))
845       return E_OUTOFMEMORY;
846 
847     // DWORD ttt = GetTickCount();
848     if (generateDictBits == 0)
849       rg.GenerateSimpleRandom(rgLoc);
850     else
851       rg.GenerateLz(generateDictBits, rgLoc);
852     // printf("\n%d\n            ", GetTickCount() - ttt);
853 
854     crc = CrcCalc(rg.Buffer, rg.BufferSize);
855     uncompressedDataPtr = rg.Buffer;
856   }
857 
858   if (_encoderFilter)
859   {
860     if (!rgCopy.Alloc(kBufferSize))
861       return E_OUTOFMEMORY;
862   }
863 
864 
865   outStreamSpec = new CBenchmarkOutStream;
866   outStream = outStreamSpec;
867   if (!outStreamSpec->Alloc(kCompressedBufferSize))
868     return E_OUTOFMEMORY;
869 
870   propStreamSpec = 0;
871   if (!propStream)
872   {
873     propStreamSpec = new CBenchmarkOutStream;
874     propStream = propStreamSpec;
875   }
876   if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
877     return E_OUTOFMEMORY;
878   propStreamSpec->Init(true, false);
879 
880 
881   CMyComPtr<IUnknown> coder;
882   if (_encoderFilter)
883     coder = _encoderFilter;
884   else
885     coder = _encoder;
886   {
887     CMyComPtr<ICompressSetCoderProperties> scp;
888     coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
889     if (scp)
890     {
891       UInt64 reduceSize = kBufferSize;
892       RINOK(method.SetCoderProps(scp, &reduceSize));
893     }
894     else
895     {
896       if (method.AreThereNonOptionalProps())
897         return E_INVALIDARG;
898     }
899 
900     CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
901     coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
902     if (writeCoderProps)
903     {
904       RINOK(writeCoderProps->WriteCoderProperties(propStream));
905     }
906 
907     {
908       CMyComPtr<ICryptoSetPassword> sp;
909       coder.QueryInterface(IID_ICryptoSetPassword, &sp);
910       if (sp)
911       {
912         RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
913 
914         // we must call encoding one time to calculate password key for key cache.
915         // it must be after WriteCoderProperties!
916         Byte temp[16];
917         memset(temp, 0, sizeof(temp));
918 
919         if (_encoderFilter)
920         {
921           _encoderFilter->Init();
922           _encoderFilter->Filter(temp, sizeof(temp));
923         }
924         else
925         {
926           CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
927           CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
928           inStreamSpec->Init(temp, sizeof(temp));
929 
930           CCrcOutStream *crcStreamSpec = new CCrcOutStream;
931           CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
932           crcStreamSpec->Init();
933 
934           RINOK(_encoder->Code(inStream, crcStream, 0, 0, NULL));
935         }
936       }
937     }
938   }
939 
940   return S_OK;
941 }
942 
943 
My_FilterBench(ICompressFilter * filter,Byte * data,size_t size)944 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size)
945 {
946   while (size != 0)
947   {
948     UInt32 cur = (UInt32)1 << 31;
949     if (cur > size)
950       cur = (UInt32)size;
951     UInt32 processed = filter->Filter(data, cur);
952     data += processed;
953     // if (processed > size) (in AES filter), we must fill last block with zeros.
954     // but it is not important for benchmark. So we just copy that data without filtering.
955     if (processed > size || processed == 0)
956       break;
957     size -= processed;
958   }
959 }
960 
961 
Encode()962 HRESULT CEncoderInfo::Encode()
963 {
964   CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
965   bi.UnpackSize = 0;
966   bi.PackSize = 0;
967   CMyComPtr<ICryptoProperties> cp;
968   CMyComPtr<IUnknown> coder;
969   if (_encoderFilter)
970     coder = _encoderFilter;
971   else
972     coder = _encoder;
973   coder.QueryInterface(IID_ICryptoProperties, &cp);
974   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
975   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
976   UInt64 prev = 0;
977 
978   UInt32 crcPrev = 0;
979 
980   if (cp)
981   {
982     RINOK(cp->SetKey(_key, sizeof(_key)));
983     RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
984   }
985 
986   for (UInt64 i = 0; i < NumIterations; i++)
987   {
988     if (printCallback && bi.UnpackSize - prev > (1 << 20))
989     {
990       RINOK(printCallback->CheckBreak());
991       prev = bi.UnpackSize;
992     }
993 
994     bool isLast = (i == NumIterations - 1);
995     bool calcCrc = ((isLast || (i & 0x7F) == 0 || CheckCrc_Enc) && NumIterations != 1);
996     outStreamSpec->Init(isLast, calcCrc);
997 
998     if (_encoderFilter)
999     {
1000       memcpy(rgCopy.Buffer, uncompressedDataPtr, kBufferSize);
1001       _encoderFilter->Init();
1002       My_FilterBench(_encoderFilter, rgCopy.Buffer, kBufferSize);
1003       RINOK(WriteStream(outStream, rgCopy.Buffer, kBufferSize));
1004     }
1005     else
1006     {
1007       inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
1008       RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]));
1009     }
1010 
1011     // outStreamSpec->Print();
1012 
1013     UInt32 crcNew = CRC_GET_DIGEST(outStreamSpec->Crc);
1014     if (i == 0)
1015       crcPrev = crcNew;
1016     else if (calcCrc && crcPrev != crcNew)
1017       return E_FAIL;
1018 
1019     compressedSize = outStreamSpec->Pos;
1020     bi.UnpackSize += kBufferSize;
1021     bi.PackSize += compressedSize;
1022   }
1023 
1024   _encoder.Release();
1025   _encoderFilter.Release();
1026   return S_OK;
1027 }
1028 
1029 
// Decodes the data produced by Encode() NumIterations times with decoder
// (decoderIndex) and checks the CRC of the result against (crc).
//
// Returns E_FAIL if a decoder filter is used with a non-zero index, if the
// coder cannot accept properties although some were written, or if the
// compressed data does not fit into rgCopy in filter mode; S_FALSE on CRC
// mismatch; otherwise an error from the coder or callbacks, or S_OK.
// Progress totals are accumulated in progressInfoSpec[decoderIndex]->BenchInfo.
// The decoder objects are released on success.
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
  CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
  CMyComPtr<IUnknown> coder;
  if (_decoderFilter)
  {
    // there is only one filter object, so only one decoder slot can use it
    if (decoderIndex != 0)
      return E_FAIL;
    coder = _decoderFilter;
  }
  else
    coder = decoder;

  CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
  coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
  // properties were written by the encoder, but the decoder cannot take them
  if (!setDecProps && propStreamSpec->Pos != 0)
    return E_FAIL;

  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;

  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
  pi->BenchInfo.UnpackSize = 0;
  pi->BenchInfo.PackSize = 0;

  #ifndef _7ZIP_ST
  {
    CMyComPtr<ICompressSetCoderMt> setCoderMt;
    coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
    if (setCoderMt)
    {
      RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads));
    }
  }
  #endif

  CMyComPtr<ICompressSetCoderProperties> scp;
  coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
  if (scp)
  {
    UInt64 reduceSize = _uncompressedDataSize;
    RINOK(_method.SetCoderProps(scp, &reduceSize));
  }

  CMyComPtr<ICryptoProperties> cp;
  coder.QueryInterface(IID_ICryptoProperties, &cp);

  if (setDecProps)
  {
    RINOK(setDecProps->SetDecoderProperties2(propStreamSpec->Buffer, (UInt32)propStreamSpec->Pos));
  }

  {
    CMyComPtr<ICryptoSetPassword> sp;
    coder.QueryInterface(IID_ICryptoSetPassword, &sp);
    if (sp)
    {
      RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
    }
  }

  // pi->BenchInfo.UnpackSize at the previous CheckBreak() call
  UInt64 prev = 0;

  if (cp)
  {
    RINOK(cp->SetKey(_key, sizeof(_key)));
    RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
  }

  for (UInt64 i = 0; i < NumIterations; i++)
  {
    // poll for user break about once per MiB of produced output
    if (printCallback && pi->BenchInfo.UnpackSize - prev > (1 << 20))
    {
      RINOK(printCallback->CheckBreak());
      prev = pi->BenchInfo.UnpackSize;
    }

    inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
    crcOutStreamSpec->Init();

    UInt64 outSize = kBufferSize;
    // CRC is checked in every pass (CheckCrc_Dec) or in every 128th pass
    crcOutStreamSpec->CalcCrc = ((i & 0x7F) == 0 || CheckCrc_Dec);

    if (_decoderFilter)
    {
      if (compressedSize > rgCopy.BufferSize)
        return E_FAIL;
      // filters work in place: run on a private copy, then feed it to the CRC stream
      memcpy(rgCopy.Buffer, outStreamSpec->Buffer, compressedSize);
      _decoderFilter->Init();
      My_FilterBench(_decoderFilter, rgCopy.Buffer, compressedSize);
      RINOK(WriteStream(crcOutStream, rgCopy.Buffer, compressedSize));
    }
    else
    {
      RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
    }

    // decoded data differs from the original input
    if (crcOutStreamSpec->CalcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
      return S_FALSE;
    pi->BenchInfo.UnpackSize += kBufferSize;
    pi->BenchInfo.PackSize += compressedSize;
  }

  decoder.Release();
  _decoderFilter.Release();
  return S_OK;
}
1139 
1140 
// Thread-count limit: 1 << 12 = 4096.
// NOTE(review): not referenced in this part of the file; presumably used to
// clamp a requested thread count elsewhere - confirm against the callers.
static const UInt32 kNumThreadsMax = (1 << 12);
1142 
1143 struct CBenchEncoders
1144 {
1145   CEncoderInfo *encoders;
CBenchEncodersCBenchEncoders1146   CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
~CBenchEncodersCBenchEncoders1147   ~CBenchEncoders() { delete []encoders; }
1148 };
1149 
1150 
GetNumIterations(UInt64 numCommands,UInt64 complexInCommands)1151 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1152 {
1153   if (numCommands < (1 << 4))
1154     numCommands = (1 << 4);
1155   UInt64 res = complexInCommands / numCommands;
1156   return (res == 0 ? 1 : res);
1157 }
1158 
1159 
// Benchmarks one codec: runs an encode pass, reports it through
// callback->SetEncodeResult, then runs a decode pass over the encoded data and
// reports it through callback->SetDecodeResult.
//
//   complexInCommands    - work budget; GetNumIterations() divides it by the
//                          per-iteration estimate from benchProps.
//   oldLzmaBenchMode     - only affects LZMA (methodId == k_LZMA): when both
//                          numThreads > 1 and the LZMA thread count > 1, the
//                          number of encoders is halved and each encoder gets
//                          two decoder threads.
//   numThreads           - number of encoder instances to run.
//                          (Both names above are omitted in _7ZIP_ST builds,
//                          where the parameters are unused.)
//   method2              - method description; copied because a thread-count
//                          property may be added to the local copy.
//   uncompressedDataSize - size of the data each encoder works on.
//   fileData             - optional input data; when non-NULL its CRC is
//                          computed here and handed to every encoder.
//   generateDictBits     - forwarded to CEncoderInfo::Init.
//   printCallback        - stored in every encoder.
//   callback             - receives progress (first encoder only) and results.
//   benchProps           - complexity figures used for iteration counts and
//                          for deciding whether CRC is checked on every pass.
//
// Returns E_NOTIMPL when the method or its coder objects are unavailable,
// E_INVALIDARG when the method is not single-stream, otherwise the first
// failing HRESULT or S_OK.
static HRESULT MethodBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    bool
      #ifndef _7ZIP_ST
        oldLzmaBenchMode
      #endif
    ,
    UInt32
      #ifndef _7ZIP_ST
        numThreads
      #endif
    ,
    const COneMethodInfo &method2,
    size_t uncompressedDataSize,
    const Byte *fileData,
    unsigned generateDictBits,

    IBenchPrintCallback *printCallback,
    IBenchCallback *callback,
    CBenchProps *benchProps)
{
  COneMethodInfo method = method2;
  UInt64 methodId;
  UInt32 numStreams;
  int codecIndex = FindMethod_Index(
      EXTERNAL_CODECS_LOC_VARS
      method.MethodName, true,
      methodId, numStreams);
  if (codecIndex < 0)
    return E_NOTIMPL;
  if (numStreams != 1)
    return E_INVALIDARG;

  // Single-threaded defaults; overridden below in multithreaded builds.
  UInt32 numEncoderThreads = 1;
  UInt32 numSubDecoderThreads = 1;

  #ifndef _7ZIP_ST
    numEncoderThreads = numThreads;

    if (oldLzmaBenchMode && methodId == k_LZMA)
    {
      // With one benchmark thread and no explicit thread property, pin the
      // method to one thread.
      if (numThreads == 1 && method.Get_NumThreads() < 0)
        method.AddProp_NumThreads(1);
      const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
      if (numThreads > 1 && numLzmaThreads > 1)
      {
        numEncoderThreads = numThreads / 2;
        numSubDecoderThreads = 2;
      }
    }
  #endif

  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;

  // Step 1: create the encoder and decoder objects for every encoder slot.
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    // Only the first encoder reports to the result callback.
    encoder.callback = (i == 0) ? callback : 0;
    encoder.printCallback = printCallback;

    {
      CCreatedCoder cod;
      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS codecIndex, true, encoder._encoderFilter, cod));
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // CRC is always checked when the complexity figure exceeds 30.
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30 ;
    encoder.CheckCrc_Dec = (benchProps->DecComplexCompr + benchProps->DecComplexUnc) > 30 ;

    memset(encoder._iv, 0, sizeof(encoder._iv));
    memset(encoder._key, 0, sizeof(encoder._key));
    memset(encoder._psw, 0, sizeof(encoder._psw));

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod));
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }
  }

  CBaseRandomGenerator rg;
  rg.Init();

  // CRC of the caller-supplied data; stays 0 when no data was supplied.
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  // Step 2: configure every encoder and let it prepare its input (Init).
  // The same random generator object is passed to all of them.
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;

    RINOK(encoders[i].Init(method, generateDictBits, &rg));
  }

  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  // Step 3: encode pass. With more than one encoder each runs in its own
  // thread; otherwise Encode() is called directly.
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GeComprCommands(uncompressedDataSize), complexInCommands);

    // Two progress objects per encoder, both sharing `status`.
    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }

    // The first encoder's progress object carries the callback and the timer
    // for the whole pass.
    if (i == 0)
    {
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
      bpi->SetStartTime();
    }

    #ifndef _7ZIP_ST
    if (numEncoderThreads > 1)
    {
      #ifdef USE_ALLOCA
      // A different value below 2 KB for each thread.
      // NOTE(review): presumably used to give each thread a different stack
      // offset - confirm in CreateEncoderThread.
      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif

      RINOK(encoder.CreateEncoderThread())
    }
    else
    #endif
    {
      RINOK(encoder.Encode());
    }
  }

  #ifndef _7ZIP_ST
  if (numEncoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      encoders[i].thread[0].Wait();
  #endif

  RINOK(status.Res);

  CBenchInfo info;

  // Timing comes from the first encoder; sizes are summed over all encoders.
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  RINOK(callback->SetEncodeResult(info, true));


  // Step 4: decode pass.
  status.Res = S_OK;
  status.EncodeMode = false;

  UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    // The iteration count is derived from the first encoder's compressed size
    // and reused for all other encoders.
    if (i == 0)
    {
      encoder.NumIterations = GetNumIterations(benchProps->GeDecomprCommands(encoder.compressedSize, encoder.kBufferSize), complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef _7ZIP_ST
    {
      // Non-positive thread property means one decoder sub-thread.
      int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : numSubThreads;
    }
    if (numDecoderThreads > 1)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        // The second argument is true only for the very first decoder thread.
        HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
            #endif
            );
        RINOK(res);
      }
    }
    else
    #endif
    {
      RINOK(encoder.Decode(0));
    }
  }

  #ifndef _7ZIP_ST
  // Wait for every decoder thread; the last non-S_OK result is reported.
  HRESULT res = S_OK;
  if (numDecoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        encoder.thread[j].Wait();
        if (encoder.Results[j] != S_OK)
          res = encoder.Results[j];
      }
  RINOK(res);
  #endif

  RINOK(status.Res);
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  #ifndef _7ZIP_ST
  #ifdef UNDER_CE
  // On Windows CE the user + kernel time of every decoder thread is added to
  // info.UserTime.
  if (numDecoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif

  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // Reported twice: first as a non-final, then as the final result.
  RINOK(callback->SetDecodeResult(info, false));
  RINOK(callback->SetDecodeResult(info, true));

  return S_OK;
}
1425 
1426 
GetLZMAUsage(bool multiThread,UInt32 dictionary)1427 static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
1428 {
1429   UInt32 hs = dictionary - 1;
1430   hs |= (hs >> 1);
1431   hs |= (hs >> 2);
1432   hs |= (hs >> 4);
1433   hs |= (hs >> 8);
1434   hs >>= 1;
1435   hs |= 0xFFFF;
1436   if (hs > (1 << 24))
1437     hs >>= 1;
1438   hs++;
1439   return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 +
1440       (1 << 20) + (multiThread ? (6 << 20) : 0);
1441 }
1442 
GetBenchMemoryUsage(UInt32 numThreads,UInt32 dictionary,bool totalBench)1443 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench)
1444 {
1445   const UInt32 kBufferSize = dictionary;
1446   const UInt32 kCompressedBufferSize = kBufferSize; // / 2;
1447   bool lzmaMt = (totalBench || numThreads > 1);
1448   UInt32 numBigThreads = numThreads;
1449   if (!totalBench && lzmaMt)
1450     numBigThreads /= 2;
1451   return ((UInt64)kBufferSize + kCompressedBufferSize +
1452     GetLZMAUsage(lzmaMt, dictionary) + (2 << 20)) * numBigThreads;
1453 }
1454 
CrcBig(const void * data,UInt32 size,UInt64 numIterations,const UInt32 * checkSum,IHasher * hf,IBenchPrintCallback * callback)1455 static HRESULT CrcBig(const void *data, UInt32 size, UInt64 numIterations,
1456     const UInt32 *checkSum, IHasher *hf,
1457     IBenchPrintCallback *callback)
1458 {
1459   Byte hash[64];
1460   UInt64 i;
1461   for (i = 0; i < sizeof(hash); i++)
1462     hash[i] = 0;
1463   for (i = 0; i < numIterations; i++)
1464   {
1465     if (callback && (i & 0xFF) == 0)
1466     {
1467       RINOK(callback->CheckBreak());
1468     }
1469     hf->Init();
1470     hf->Update(data, size);
1471     hf->Final(hash);
1472     UInt32 hashSize = hf->GetDigestSize();
1473     if (hashSize > sizeof(hash))
1474       return S_FALSE;
1475     UInt32 sum = 0;
1476     for (UInt32 j = 0; j < hashSize; j += 4)
1477       sum ^= GetUi32(hash + j);
1478     if (checkSum && sum != *checkSum)
1479     {
1480       return S_FALSE;
1481     }
1482   }
1483   return S_OK;
1484 }
1485 
// Start value and operand of the CPU-frequency loop (see CountCpuFreq).
// NOTE(review): presumably a non-const global so that the compiler cannot
// fold the loop into a constant - confirm.
UInt32 g_BenchCpuFreqTemp = 1;

// YY1 is one addition followed by one XOR on `sum`.
// Each following macro repeats the previous one four times, so YY7 expands to
// 4 * 4 * 4 = 64 copies of YY1, i.e. 128 operations.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// Number of operations in one YY7 expansion (64 * 2).
static const UInt32 kNumFreqCommands = 128;
1493 
1494 EXTERN_C_BEGIN
1495 
// Kernel of the CPU-frequency measurement.
// Executes the YY7 macro body `num` times, each time applying its add/xor
// chain to `sum` with operand `val`, and returns the final `sum`.
// The returned value only matters as a data dependency that keeps the work
// observable; callers feed it back in as the next `sum`.
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}
1504 
1505 EXTERN_C_END
1506 
1507 
1508 #ifndef _7ZIP_ST
1509 
1510 struct CFreqInfo
1511 {
1512   NWindows::CThread Thread;
1513   IBenchPrintCallback *Callback;
1514   HRESULT CallbackRes;
1515   UInt32 ValRes;
1516   UInt32 Size;
1517   UInt64 NumIterations;
1518 
WaitCFreqInfo1519   void Wait()
1520   {
1521     Thread.Wait();
1522     Thread.Close();
1523   }
1524 };
1525 
FreqThreadFunction(void * param)1526 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
1527 {
1528   CFreqInfo *p = (CFreqInfo *)param;
1529 
1530   UInt32 sum = g_BenchCpuFreqTemp;
1531   for (UInt64 k = p->NumIterations; k > 0; k--)
1532   {
1533     p->CallbackRes = p->Callback->CheckBreak();
1534     if (p->CallbackRes != S_OK)
1535       return 0;
1536     sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
1537   }
1538   p->ValRes = sum;
1539   return 0;
1540 }
1541 
1542 struct CFreqThreads
1543 {
1544   CFreqInfo *Items;
1545   UInt32 NumThreads;
1546 
CFreqThreadsCFreqThreads1547   CFreqThreads(): Items(NULL), NumThreads(0) {}
WaitAllCFreqThreads1548   void WaitAll()
1549   {
1550     for (UInt32 i = 0; i < NumThreads; i++)
1551       Items[i].Wait();
1552     NumThreads = 0;
1553   }
~CFreqThreadsCFreqThreads1554   ~CFreqThreads()
1555   {
1556     WaitAll();
1557     delete []Items;
1558   }
1559 };
1560 
1561 struct CCrcInfo
1562 {
1563   NWindows::CThread Thread;
1564   IBenchPrintCallback *Callback;
1565   HRESULT CallbackRes;
1566 
1567   const Byte *Data;
1568   UInt32 Size;
1569   UInt64 NumIterations;
1570   bool CheckSumDefined;
1571   UInt32 CheckSum;
1572   CMyComPtr<IHasher> Hasher;
1573   HRESULT Res;
1574 
1575   #ifdef USE_ALLOCA
1576   size_t AllocaSize;
1577   #endif
1578 
WaitCCrcInfo1579   void Wait()
1580   {
1581     Thread.Wait();
1582     Thread.Close();
1583   }
1584 };
1585 
CrcThreadFunction(void * param)1586 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
1587 {
1588   CCrcInfo *p = (CCrcInfo *)param;
1589 
1590   #ifdef USE_ALLOCA
1591   alloca(p->AllocaSize);
1592   #endif
1593 
1594   p->Res = CrcBig(p->Data, p->Size, p->NumIterations,
1595       p->CheckSumDefined ? &p->CheckSum : NULL, p->Hasher,
1596       p->Callback);
1597   return 0;
1598 }
1599 
1600 struct CCrcThreads
1601 {
1602   CCrcInfo *Items;
1603   UInt32 NumThreads;
1604 
CCrcThreadsCCrcThreads1605   CCrcThreads(): Items(NULL), NumThreads(0) {}
WaitAllCCrcThreads1606   void WaitAll()
1607   {
1608     for (UInt32 i = 0; i < NumThreads; i++)
1609       Items[i].Wait();
1610     NumThreads = 0;
1611   }
~CCrcThreadsCCrcThreads1612   ~CCrcThreads()
1613   {
1614     WaitAll();
1615     delete []Items;
1616   }
1617 };
1618 
1619 #endif
1620 
CrcCalc1(const Byte * buf,UInt32 size)1621 static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
1622 {
1623   UInt32 crc = CRC_INIT_VAL;;
1624   for (UInt32 i = 0; i < size; i++)
1625     crc = CRC_UPDATE_BYTE(crc, buf[i]);
1626   return CRC_GET_DIGEST(crc);
1627 }
1628 
RandGen(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)1629 static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
1630 {
1631   for (UInt32 i = 0; i < size; i++)
1632     buf[i] = (Byte)RG.GetRnd();
1633 }
1634 
RandGenCrc(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)1635 static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
1636 {
1637   RandGen(buf, size, RG);
1638   return CrcCalc1(buf, size);
1639 }
1640 
CrcInternalTest()1641 bool CrcInternalTest()
1642 {
1643   CBenchBuffer buffer;
1644   const UInt32 kBufferSize0 = (1 << 8);
1645   const UInt32 kBufferSize1 = (1 << 10);
1646   const UInt32 kCheckSize = (1 << 5);
1647   if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
1648     return false;
1649   Byte *buf = buffer.Buffer;
1650   UInt32 i;
1651   for (i = 0; i < kBufferSize0; i++)
1652     buf[i] = (Byte)i;
1653   UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
1654   if (crc1 != 0x29058C73)
1655     return false;
1656   CBaseRandomGenerator RG;
1657   RandGen(buf + kBufferSize0, kBufferSize1, RG);
1658   for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
1659     for (UInt32 j = 0; j < kCheckSize; j++)
1660       if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
1661         return false;
1662   return true;
1663 }
1664 
// One row of the total-benchmark method table (g_Bench).
struct CBenchMethod
{
  // Weight of this method in the accumulated totals; TotalBench copies it to
  // both EncodeWeight and DecodeWeight of the callback.
  unsigned Weight;
  // Passed to MethodBench as generateDictBits. In TotalBench, 0 selects
  // kFilterUnpackSize as the data size unless the size is forced.
  unsigned DictBits;
  // Complexity figures copied into CBenchProps before the method is run.
  UInt32 EncComplex;
  UInt32 DecComplexCompr;
  UInt32 DecComplexUnc;
  // Method specification string, parsed with ParseMethodFromPROPVARIANT.
  const char *Name;
};
1674 
// Methods run by TotalBench, in this order.
// Columns: Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name.
static const CBenchMethod g_Bench[] =
{
  { 40, 17,  357,  145,   20, "LZMA:x1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },

  { 10, 16,  124,   40,   14, "Deflate:x1" },
  { 20, 16,  376,   40,   14, "Deflate:x5" },
  { 10, 16, 1082,   40,   14, "Deflate:x7" },
  { 10, 17,  422,   40,   14, "Deflate64:x5" },

  { 10, 15,  590,   69,   69, "BZip2:x1" },
  { 20, 19,  815,  122,  122, "BZip2:x5" },
  { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
  { 10, 19, 2530,  122,  122, "BZip2:x7" },

  { 10, 18, 1010,    0, 1150, "PPMD:x1" },
  { 10, 22, 1655,    0, 1830, "PPMD:x5" },

  // DictBits == 0 for the rows below.
  {  2,  0,    6,    0,    6, "Delta:4" },
  {  2,  0,    4,    0,    4, "BCJ" },

  { 10,  0,   24,    0,   24, "AES256CBC:1" },
  {  2,  0,    8,    0,    2, "AES256CBC:2" }
};
1700 
// One row of the hash benchmark table (g_Hash).
// NOTE(review): the code that consumes these fields is outside this section;
// the field notes below are assumptions to be confirmed there.
struct CBenchHash
{
  unsigned Weight;   // presumably the weight of this hash in the totals
  UInt32 Complex;    // presumably a per-byte complexity figure
  UInt32 CheckSum;   // presumably the expected folded digest checked by CrcBig
  const char *Name;  // hash method specification string
};
1708 
// Hash methods known to the benchmark.
// Columns: Weight, Complex, CheckSum, Name.
// The three CRC32 rows share one CheckSum value.
static const CBenchHash g_Hash[] =
{
  {  1,  1820, 0x8F8FEDAB, "CRC32:1" },
  { 10,   558, 0x8F8FEDAB, "CRC32:4" },
  { 10,   339, 0x8F8FEDAB, "CRC32:8" },
  { 10,   512, 0xDF1C17CC, "CRC64" },
  { 10,  5100, 0x2D79FF2E, "SHA256" },
  { 10,  2340, 0x4C25132B, "SHA1" },
  {  2,  5500, 0xE084E913, "BLAKE2sp" }
};
1719 
1720 struct CTotalBenchRes
1721 {
1722   // UInt64 NumIterations1; // for Usage
1723   UInt64 NumIterations2; // for Rating / RPU
1724 
1725   UInt64 Rating;
1726   UInt64 Usage;
1727   UInt64 RPU;
1728 
InitCTotalBenchRes1729   void Init() { /* NumIterations1 = 0; */ NumIterations2 = 0; Rating = 0; Usage = 0; RPU = 0; }
1730 
SetSumCTotalBenchRes1731   void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2)
1732   {
1733     Rating = (r1.Rating + r2.Rating);
1734     Usage = (r1.Usage + r2.Usage);
1735     RPU = (r1.RPU + r2.RPU);
1736     // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
1737     NumIterations2 = (r1.NumIterations2 + r2.NumIterations2);
1738   }
1739 };
1740 
PrintNumber(IBenchPrintCallback & f,UInt64 value,unsigned size)1741 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
1742 {
1743   char s[128];
1744   unsigned startPos = (unsigned)sizeof(s) - 32;
1745   memset(s, ' ', startPos);
1746   ConvertUInt64ToString(value, s + startPos);
1747   // if (withSpace)
1748   {
1749     startPos--;
1750     size++;
1751   }
1752   unsigned len = (unsigned)strlen(s + startPos);
1753   if (size > len)
1754   {
1755     startPos -= (size - len);
1756     if (startPos < 0)
1757       startPos = 0;
1758   }
1759   f.Print(s + startPos);
1760 }
1761 
// Column widths (in characters) of the benchmark report.
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;

// Combined widths. NOTE(review): the constants 4 and 2 presumably account for
// the one separating space PrintNumber puts in front of each column - confirm.
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
1773 
1774 
PrintRating(IBenchPrintCallback & f,UInt64 rating,unsigned size)1775 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
1776 {
1777   PrintNumber(f, (rating + 500000) / 1000000, size);
1778 }
1779 
1780 
PrintPercents(IBenchPrintCallback & f,UInt64 val,UInt64 divider,unsigned size)1781 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
1782 {
1783   PrintNumber(f, (val * 100 + divider / 2) / divider, size);
1784 }
1785 
PrintChars(IBenchPrintCallback & f,char c,unsigned size)1786 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
1787 {
1788   char s[256];
1789   memset(s, (Byte)c, size);
1790   s[size] = 0;
1791   f.Print(s);
1792 }
1793 
PrintSpaces(IBenchPrintCallback & f,unsigned size)1794 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
1795 {
1796   PrintChars(f, ' ', size);
1797 }
1798 
PrintResults(IBenchPrintCallback & f,UInt64 usage,UInt64 rpu,UInt64 rating,bool showFreq,UInt64 cpuFreq)1799 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
1800 {
1801   PrintNumber(f, (usage + 5000) / 10000, kFieldSize_Usage);
1802   PrintRating(f, rpu, kFieldSize_RU);
1803   PrintRating(f, rating, kFieldSize_Rating);
1804   if (showFreq)
1805   {
1806     if (cpuFreq == 0)
1807       PrintSpaces(f, kFieldSize_EUAndEffec);
1808     else
1809     {
1810       UInt64 ddd = cpuFreq * usage / 100;
1811       if (ddd == 0)
1812         ddd = 1;
1813       PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU);
1814       PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
1815     }
1816   }
1817 }
1818 
PrintResults(IBenchPrintCallback * f,const CBenchInfo & info,unsigned weight,UInt64 rating,bool showFreq,UInt64 cpuFreq,CTotalBenchRes * res)1819 static void PrintResults(IBenchPrintCallback *f,
1820     const CBenchInfo &info,
1821     unsigned weight,
1822     UInt64 rating,
1823     bool showFreq, UInt64 cpuFreq,
1824     CTotalBenchRes *res)
1825 {
1826   UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations);
1827   if (f)
1828   {
1829     if (speed != 0)
1830       PrintNumber(*f, speed / 1024, kFieldSize_Speed);
1831     else
1832       PrintSpaces(*f, 1 + kFieldSize_Speed);
1833   }
1834   UInt64 usage = info.GetUsage();
1835   UInt64 rpu = info.GetRatingPerUsage(rating);
1836   if (f)
1837   {
1838     PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq);
1839   }
1840 
1841   if (res)
1842   {
1843     // res->NumIterations1++;
1844     res->NumIterations2 += weight;
1845     res->RPU += (rpu * weight);
1846     res->Rating += (rating * weight);
1847     res->Usage += (usage * weight);
1848   }
1849 }
1850 
PrintTotals(IBenchPrintCallback & f,bool showFreq,UInt64 cpuFreq,const CTotalBenchRes & res)1851 static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res)
1852 {
1853   PrintSpaces(f, 1 + kFieldSize_Speed);
1854   // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
1855   UInt64 numIterations2 = res.NumIterations2; if (numIterations2 == 0) numIterations2 = 1;
1856   PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
1857 }
1858 
1859 
PrintHex(AString & s,UInt64 v)1860 static void PrintHex(AString &s, UInt64 v)
1861 {
1862   char temp[32];
1863   ConvertUInt64ToHex(v, temp);
1864   s += temp;
1865 }
1866 
GetProcessThreadsInfo(const NSystem::CProcessAffinity & ti)1867 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
1868 {
1869   AString s;
1870   // s.Add_UInt32(ti.numProcessThreads);
1871   if (ti.processAffinityMask != ti.systemAffinityMask)
1872   {
1873     // if (ti.numProcessThreads != ti.numSysThreads)
1874     {
1875       s += " / ";
1876       s.Add_UInt32(ti.GetNumSystemThreads());
1877     }
1878     s += " : ";
1879     PrintHex(s, ti.processAffinityMask);
1880     s += " / ";
1881     PrintHex(s, ti.systemAffinityMask);
1882   }
1883   return s;
1884 }
1885 
1886 
PrintSize(AString & s,UInt64 v)1887 static void PrintSize(AString &s, UInt64 v)
1888 {
1889   char c = 0;
1890   if ((v & 0x3FF) == 0) { v >>= 10; c = 'K';
1891   if ((v & 0x3FF) == 0) { v >>= 10; c = 'M';
1892   if ((v & 0x3FF) == 0) { v >>= 10; c = 'G';
1893   if ((v & 0x3FF) == 0) { v >>= 10; c = 'T';
1894   }}}}
1895   else
1896   {
1897     PrintHex(s, v);
1898     return;
1899   }
1900   char temp[32];
1901   ConvertUInt64ToString(v, temp);
1902   s += temp;
1903   if (c)
1904     s += c;
1905 }
1906 
1907 
1908 #ifdef _7ZIP_LARGE_PAGES
1909 
1910 extern bool g_LargePagesMode;
1911 
1912 extern "C"
1913 {
1914   extern SIZE_T g_LargePageSize;
1915 }
1916 
Add_LargePages_String(AString & s)1917 void Add_LargePages_String(AString &s)
1918 {
1919   if (g_LargePagesMode || g_LargePageSize != 0)
1920   {
1921     s += " (LP-";
1922     PrintSize(s, g_LargePageSize);
1923     if (!g_LargePagesMode)
1924       s += "-NA";
1925     s += ")";
1926   }
1927 }
1928 
1929 #endif
1930 
1931 
1932 
PrintRequirements(IBenchPrintCallback & f,const char * sizeString,bool size_Defined,UInt64 size,const char * threadsString,UInt32 numThreads)1933 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
1934     bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
1935 {
1936   f.Print("RAM ");
1937   f.Print(sizeString);
1938   if (size_Defined)
1939     PrintNumber(f, (size >> 20), 6);
1940   else
1941     f.Print("      ?");
1942   f.Print(" MB");
1943 
1944   #ifdef _7ZIP_LARGE_PAGES
1945   {
1946     AString s;
1947     Add_LargePages_String(s);
1948     f.Print(s);
1949   }
1950   #endif
1951 
1952   f.Print(",  # ");
1953   f.Print(threadsString);
1954   PrintNumber(f, numThreads, 3);
1955 }
1956 
1957 
1958 
1959 struct CBenchCallbackToPrint: public IBenchCallback
1960 {
1961   CBenchProps BenchProps;
1962   CTotalBenchRes EncodeRes;
1963   CTotalBenchRes DecodeRes;
1964   IBenchPrintCallback *_file;
1965   UInt32 DictSize;
1966 
1967   bool Use2Columns;
1968   unsigned NameFieldSize;
1969 
1970   bool ShowFreq;
1971   UInt64 CpuFreq;
1972 
1973   unsigned EncodeWeight;
1974   unsigned DecodeWeight;
1975 
CBenchCallbackToPrintCBenchCallbackToPrint1976   CBenchCallbackToPrint():
1977       Use2Columns(false),
1978       NameFieldSize(0),
1979       ShowFreq(false),
1980       CpuFreq(0),
1981       EncodeWeight(1),
1982       DecodeWeight(1)
1983       {}
1984 
InitCBenchCallbackToPrint1985   void Init() { EncodeRes.Init(); DecodeRes.Init(); }
1986   void Print(const char *s);
1987   void NewLine();
1988 
1989   HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
1990   HRESULT SetEncodeResult(const CBenchInfo &info, bool final);
1991   HRESULT SetDecodeResult(const CBenchInfo &info, bool final);
1992 };
1993 
SetFreq(bool showFreq,UInt64 cpuFreq)1994 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
1995 {
1996   ShowFreq = showFreq;
1997   CpuFreq = cpuFreq;
1998   return S_OK;
1999 }
2000 
SetEncodeResult(const CBenchInfo & info,bool final)2001 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
2002 {
2003   RINOK(_file->CheckBreak());
2004   if (final)
2005   {
2006     UInt64 rating = BenchProps.GetCompressRating(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
2007     PrintResults(_file, info,
2008         EncodeWeight, rating,
2009         ShowFreq, CpuFreq, &EncodeRes);
2010     if (!Use2Columns)
2011       _file->NewLine();
2012   }
2013   return S_OK;
2014 }
2015 
// Separator printed between the encode and decode columns in two-column mode.
static const char * const kSep = "  | ";
2017 
SetDecodeResult(const CBenchInfo & info,bool final)2018 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
2019 {
2020   RINOK(_file->CheckBreak());
2021   if (final)
2022   {
2023     UInt64 rating = BenchProps.GetDecompressRating(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
2024     if (Use2Columns)
2025       _file->Print(kSep);
2026     else
2027       PrintSpaces(*_file, NameFieldSize);
2028     CBenchInfo info2 = info;
2029     info2.UnpackSize *= info2.NumIterations;
2030     info2.PackSize *= info2.NumIterations;
2031     info2.NumIterations = 1;
2032     PrintResults(_file, info2,
2033         DecodeWeight, rating,
2034         ShowFreq, CpuFreq, &DecodeRes);
2035   }
2036   return S_OK;
2037 }
2038 
Print(const char * s)2039 void CBenchCallbackToPrint::Print(const char *s)
2040 {
2041   _file->Print(s);
2042 }
2043 
NewLine()2044 void CBenchCallbackToPrint::NewLine()
2045 {
2046   _file->NewLine();
2047 }
2048 
PrintLeft(IBenchPrintCallback & f,const char * s,unsigned size)2049 void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
2050 {
2051   f.Print(s);
2052   int numSpaces = size - MyStringLen(s);
2053   if (numSpaces > 0)
2054     PrintSpaces(f, numSpaces);
2055 }
2056 
PrintRight(IBenchPrintCallback & f,const char * s,unsigned size)2057 void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
2058 {
2059   int numSpaces = size - MyStringLen(s);
2060   if (numSpaces > 0)
2061     PrintSpaces(f, numSpaces);
2062   f.Print(s);
2063 }
2064 
TotalBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,bool forceUnpackSize,size_t unpackSize,const Byte * fileData,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback)2065 static HRESULT TotalBench(
2066     DECL_EXTERNAL_CODECS_LOC_VARS
2067     UInt64 complexInCommands,
2068     UInt32 numThreads,
2069     bool forceUnpackSize,
2070     size_t unpackSize,
2071     const Byte *fileData,
2072     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
2073 {
2074   for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
2075   {
2076     const CBenchMethod &bench = g_Bench[i];
2077     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2078     callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2079     callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2080     callback->BenchProps.EncComplex = bench.EncComplex;
2081 
2082     COneMethodInfo method;
2083     NCOM::CPropVariant propVariant;
2084     propVariant = bench.Name;
2085     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2086 
2087     size_t unpackSize2 = unpackSize;
2088     if (!forceUnpackSize && bench.DictBits == 0)
2089       unpackSize2 = kFilterUnpackSize;
2090 
2091     callback->EncodeWeight = bench.Weight;
2092     callback->DecodeWeight = bench.Weight;
2093 
2094     HRESULT res = MethodBench(
2095         EXTERNAL_CODECS_LOC_VARS
2096         complexInCommands,
2097         false, numThreads, method,
2098         unpackSize2, fileData,
2099         bench.DictBits,
2100         printCallback, callback, &callback->BenchProps);
2101 
2102     if (res == E_NOTIMPL)
2103     {
2104       // callback->Print(" ---");
2105       // we need additional empty line as line for decompression results
2106       if (!callback->Use2Columns)
2107         callback->NewLine();
2108     }
2109     else
2110     {
2111       RINOK(res);
2112     }
2113 
2114     callback->NewLine();
2115   }
2116   return S_OK;
2117 }
2118 
2119 
FreqBench(UInt64 complexInCommands,UInt32 numThreads,IBenchPrintCallback * _file,bool showFreq,UInt64 specifiedFreq,UInt64 & cpuFreq,UInt32 & res)2120 static HRESULT FreqBench(
2121     UInt64 complexInCommands,
2122     UInt32 numThreads,
2123     IBenchPrintCallback *_file,
2124     bool showFreq,
2125     UInt64 specifiedFreq,
2126     UInt64 &cpuFreq,
2127     UInt32 &res)
2128 {
2129   res = 0;
2130   cpuFreq = 0;
2131 
2132   UInt32 bufferSize = 1 << 20;
2133   UInt32 complexity = kNumFreqCommands;
2134   if (numThreads == 0)
2135     numThreads = 1;
2136 
2137   #ifdef _7ZIP_ST
2138   numThreads = 1;
2139   #endif
2140 
2141   UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
2142   UInt64 numIterations = complexInCommands / complexity / bsize;
2143   if (numIterations == 0)
2144     numIterations = 1;
2145 
2146   CBenchInfoCalc progressInfoSpec;
2147 
2148   #ifndef _7ZIP_ST
2149   CFreqThreads threads;
2150   if (numThreads > 1)
2151   {
2152     threads.Items = new CFreqInfo[numThreads];
2153     UInt32 i;
2154     for (i = 0; i < numThreads; i++)
2155     {
2156       CFreqInfo &info = threads.Items[i];
2157       info.Callback = _file;
2158       info.CallbackRes = S_OK;
2159       info.NumIterations = numIterations;
2160       info.Size = bufferSize;
2161     }
2162     progressInfoSpec.SetStartTime();
2163     for (i = 0; i < numThreads; i++)
2164     {
2165       CFreqInfo &info = threads.Items[i];
2166       RINOK(info.Thread.Create(FreqThreadFunction, &info));
2167       threads.NumThreads++;
2168     }
2169     threads.WaitAll();
2170     for (i = 0; i < numThreads; i++)
2171     {
2172       RINOK(threads.Items[i].CallbackRes);
2173     }
2174   }
2175   else
2176   #endif
2177   {
2178     progressInfoSpec.SetStartTime();
2179     UInt32 sum = g_BenchCpuFreqTemp;
2180     for (UInt64 k = numIterations; k > 0; k--)
2181     {
2182       RINOK(_file->CheckBreak());
2183       sum = CountCpuFreq(sum, bufferSize, g_BenchCpuFreqTemp);
2184     }
2185     res += sum;
2186   }
2187 
2188   CBenchInfo info;
2189   progressInfoSpec.SetFinishTime(info);
2190 
2191   info.UnpackSize = 0;
2192   info.PackSize = 0;
2193   info.NumIterations = 1;
2194 
2195   if (_file)
2196   {
2197     {
2198       UInt64 numCommands = (UInt64)numIterations * bufferSize * numThreads * complexity;
2199       UInt64 rating = info.GetSpeed(numCommands);
2200       cpuFreq = rating / numThreads;
2201       PrintResults(_file, info,
2202           0, // weight
2203           rating,
2204           showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : cpuFreq) : 0, NULL);
2205     }
2206     RINOK(_file->CheckBreak());
2207   }
2208 
2209   return S_OK;
2210 }
2211 
2212 
2213 
// Benchmarks the hash method named by method.MethodName.
//
// Each of (numThreads) threads hashes its own (bufferSize)-byte part of one
// common buffer; the number of passes over the data is derived from
// (complexInCommands), (complexity) and (bufferSize).
//
//   numThreads   - 0 is treated as 1; always 1 in _7ZIP_ST builds
//   speed        - [out] info.GetSpeed() for the total number of hashed bytes;
//                  written only when the function returns S_OK
//   complexity   - cost coefficient of the hasher (scaled by 256 in the formulas below)
//   benchWeight  - forwarded to PrintResults()
//   checkSum     - optional expected value; may be NULL
//   _file        - optional print / break-check callback; may be NULL
//   encodeRes, showFreq, cpuFreq - forwarded to PrintResults()
//
// Returns E_NOTIMPL if the hash method is unknown or no hasher object could be
// created, E_OUTOFMEMORY if the buffer size overflows size_t or the allocation
// fails, otherwise the first error of the benchmark itself or S_OK.
static HRESULT CrcBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    UInt32 numThreads, UInt32 bufferSize,
    UInt64 &speed,
    UInt32 complexity, unsigned benchWeight,
    const UInt32 *checkSum,
    const COneMethodInfo &method,
    IBenchPrintCallback *_file,
    CTotalBenchRes *encodeRes,
    bool showFreq, UInt64 cpuFreq)
{
  if (numThreads == 0)
    numThreads = 1;

  #ifdef _7ZIP_ST
  numThreads = 1;
  #endif

  const AString &methodName = method.MethodName;
  // methodName.RemoveChar(L'-');
  CMethodId hashID;
  if (!FindHashMethod(
      EXTERNAL_CODECS_LOC_VARS
      methodName, hashID))
    return E_NOTIMPL;

  // one buffer for all threads: thread (i) works on bytes [i * bufferSize, (i + 1) * bufferSize)
  CBenchBuffer buffer;
  size_t totalSize = (size_t)bufferSize * numThreads;
  // the division detects overflow of the multiplication above
  if (totalSize / numThreads != bufferSize)
    return E_OUTOFMEMORY;
  if (!buffer.Alloc(totalSize))
    return E_OUTOFMEMORY;

  Byte *buf = buffer.Buffer;
  CBaseRandomGenerator RG;
  // bsize avoids division by zero for an empty buffer
  UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
  UInt64 numIterations = complexInCommands * 256 / complexity / bsize;
  if (numIterations == 0)
    numIterations = 1;

  CBenchInfoCalc progressInfoSpec;

  #ifndef _7ZIP_ST
  CCrcThreads threads;
  if (numThreads > 1)
  {
    threads.Items = new CCrcInfo[numThreads];

    UInt32 i;
    // all hashers and all input data are prepared before the clock is started
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &info = threads.Items[i];
      AString name;
      RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, info.Hasher));
      if (!info.Hasher)
        return E_NOTIMPL;
      // hashers that accept coder properties get the properties of (method)
      CMyComPtr<ICompressSetCoderProperties> scp;
      info.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
      if (scp)
      {
        UInt64 reduceSize = 1;
        RINOK(method.SetCoderProps(scp, &reduceSize));
      }

      Byte *data = buf + (size_t)bufferSize * i;
      info.Callback = _file;
      info.Data = data;
      info.NumIterations = numIterations;
      info.Size = bufferSize;
      // all parts are filled from the same generator (RG), one after another
      /* info.Crc = */ RandGenCrc(data, bufferSize, RG);
      info.CheckSumDefined = false;
      if (checkSum)
      {
        info.CheckSum = *checkSum;
        // only the first thread gets CheckSumDefined set
        info.CheckSumDefined = (checkSum && (i == 0));
      }

      #ifdef USE_ALLOCA
      // different AllocaSize value for each thread, always below 0x800
      info.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif
    }

    progressInfoSpec.SetStartTime();

    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &info = threads.Items[i];
      RINOK(info.Thread.Create(CrcThreadFunction, &info));
      // count only the threads that were really started
      threads.NumThreads++;
    }
    threads.WaitAll();
    for (i = 0; i < numThreads; i++)
    {
      RINOK(threads.Items[i].Res);
    }
  }
  else
  #endif
  {
    // single-thread mode: everything runs in the calling thread.
    // Note: here the clock is started before the hasher is created.
    /* UInt32 crc = */ RandGenCrc(buf, bufferSize, RG);
    progressInfoSpec.SetStartTime();
    CMyComPtr<IHasher> hasher;
    AString name;
    RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher));
    if (!hasher)
      return E_NOTIMPL;
    CMyComPtr<ICompressSetCoderProperties> scp;
    hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
    if (scp)
    {
      UInt64 reduceSize = 1;
      RINOK(method.SetCoderProps(scp, &reduceSize));
    }
    RINOK(CrcBig(buf, bufferSize, numIterations, checkSum, hasher, _file));
  }

  CBenchInfo info;
  progressInfoSpec.SetFinishTime(info);

  // total number of bytes hashed: per thread, then for all threads
  UInt64 unpSize = numIterations * bufferSize;
  UInt64 unpSizeThreads = unpSize * numThreads;
  info.UnpackSize = unpSizeThreads;
  info.PackSize = unpSizeThreads;
  info.NumIterations = 1;

  if (_file)
  {
    {
      // inverse of the scaling that was used to compute numIterations
      UInt64 numCommands = unpSizeThreads * complexity / 256;
      UInt64 rating = info.GetSpeed(numCommands);
      PrintResults(_file, info,
          benchWeight, rating,
          showFreq, cpuFreq, encodeRes);
    }
    RINOK(_file->CheckBreak());
  }

  speed = info.GetSpeed(unpSizeThreads);

  return S_OK;
}
2356 
TotalBench_Hash(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufSize,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2357 static HRESULT TotalBench_Hash(
2358     DECL_EXTERNAL_CODECS_LOC_VARS
2359     UInt64 complexInCommands,
2360     UInt32 numThreads, UInt32 bufSize,
2361     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
2362     CTotalBenchRes *encodeRes,
2363     bool showFreq, UInt64 cpuFreq)
2364 {
2365   for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2366   {
2367     const CBenchHash &bench = g_Hash[i];
2368     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2369     // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2370     // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2371     // callback->BenchProps.EncComplex = bench.EncComplex;
2372 
2373     COneMethodInfo method;
2374     NCOM::CPropVariant propVariant;
2375     propVariant = bench.Name;
2376     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2377 
2378     UInt64 speed;
2379     HRESULT res = CrcBench(
2380         EXTERNAL_CODECS_LOC_VARS
2381         complexInCommands,
2382         numThreads, bufSize,
2383         speed,
2384         bench.Complex, bench.Weight,
2385         &bench.CheckSum, method,
2386         printCallback, encodeRes, showFreq, cpuFreq);
2387     if (res == E_NOTIMPL)
2388     {
2389       // callback->Print(" ---");
2390     }
2391     else
2392     {
2393       RINOK(res);
2394     }
2395     callback->NewLine();
2396   }
2397   return S_OK;
2398 }
2399 
// Owner of a heap array of (num) UInt64 values; the array is released in the
// destructor. The elements are NOT initialized by the constructor.
struct CTempValues
{
  UInt64 *Values;
  CTempValues(UInt32 num) { Values = new UInt64[num]; }
  ~CTempValues() { delete []Values; }
private:
  // The object owns (Values): a member-wise copy would delete the array twice.
  // Copying is therefore disabled (declared, but not defined).
  CTempValues(const CTempValues &);
  CTempValues &operator=(const CTempValues &);
};
2406 
ParseNumberString(const UString & s,NCOM::CPropVariant & prop)2407 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
2408 {
2409   const wchar_t *end;
2410   UInt64 result = ConvertStringToUInt64(s, &end);
2411   if (*end != 0 || s.IsEmpty())
2412     prop = s;
2413   else if (result <= (UInt32)0xFFFFFFFF)
2414     prop = (UInt32)result;
2415   else
2416     prop = result;
2417 }
2418 
// Returns the number of threads for step (i) of the multi-thread test series
//   1, 2, 3, 4, 6, 8, 12, 16, ...
// Starting from the third step the value is limited by (numThreads);
// the first two steps (1 and 2) are returned as is.
static UInt32 GetNumThreadsNext(unsigned i, UInt32 numThreads)
{
  if (i == 0)
    return 1;
  if (i == 1)
    return 2;

  // steps alternate between 2 * 2^k and 3 * 2^k
  const unsigned step = i - 1;
  const UInt32 base = (step & 1) ? 3 : 2;
  const UInt32 candidate = base << (step / 2);

  if (candidate > numThreads)
    return numThreads;
  return candidate;
}
2427 
AreSameMethodNames(const char * fullName,const char * shortName)2428 static bool AreSameMethodNames(const char *fullName, const char *shortName)
2429 {
2430   return StringsAreEqualNoCase_Ascii(fullName, shortName);
2431 }
2432 
2433 
2434 #ifdef MY_CPU_X86_OR_AMD64
2435 
// Appends to (s) up to 4 characters packed in (v), lowest byte first.
// A zero byte ends the sequence.
static void PrintCpuChars(AString &s, UInt32 v)
{
  for (unsigned shift = 0; shift < 32; shift += 8)
  {
    const Byte c = (Byte)(v >> shift);
    if (c == 0)
      return;
    s += (char)c;
  }
}
2447 
x86cpuid_to_String(const Cx86cpuid & c,AString & s)2448 static void x86cpuid_to_String(const Cx86cpuid &c, AString &s)
2449 {
2450   s.Empty();
2451 
2452   UInt32 maxFunc2 = 0;
2453   UInt32 t[3];
2454 
2455   MyCPUID(0x80000000, &maxFunc2, &t[0], &t[1], &t[2]);
2456 
2457   bool fullNameIsAvail = (maxFunc2 >= 0x80000004);
2458 
2459   if (!fullNameIsAvail)
2460   {
2461     for (int i = 0; i < 3; i++)
2462       PrintCpuChars(s, c.vendor[i]);
2463   }
2464   else
2465   {
2466     for (int i = 0; i < 3; i++)
2467     {
2468       UInt32 d[4] = { 0 };
2469       MyCPUID(0x80000002 + i, &d[0], &d[1], &d[2], &d[3]);
2470       for (int j = 0; j < 4; j++)
2471         PrintCpuChars(s, d[j]);
2472     }
2473   }
2474 
2475   s.Add_Space_if_NotEmpty();
2476   {
2477     char temp[32];
2478     ConvertUInt32ToHex(c.ver, temp);
2479     s += '(';
2480     s += temp;
2481     s += ')';
2482   }
2483 }
2484 
2485 #endif
2486 
2487 
2488 
// Names of processor architectures.
// The table is indexed by SYSTEM_INFO::wProcessorArchitecture
// (see SysInfo_To_String); the position of a string in the table is its code.
static const char * const k_PROCESSOR_ARCHITECTURE[] =
{
    "x86" // "INTEL"
  , "MIPS"
  , "ALPHA"
  , "PPC"
  , "SHX"
  , "ARM"
  , "IA64"
  , "ALPHA64"
  , "MSIL"
  , "x64" // "AMD64"
  , "IA32_ON_WIN64"
  , "NEUTRAL"
  , "ARM64"
  , "ARM32_ON_WIN64"
};

// Architecture codes (indexes of "x86" and "x64" in the table above)
#define MY__PROCESSOR_ARCHITECTURE_INTEL 0
#define MY__PROCESSOR_ARCHITECTURE_AMD64 9


// SYSTEM_INFO::dwProcessorType values. SysInfo_To_String does not print the
// processor type for the pairs (INTEL, PENTIUM) and (AMD64, X8664).
#define MY__PROCESSOR_INTEL_PENTIUM  586
#define MY__PROCESSOR_AMD_X8664      8664
2513 
2514 /*
2515 static const CUInt32PCharPair k_PROCESSOR[] =
2516 {
2517   { 2200, "IA64" },
2518   { 8664, "x64" }
2519 };
2520 
2521 #define PROCESSOR_INTEL_386      386
2522 #define PROCESSOR_INTEL_486      486
2523 #define PROCESSOR_INTEL_PENTIUM  586
2524 #define PROCESSOR_INTEL_860      860
2525 #define PROCESSOR_INTEL_IA64     2200
2526 #define PROCESSOR_AMD_X8664      8664
2527 #define PROCESSOR_MIPS_R2000     2000
2528 #define PROCESSOR_MIPS_R3000     3000
2529 #define PROCESSOR_MIPS_R4000     4000
2530 #define PROCESSOR_ALPHA_21064    21064
2531 #define PROCESSOR_PPC_601        601
2532 #define PROCESSOR_PPC_603        603
2533 #define PROCESSOR_PPC_604        604
2534 #define PROCESSOR_PPC_620        620
2535 #define PROCESSOR_HITACHI_SH3    10003
2536 #define PROCESSOR_HITACHI_SH3E   10004
2537 #define PROCESSOR_HITACHI_SH4    10005
2538 #define PROCESSOR_MOTOROLA_821   821
2539 #define PROCESSOR_SHx_SH3        103
2540 #define PROCESSOR_SHx_SH4        104
2541 #define PROCESSOR_STRONGARM      2577    // 0xA11
2542 #define PROCESSOR_ARM720         1824    // 0x720
2543 #define PROCESSOR_ARM820         2080    // 0x820
2544 #define PROCESSOR_ARM920         2336    // 0x920
2545 #define PROCESSOR_ARM_7TDMI      70001
2546 #define PROCESSOR_OPTIL          18767   // 0x494f
2547 */
2548 
2549 #ifdef _WIN32
2550 
// Names of processor features.
// The table is indexed by the feature number that GetCpuFeatures() passes to
// IsProcessorFeaturePresent(); the position of a string in the table is its
// feature number. Numbers beyond the table are printed as decimal numbers.
static const char * const k_PF[] =
{
    "FP_ERRATA"
  , "FP_EMU"
  , "CMPXCHG"
  , "MMX"
  , "PPC_MOVEMEM_64BIT"
  , "ALPHA_BYTE"
  , "SSE"
  , "3DNOW"
  , "RDTSC"
  , "PAE"
  , "SSE2"
  , "SSE_DAZ"
  , "NX"
  , "SSE3"
  , "CMPXCHG16B"
  , "CMP8XCHG16"
  , "CHANNELS"
  , "XSAVE"
  , "ARM_VFP_32"
  , "ARM_NEON"
  , "L2AT"
  , "VIRT_FIRMWARE"
  , "RDWRFSGSBASE"
  , "FASTFAIL"
  , "ARM_DIVIDE"
  , "ARM_64BIT_LOADSTORE_ATOMIC"
  , "ARM_EXTERNAL_CACHE"
  , "ARM_FMAC"
  , "RDRAND"
  , "ARM_V8"
  , "ARM_V8_CRYPTO"
  , "ARM_V8_CRC32"
  , "RDTSCP"
};
2587 
2588 #endif
2589 
2590 
2591 
2592 
PrintPage(AString & s,UInt32 v)2593 static void PrintPage(AString &s, UInt32 v)
2594 {
2595   if ((v & 0x3FF) == 0)
2596   {
2597     s.Add_UInt32(v >> 10);
2598     s += "K";
2599   }
2600   else
2601     s.Add_UInt32(v >> 10);
2602 }
2603 
TypeToString2(const char * const table[],unsigned num,UInt32 value)2604 static AString TypeToString2(const char * const table[], unsigned num, UInt32 value)
2605 {
2606   char sz[16];
2607   const char *p = NULL;
2608   if (value < num)
2609     p = table[value];
2610   if (!p)
2611   {
2612     ConvertUInt32ToString(value, sz);
2613     p = sz;
2614   }
2615   return (AString)p;
2616 }
2617 
2618 #ifdef _WIN32
2619 
SysInfo_To_String(AString & s,const SYSTEM_INFO & si)2620 static void SysInfo_To_String(AString &s, const SYSTEM_INFO &si)
2621 {
2622   s += TypeToString2(k_PROCESSOR_ARCHITECTURE, ARRAY_SIZE(k_PROCESSOR_ARCHITECTURE), si.wProcessorArchitecture);
2623 
2624   if (!(   si.wProcessorArchitecture == MY__PROCESSOR_ARCHITECTURE_INTEL && si.dwProcessorType == MY__PROCESSOR_INTEL_PENTIUM
2625       || si.wProcessorArchitecture == MY__PROCESSOR_ARCHITECTURE_AMD64 && si.dwProcessorType == MY__PROCESSOR_AMD_X8664))
2626   {
2627     s += " ";
2628     // s += TypePairToString(k_PROCESSOR, ARRAY_SIZE(k_PROCESSOR), si.dwProcessorType);
2629     s.Add_UInt32(si.dwProcessorType);
2630   }
2631   s += " ";
2632   PrintHex(s, si.wProcessorLevel);
2633   s += ".";
2634   PrintHex(s, si.wProcessorRevision);
2635   if ((UInt64)si.dwActiveProcessorMask + 1 != ((UInt64)1 << si.dwNumberOfProcessors))
2636   if ((UInt64)si.dwActiveProcessorMask + 1 != 0 || si.dwNumberOfProcessors != sizeof(UInt64) * 8)
2637   {
2638     s += " act:";
2639     PrintHex(s, si.dwActiveProcessorMask);
2640   }
2641   s += " cpus:";
2642   s.Add_UInt32(si.dwNumberOfProcessors);
2643   if (si.dwPageSize != 1 << 12)
2644   {
2645     s += " page:";
2646     PrintPage(s, si.dwPageSize);
2647   }
2648   if (si.dwAllocationGranularity != 1 << 16)
2649   {
2650     s += " gran:";
2651     PrintPage(s, si.dwAllocationGranularity);
2652   }
2653   s += " ";
2654 
2655   DWORD_PTR minAdd = (DWORD_PTR)si.lpMinimumApplicationAddress;
2656   UInt64 maxSize = (UInt64)(DWORD_PTR)si.lpMaximumApplicationAddress + 1;
2657   const UInt32 kReserveSize = ((UInt32)1 << 16);
2658   if (minAdd != kReserveSize)
2659   {
2660     PrintSize(s, minAdd);
2661     s += "-";
2662   }
2663   else
2664   {
2665     if ((maxSize & (kReserveSize - 1)) == 0)
2666       maxSize += kReserveSize;
2667   }
2668   PrintSize(s, maxSize);
2669 }
2670 
2671 #ifndef _WIN64
// Type of the "GetNativeSystemInfo" function that GetSysInfo() gets
// from kernel32.dll with GetProcAddress().
typedef VOID (WINAPI *Func_GetNativeSystemInfo)(LPSYSTEM_INFO lpSystemInfo);
2673 #endif
2674 
2675 #endif
2676 
GetSysInfo(AString & s1,AString & s2)2677 void GetSysInfo(AString &s1, AString &s2)
2678 {
2679   s1.Empty();
2680   s2.Empty();
2681 
2682   #ifdef _WIN32
2683     SYSTEM_INFO si;
2684     GetSystemInfo(&si);
2685     {
2686       SysInfo_To_String(s1, si);
2687       // s += " : ";
2688     }
2689 
2690     #if !defined(_WIN64) && !defined(UNDER_CE)
2691     Func_GetNativeSystemInfo fn_GetNativeSystemInfo = (Func_GetNativeSystemInfo)GetProcAddress(
2692         GetModuleHandleA("kernel32.dll"), "GetNativeSystemInfo");
2693     if (fn_GetNativeSystemInfo)
2694     {
2695       SYSTEM_INFO si2;
2696       fn_GetNativeSystemInfo(&si2);
2697       // if (memcmp(&si, &si2, sizeof(si)) != 0)
2698       {
2699         // s += " - ";
2700         SysInfo_To_String(s2, si2);
2701       }
2702     }
2703     #endif
2704   #endif
2705 }
2706 
2707 
GetCpuName(AString & s)2708 void GetCpuName(AString &s)
2709 {
2710   s.Empty();
2711 
2712   #ifdef MY_CPU_X86_OR_AMD64
2713   {
2714     Cx86cpuid cpuid;
2715     if (x86cpuid_CheckAndRead(&cpuid))
2716     {
2717       AString s2;
2718       x86cpuid_to_String(cpuid, s2);
2719       s += s2;
2720     }
2721     else
2722     {
2723     #ifdef MY_CPU_AMD64
2724     s += "x64";
2725     #else
2726     s += "x86";
2727     #endif
2728     }
2729   }
2730   #else
2731 
2732     #ifdef MY_CPU_LE
2733       s += "LE";
2734     #elif defined(MY_CPU_BE)
2735       s += "BE";
2736     #endif
2737 
2738   #endif
2739 
2740   #ifdef _7ZIP_LARGE_PAGES
2741   Add_LargePages_String(s);
2742   #endif
2743 }
2744 
2745 
GetCpuFeatures(AString & s)2746 void GetCpuFeatures(AString &s)
2747 {
2748   s.Empty();
2749 
2750   #ifdef _WIN32
2751   const unsigned kNumFeatures_Extra = 32; // we check also for unknown features
2752   const unsigned kNumFeatures = ARRAY_SIZE(k_PF) + kNumFeatures_Extra;
2753   for (unsigned i = 0; i < kNumFeatures; i++)
2754   {
2755     if (IsProcessorFeaturePresent(i))
2756     {
2757       s.Add_Space_if_NotEmpty();
2758       s += TypeToString2(k_PF, ARRAY_SIZE(k_PF), i);
2759     }
2760   }
2761   #endif
2762 }
2763 
2764 
2765 #ifdef _WIN32
2766 #ifndef UNDER_CE
2767 
2768 typedef void (WINAPI * Func_RtlGetVersion) (OSVERSIONINFOEXW *);
2769 
My_RtlGetVersion(OSVERSIONINFOEXW * vi)2770 static BOOL My_RtlGetVersion(OSVERSIONINFOEXW *vi)
2771 {
2772   HMODULE ntdll = ::GetModuleHandleW(L"ntdll.dll");
2773   if (!ntdll)
2774     return FALSE;
2775   Func_RtlGetVersion func = (Func_RtlGetVersion)GetProcAddress(ntdll, "RtlGetVersion");
2776   if (!func)
2777     return FALSE;
2778   func(vi);
2779   return TRUE;
2780 }
2781 
2782 #endif
2783 #endif
2784 
2785 
Bench(DECL_EXTERNAL_CODECS_LOC_VARS IBenchPrintCallback * printCallback,IBenchCallback * benchCallback,const CObjectVector<CProperty> & props,UInt32 numIterations,bool multiDict)2786 HRESULT Bench(
2787     DECL_EXTERNAL_CODECS_LOC_VARS
2788     IBenchPrintCallback *printCallback,
2789     IBenchCallback *benchCallback,
2790     // IBenchFreqCallback *freqCallback,
2791     const CObjectVector<CProperty> &props,
2792     UInt32 numIterations,
2793     bool multiDict)
2794 {
2795   if (!CrcInternalTest())
2796     return S_FALSE;
2797 
2798   UInt32 numCPUs = 1;
2799   UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
2800 
2801   NSystem::CProcessAffinity threadsInfo;
2802   threadsInfo.InitST();
2803 
2804   #ifndef _7ZIP_ST
2805 
2806   if (threadsInfo.Get() && threadsInfo.processAffinityMask != 0)
2807     numCPUs = threadsInfo.GetNumProcessThreads();
2808   else
2809     numCPUs = NSystem::GetNumberOfProcessors();
2810 
2811   #endif
2812 
2813   bool ramSize_Defined = NSystem::GetRamSize(ramSize);
2814 
2815   UInt32 numThreadsSpecified = numCPUs;
2816 
2817   UInt32 testTime = kComplexInSeconds;
2818 
2819   UInt64 specifiedFreq = 0;
2820 
2821   bool multiThreadTests = false;
2822 
2823   COneMethodInfo method;
2824 
2825   CBenchBuffer fileDataBuffer;
2826 
2827   {
2828   unsigned i;
2829   for (i = 0; i < props.Size(); i++)
2830   {
2831     const CProperty &property = props[i];
2832     UString name (property.Name);
2833     name.MakeLower_Ascii();
2834 
2835     if (name.IsEqualTo("file"))
2836     {
2837       if (property.Value.IsEmpty())
2838         return E_INVALIDARG;
2839 
2840       #ifdef USE_WIN_FILE
2841 
2842       NFile::NIO::CInFile file;
2843       if (!file.Open(us2fs(property.Value)))
2844         return E_INVALIDARG;
2845       UInt64 len;
2846       if (!file.GetLength(len))
2847         return E_FAIL;
2848       if (len >= ((UInt32)1 << 31) || len == 0)
2849         return E_INVALIDARG;
2850       if (!fileDataBuffer.Alloc((size_t)len))
2851         return E_OUTOFMEMORY;
2852       UInt32 processedSize;
2853       file.Read(fileDataBuffer.Buffer, (UInt32)len, processedSize);
2854       if (processedSize != len)
2855         return E_FAIL;
2856       if (printCallback)
2857       {
2858         printCallback->Print("file size =");
2859         PrintNumber(*printCallback, len, 0);
2860         printCallback->NewLine();
2861       }
2862       continue;
2863 
2864       #else
2865 
2866       return E_NOTIMPL;
2867 
2868       #endif
2869     }
2870 
2871     NCOM::CPropVariant propVariant;
2872     if (!property.Value.IsEmpty())
2873       ParseNumberString(property.Value, propVariant);
2874 
2875     if (name.IsEqualTo("time"))
2876     {
2877       RINOK(ParsePropToUInt32(UString(), propVariant, testTime));
2878       continue;
2879     }
2880 
2881     if (name.IsEqualTo("freq"))
2882     {
2883       UInt32 freq32 = 0;
2884       RINOK(ParsePropToUInt32(UString(), propVariant, freq32));
2885       if (freq32 == 0)
2886         return E_INVALIDARG;
2887       specifiedFreq = (UInt64)freq32 * 1000000;
2888 
2889       if (printCallback)
2890       {
2891         printCallback->Print("freq=");
2892         PrintNumber(*printCallback, freq32, 0);
2893         printCallback->NewLine();
2894       }
2895 
2896       continue;
2897     }
2898 
2899     if (name.IsPrefixedBy_Ascii_NoCase("mt"))
2900     {
2901       UString s = name.Ptr(2);
2902       if (s.IsEqualTo("*")
2903           || s.IsEmpty() && propVariant.vt == VT_BSTR && StringsAreEqual_Ascii(propVariant.bstrVal, "*"))
2904       {
2905         multiThreadTests = true;
2906         continue;
2907       }
2908       #ifndef _7ZIP_ST
2909       RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified));
2910       #endif
2911       continue;
2912     }
2913 
2914     RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant));
2915   }
2916   }
2917 
2918   if (printCallback)
2919   {
2920     #ifdef _WIN32
2921     #ifndef UNDER_CE
2922     {
2923       AString s;
2924       // OSVERSIONINFO vi;
2925       OSVERSIONINFOEXW vi;
2926       vi.dwOSVersionInfoSize = sizeof(vi);
2927       // if (::GetVersionEx(&vi))
2928       if (My_RtlGetVersion(&vi))
2929       {
2930         s += "Windows";
2931         if (vi.dwPlatformId != VER_PLATFORM_WIN32_NT)
2932           s.Add_UInt32(vi.dwPlatformId);
2933         s += " "; s.Add_UInt32(vi.dwMajorVersion);
2934         s += "."; s.Add_UInt32(vi.dwMinorVersion);
2935         s += " "; s.Add_UInt32(vi.dwBuildNumber);
2936         // s += " "; s += GetAnsiString(vi.szCSDVersion);
2937       }
2938       printCallback->Print(s);
2939       printCallback->NewLine();
2940     }
2941     #endif
2942     #endif
2943 
2944     {
2945       AString s1, s2;
2946       GetSysInfo(s1, s2);
2947       if (!s1.IsEmpty() || !s2.IsEmpty())
2948       {
2949         printCallback->Print(s1);
2950         if (s1 != s2 && !s2.IsEmpty())
2951         {
2952           printCallback->Print(" - ");
2953           printCallback->Print(s2);
2954         }
2955         printCallback->NewLine();
2956       }
2957     }
2958     {
2959       AString s;
2960       GetCpuFeatures(s);
2961       if (!s.IsEmpty())
2962       {
2963         printCallback->Print(s);
2964         printCallback->NewLine();
2965       }
2966     }
2967     {
2968       AString s;
2969       GetCpuName(s);
2970       if (!s.IsEmpty())
2971       {
2972         printCallback->Print(s);
2973         printCallback->NewLine();
2974       }
2975     }
2976   }
2977 
2978   if (printCallback)
2979   {
2980     printCallback->Print("CPU Freq:");
2981   }
2982 
2983   UInt64 complexInCommands = kComplexInCommands;
2984 
2985   if (printCallback /* || freqCallback */)
2986   {
2987     UInt64 numMilCommands = 1 << 6;
2988     if (specifiedFreq != 0)
2989     {
2990       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
2991         numMilCommands >>= 1;
2992     }
2993 
2994     for (int jj = 0;; jj++)
2995     {
2996       if (printCallback)
2997         RINOK(printCallback->CheckBreak());
2998 
2999       UInt64 start = ::GetTimeCount();
3000       UInt32 sum = (UInt32)start;
3001       sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3002       if (sum == 0xF1541213)
3003         if (printCallback)
3004           printCallback->Print("");
3005       const UInt64 realDelta = ::GetTimeCount() - start;
3006       start = realDelta;
3007       if (start == 0)
3008         start = 1;
3009       UInt64 freq = GetFreq();
3010       // mips is constant in some compilers
3011       const UInt64 mipsVal = numMilCommands * freq / start;
3012       if (printCallback)
3013       {
3014         if (realDelta == 0)
3015         {
3016           printCallback->Print(" -");
3017         }
3018         else
3019         {
3020           // PrintNumber(*printCallback, start, 0);
3021           PrintNumber(*printCallback, mipsVal, 5);
3022         }
3023       }
3024       /*
3025       if (freqCallback)
3026         freqCallback->AddCpuFreq(mipsVal);
3027       */
3028 
3029       if (jj >= 3)
3030       {
3031         SetComplexCommands(testTime, false, mipsVal * 1000000, complexInCommands);
3032         if (jj >= 8 || start >= freq)
3033           break;
3034         // break; // change it
3035         numMilCommands <<= 1;
3036       }
3037     }
3038   }
3039 
3040   if (printCallback)
3041   {
3042     printCallback->NewLine();
3043     printCallback->NewLine();
3044     PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
3045     printCallback->Print(GetProcessThreadsInfo(threadsInfo));
3046     printCallback->NewLine();
3047   }
3048 
3049   if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
3050     return E_INVALIDARG;
3051 
3052   UInt32 dict;
3053   bool dictIsDefined = method.Get_DicSize(dict);
3054 
3055   if (method.MethodName.IsEmpty())
3056     method.MethodName = "LZMA";
3057 
3058   if (benchCallback)
3059   {
3060     CBenchProps benchProps;
3061     benchProps.SetLzmaCompexity();
3062     UInt32 dictSize = method.Get_Lzma_DicSize();
3063     UInt32 uncompressedDataSize = kAdditionalSize + dictSize;
3064     return MethodBench(
3065         EXTERNAL_CODECS_LOC_VARS
3066         complexInCommands,
3067         true, numThreadsSpecified,
3068         method,
3069         uncompressedDataSize, fileDataBuffer.Buffer,
3070         kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
3071   }
3072 
3073   AString methodName (method.MethodName);
3074   if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
3075     methodName = "crc32";
3076   method.MethodName = methodName;
3077   CMethodId hashID;
3078 
3079   if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID))
3080   {
3081     if (!printCallback)
3082       return S_FALSE;
3083     IBenchPrintCallback &f = *printCallback;
3084     if (!dictIsDefined)
3085       dict = (1 << 24);
3086 
3087 
3088     // methhodName.RemoveChar(L'-');
3089     UInt32 complexity = 10000;
3090     const UInt32 *checkSum = NULL;
3091     {
3092       unsigned i;
3093       for (i = 0; i < ARRAY_SIZE(g_Hash); i++)
3094       {
3095         const CBenchHash &h = g_Hash[i];
3096         AString benchMethod (h.Name);
3097         AString benchProps;
3098         int propPos = benchMethod.Find(':');
3099         if (propPos >= 0)
3100         {
3101           benchProps = benchMethod.Ptr(propPos + 1);
3102           benchMethod.DeleteFrom(propPos);
3103         }
3104 
3105         if (AreSameMethodNames(benchMethod, methodName))
3106         {
3107           if (benchProps.IsEmpty()
3108               || benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps == "8" && method.PropsString.IsEmpty()
3109               || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
3110           {
3111             complexity = h.Complex;
3112             checkSum = &h.CheckSum;
3113             if (method.PropsString.IsEqualTo_Ascii_NoCase(benchProps))
3114               break;
3115           }
3116         }
3117       }
3118       if (i == ARRAY_SIZE(g_Hash))
3119         return E_NOTIMPL;
3120     }
3121 
3122     f.NewLine();
3123     f.Print("Size");
3124     const unsigned kFieldSize_CrcSpeed = 6;
3125     unsigned numThreadsTests = 0;
3126     for (;;)
3127     {
3128       UInt32 t = GetNumThreadsNext(numThreadsTests, numThreadsSpecified);
3129       PrintNumber(f, t, kFieldSize_CrcSpeed);
3130       numThreadsTests++;
3131       if (t >= numThreadsSpecified)
3132         break;
3133     }
3134     f.NewLine();
3135     f.NewLine();
3136     CTempValues speedTotals(numThreadsTests);
3137     {
3138       for (unsigned ti = 0; ti < numThreadsTests; ti++)
3139         speedTotals.Values[ti] = 0;
3140     }
3141 
3142     UInt64 numSteps = 0;
3143     for (UInt32 i = 0; i < numIterations; i++)
3144     {
3145       for (unsigned pow = 10; pow < 32; pow++)
3146       {
3147         UInt32 bufSize = (UInt32)1 << pow;
3148         if (bufSize > dict)
3149           break;
3150         char s[16];
3151         ConvertUInt32ToString(pow, s);
3152         unsigned pos = MyStringLen(s);
3153         s[pos++] = ':';
3154         s[pos++] = ' ';
3155         s[pos] = 0;
3156         f.Print(s);
3157 
3158         for (unsigned ti = 0; ti < numThreadsTests; ti++)
3159         {
3160           RINOK(f.CheckBreak());
3161           UInt32 t = GetNumThreadsNext(ti, numThreadsSpecified);
3162           UInt64 speed = 0;
3163           RINOK(CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
3164               t, bufSize, speed,
3165               complexity,
3166               1, // benchWeight,
3167               (pow == kNumHashDictBits) ? checkSum : NULL, method, NULL, NULL, false, 0));
3168           PrintNumber(f, (speed >> 20), kFieldSize_CrcSpeed);
3169           speedTotals.Values[ti] += speed;
3170         }
3171         f.NewLine();
3172         numSteps++;
3173       }
3174     }
3175     if (numSteps != 0)
3176     {
3177       f.NewLine();
3178       f.Print("Avg:");
3179       for (unsigned ti = 0; ti < numThreadsTests; ti++)
3180       {
3181         PrintNumber(f, ((speedTotals.Values[ti] / numSteps) >> 20), kFieldSize_CrcSpeed);
3182       }
3183       f.NewLine();
3184     }
3185     return S_OK;
3186   }
3187 
3188   bool use2Columns = false;
3189 
3190   bool totalBenchMode = (method.MethodName.IsEqualTo_Ascii_NoCase("*"));
3191   bool onlyHashBench = false;
3192   if (method.MethodName.IsEqualTo_Ascii_NoCase("hash"))
3193   {
3194     onlyHashBench = true;
3195     totalBenchMode = true;
3196   }
3197 
3198   // ---------- Threads loop ----------
3199   for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
3200   {
3201 
3202   UInt32 numThreads = numThreadsSpecified;
3203 
3204   if (!multiThreadTests)
3205   {
3206     if (threadsPassIndex != 0)
3207       break;
3208   }
3209   else
3210   {
3211     numThreads = 1;
3212     if (threadsPassIndex != 0)
3213     {
3214       if (numCPUs < 2)
3215         break;
3216       numThreads = numCPUs;
3217       if (threadsPassIndex == 1)
3218       {
3219         if (numCPUs >= 4)
3220           numThreads = numCPUs / 2;
3221       }
3222       else if (numCPUs < 4)
3223         break;
3224     }
3225   }
3226 
3227   CBenchCallbackToPrint callback;
3228   callback.Init();
3229   callback._file = printCallback;
3230 
3231   IBenchPrintCallback &f = *printCallback;
3232 
3233   if (threadsPassIndex > 0)
3234   {
3235     f.NewLine();
3236     f.NewLine();
3237   }
3238 
3239   if (!dictIsDefined)
3240   {
3241     const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
3242     unsigned dicSizeLog = dicSizeLog_Main;
3243 
3244     #ifdef UNDER_CE
3245     dicSizeLog = (UInt64)1 << 20;
3246     #endif
3247 
3248     if (ramSize_Defined)
3249     for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
3250       if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
3251         break;
3252 
3253     dict = (UInt32)1 << dicSizeLog;
3254 
3255     if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
3256     {
3257       f.Print("Dictionary reduced to: ");
3258       PrintNumber(f, dicSizeLog, 1);
3259       f.NewLine();
3260     }
3261   }
3262 
3263   PrintRequirements(f, "usage:", true, GetBenchMemoryUsage(numThreads, dict, totalBenchMode), "Benchmark threads:   ", numThreads);
3264   f.NewLine();
3265 
3266   f.NewLine();
3267 
3268   if (totalBenchMode)
3269   {
3270     callback.NameFieldSize = kFieldSize_Name;
3271     use2Columns = false;
3272   }
3273   else
3274   {
3275     callback.NameFieldSize = kFieldSize_SmallName;
3276     use2Columns = true;
3277   }
3278   callback.Use2Columns = use2Columns;
3279 
3280   bool showFreq = false;
3281   UInt64 cpuFreq = 0;
3282 
3283   if (totalBenchMode)
3284   {
3285     showFreq = true;
3286   }
3287 
3288   unsigned fileldSize = kFieldSize_TotalSize;
3289   if (showFreq)
3290     fileldSize += kFieldSize_EUAndEffec;
3291 
3292   if (use2Columns)
3293   {
3294     PrintSpaces(f, callback.NameFieldSize);
3295     PrintRight(f, "Compressing", fileldSize);
3296     f.Print(kSep);
3297     PrintRight(f, "Decompressing", fileldSize);
3298   }
3299   f.NewLine();
3300   PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
3301 
3302   int j;
3303 
3304   for (j = 0; j < 2; j++)
3305   {
3306     PrintRight(f, "Speed", kFieldSize_Speed + 1);
3307     PrintRight(f, "Usage", kFieldSize_Usage + 1);
3308     PrintRight(f, "R/U", kFieldSize_RU + 1);
3309     PrintRight(f, "Rating", kFieldSize_Rating + 1);
3310     if (showFreq)
3311     {
3312       PrintRight(f, "E/U", kFieldSize_EU + 1);
3313       PrintRight(f, "Effec", kFieldSize_Effec + 1);
3314     }
3315     if (!use2Columns)
3316       break;
3317     if (j == 0)
3318       f.Print(kSep);
3319   }
3320 
3321   f.NewLine();
3322   PrintSpaces(f, callback.NameFieldSize);
3323 
3324   for (j = 0; j < 2; j++)
3325   {
3326     PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
3327     PrintRight(f, "%", kFieldSize_Usage + 1);
3328     PrintRight(f, "MIPS", kFieldSize_RU + 1);
3329     PrintRight(f, "MIPS", kFieldSize_Rating + 1);
3330     if (showFreq)
3331     {
3332       PrintRight(f, "%", kFieldSize_EU + 1);
3333       PrintRight(f, "%", kFieldSize_Effec + 1);
3334     }
3335     if (!use2Columns)
3336       break;
3337     if (j == 0)
3338       f.Print(kSep);
3339   }
3340 
3341   f.NewLine();
3342   f.NewLine();
3343 
3344   if (specifiedFreq != 0)
3345     cpuFreq = specifiedFreq;
3346 
3347 
3348   if (totalBenchMode)
3349   {
3350     for (UInt32 i = 0; i < numIterations; i++)
3351     {
3352       if (i != 0)
3353         printCallback->NewLine();
3354       HRESULT res;
3355 
3356       const unsigned kNumCpuTests = 3;
3357       for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
3358       {
3359         PrintLeft(f, "CPU", kFieldSize_Name);
3360         UInt32 resVal;
3361         RINOK(FreqBench(complexInCommands, numThreads, printCallback,
3362             (freqTest == kNumCpuTests - 1 || specifiedFreq != 0), // showFreq
3363             specifiedFreq,
3364             cpuFreq, resVal));
3365         callback.NewLine();
3366 
3367         if (specifiedFreq != 0)
3368           cpuFreq = specifiedFreq;
3369 
3370         if (freqTest == kNumCpuTests - 1)
3371           SetComplexCommands(testTime, specifiedFreq != 0, cpuFreq, complexInCommands);
3372       }
3373       callback.NewLine();
3374 
3375       callback.SetFreq(true, cpuFreq);
3376 
3377       if (!onlyHashBench)
3378       {
3379         res = TotalBench(EXTERNAL_CODECS_LOC_VARS
3380             complexInCommands, numThreads,
3381             dictIsDefined || fileDataBuffer.Buffer, // forceUnpackSize
3382             fileDataBuffer.Buffer ? fileDataBuffer.BufferSize : dict,
3383             fileDataBuffer.Buffer,
3384             printCallback, &callback);
3385         RINOK(res);
3386       }
3387 
3388       res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads,
3389           1 << kNumHashDictBits, printCallback, &callback, &callback.EncodeRes, true, cpuFreq);
3390       RINOK(res);
3391 
3392       callback.NewLine();
3393       {
3394         PrintLeft(f, "CPU", kFieldSize_Name);
3395         UInt32 resVal;
3396         UInt64 cpuFreqLastTemp = cpuFreq;
3397         RINOK(FreqBench(complexInCommands, numThreads, printCallback,
3398             specifiedFreq != 0, // showFreq
3399             specifiedFreq,
3400             cpuFreqLastTemp, resVal));
3401         callback.NewLine();
3402       }
3403     }
3404   }
3405   else
3406   {
3407     bool needSetComplexity = true;
3408     if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
3409     {
3410       unsigned i;
3411       for (i = 0; i < ARRAY_SIZE(g_Bench); i++)
3412       {
3413         const CBenchMethod &h = g_Bench[i];
3414         AString benchMethod (h.Name);
3415         AString benchProps;
3416         int propPos = benchMethod.Find(':');
3417         if (propPos >= 0)
3418         {
3419           benchProps = benchMethod.Ptr(propPos + 1);
3420           benchMethod.DeleteFrom(propPos);
3421         }
3422 
3423         if (AreSameMethodNames(benchMethod, methodName))
3424         {
3425           if (benchProps.IsEmpty()
3426               || benchProps == "x5" && method.PropsString.IsEmpty()
3427               || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
3428           {
3429             callback.BenchProps.EncComplex = h.EncComplex;
3430             callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
3431             callback.BenchProps.DecComplexUnc = h.DecComplexUnc;;
3432             needSetComplexity = false;
3433             break;
3434           }
3435         }
3436       }
3437       if (i == ARRAY_SIZE(g_Bench))
3438         return E_NOTIMPL;
3439     }
3440     if (needSetComplexity)
3441       callback.BenchProps.SetLzmaCompexity();
3442 
3443   for (unsigned i = 0; i < numIterations; i++)
3444   {
3445     const unsigned kStartDicLog = 22;
3446     unsigned pow = (dict < ((UInt32)1 << kStartDicLog)) ? kBenchMinDicLogSize : kStartDicLog;
3447     if (!multiDict)
3448       pow = 31;
3449     while (((UInt32)1 << pow) > dict && pow > 0)
3450       pow--;
3451     for (; ((UInt32)1 << pow) <= dict; pow++)
3452     {
3453       char s[16];
3454       ConvertUInt32ToString(pow, s);
3455       unsigned pos = MyStringLen(s);
3456       s[pos++] = ':';
3457       s[pos] = 0;
3458       PrintLeft(f, s, kFieldSize_SmallName);
3459       callback.DictSize = (UInt32)1 << pow;
3460 
3461       COneMethodInfo method2 = method;
3462 
3463       if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
3464       {
3465         // Append the dictionary size property for this pass.
3466         // method2 may then hold two different dictionary size properties;
3467         // the last property is the one that takes effect.
3468         NCOM::CPropVariant propVariant = (UInt32)pow;
3469         RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant));
3470       }
3471 
3472       size_t uncompressedDataSize;
3473       if (fileDataBuffer.Buffer)
3474       {
3475         uncompressedDataSize = fileDataBuffer.BufferSize;
3476       }
3477       else
3478       {
3479         uncompressedDataSize = callback.DictSize;
3480         if (uncompressedDataSize >= (1 << 18))
3481           uncompressedDataSize += kAdditionalSize;
3482       }
3483 
3484       HRESULT res = MethodBench(
3485           EXTERNAL_CODECS_LOC_VARS
3486           complexInCommands,
3487           true, numThreads,
3488           method2,
3489           uncompressedDataSize, fileDataBuffer.Buffer,
3490           kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
3491       f.NewLine();
3492       RINOK(res);
3493       if (!multiDict)
3494         break;
3495     }
3496   }
3497   }
3498 
3499   PrintChars(f, '-', callback.NameFieldSize + fileldSize);
3500 
3501   if (use2Columns)
3502   {
3503     f.Print(kSep);
3504     PrintChars(f, '-', fileldSize);
3505   }
3506 
3507   f.NewLine();
3508 
3509   if (use2Columns)
3510   {
3511     PrintLeft(f, "Avr:", callback.NameFieldSize);
3512     PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes);
3513     f.Print(kSep);
3514     PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes);
3515     f.NewLine();
3516   }
3517 
3518   PrintLeft(f, "Tot:", callback.NameFieldSize);
3519   CTotalBenchRes midRes;
3520   midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
3521   PrintTotals(f, showFreq, cpuFreq, midRes);
3522   f.NewLine();
3523 
3524   }
3525   return S_OK;
3526 }
3527