1 // Bench.cpp
2
3 #include "StdAfx.h"
4
5 #include "Bench.h"
6
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif
11
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #ifdef USE_POSIX_TIME2
15 #include <sys/time.h>
16 #endif
17 #endif
18
19 #ifdef _WIN32
20 #define USE_ALLOCA
21 #endif
22
23 #ifdef USE_ALLOCA
24 #ifdef _WIN32
25 #include <malloc.h>
26 #else
27 #include <stdlib.h>
28 #endif
29 #endif
30
31 #include "../../../../C/7zCrc.h"
32 #include "../../../../C/Alloc.h"
33
34 #ifndef _7ZIP_ST
35 #include "../../../Windows/Synchronization.h"
36 #include "../../../Windows/Thread.h"
37 #endif
38
39 #include "../../../Windows/PropVariant.h"
40
41 static const UInt32 kUncompressMinBlockSize =
42 #ifdef UNDER_CE
43 1 << 24;
44 #else
45 1 << 26;
46 #endif
47
48 static const UInt32 kCrcBlockSize =
49 #ifdef UNDER_CE
50 1 << 25;
51 #else
52 1 << 30;
53 #endif
54
55 static const UInt32 kAdditionalSize = (1 << 16);
56 static const UInt32 kCompressedAdditionalSize = (1 << 10);
57 static const UInt32 kMaxLzmaPropSize = 5;
58
59 class CBaseRandomGenerator
60 {
61 UInt32 A1;
62 UInt32 A2;
63 public:
CBaseRandomGenerator()64 CBaseRandomGenerator() { Init(); }
Init()65 void Init() { A1 = 362436069; A2 = 521288629;}
GetRnd()66 UInt32 GetRnd()
67 {
68 return
69 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
70 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
71 }
72 };
73
74 class CBenchBuffer
75 {
76 public:
77 size_t BufferSize;
78 Byte *Buffer;
CBenchBuffer()79 CBenchBuffer(): Buffer(0) {}
~CBenchBuffer()80 virtual ~CBenchBuffer() { Free(); }
Free()81 void Free()
82 {
83 ::MidFree(Buffer);
84 Buffer = 0;
85 }
Alloc(size_t bufferSize)86 bool Alloc(size_t bufferSize)
87 {
88 if (Buffer != 0 && BufferSize == bufferSize)
89 return true;
90 Free();
91 Buffer = (Byte *)::MidAlloc(bufferSize);
92 BufferSize = bufferSize;
93 return (Buffer != 0);
94 }
95 };
96
97 class CBenchRandomGenerator: public CBenchBuffer
98 {
99 CBaseRandomGenerator *RG;
100 public:
Set(CBaseRandomGenerator * rg)101 void Set(CBaseRandomGenerator *rg) { RG = rg; }
GetVal(UInt32 & res,int numBits)102 UInt32 GetVal(UInt32 &res, int numBits)
103 {
104 UInt32 val = res & (((UInt32)1 << numBits) - 1);
105 res >>= numBits;
106 return val;
107 }
GetLen(UInt32 & res)108 UInt32 GetLen(UInt32 &res)
109 {
110 UInt32 len = GetVal(res, 2);
111 return GetVal(res, 1 + len);
112 }
Generate()113 void Generate()
114 {
115 UInt32 pos = 0;
116 UInt32 rep0 = 1;
117 while (pos < BufferSize)
118 {
119 UInt32 res = RG->GetRnd();
120 res >>= 1;
121 if (GetVal(res, 1) == 0 || pos < 1024)
122 Buffer[pos++] = (Byte)(res & 0xFF);
123 else
124 {
125 UInt32 len;
126 len = 1 + GetLen(res);
127 if (GetVal(res, 3) != 0)
128 {
129 len += GetLen(res);
130 do
131 {
132 UInt32 ppp = GetVal(res, 5) + 6;
133 res = RG->GetRnd();
134 if (ppp > 30)
135 continue;
136 rep0 = /* (1 << ppp) +*/ GetVal(res, ppp);
137 res = RG->GetRnd();
138 }
139 while (rep0 >= pos);
140 rep0++;
141 }
142
143 for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++)
144 Buffer[pos] = Buffer[pos - rep0];
145 }
146 }
147 }
148 };
149
150
151 class CBenchmarkInStream:
152 public ISequentialInStream,
153 public CMyUnknownImp
154 {
155 const Byte *Data;
156 size_t Pos;
157 size_t Size;
158 public:
159 MY_UNKNOWN_IMP
Init(const Byte * data,size_t size)160 void Init(const Byte *data, size_t size)
161 {
162 Data = data;
163 Size = size;
164 Pos = 0;
165 }
166 STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
167 };
168
Read(void * data,UInt32 size,UInt32 * processedSize)169 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
170 {
171 size_t remain = Size - Pos;
172 UInt32 kMaxBlockSize = (1 << 20);
173 if (size > kMaxBlockSize)
174 size = kMaxBlockSize;
175 if (size > remain)
176 size = (UInt32)remain;
177 for (UInt32 i = 0; i < size; i++)
178 ((Byte *)data)[i] = Data[Pos + i];
179 Pos += size;
180 if(processedSize != NULL)
181 *processedSize = size;
182 return S_OK;
183 }
184
185 class CBenchmarkOutStream:
186 public ISequentialOutStream,
187 public CBenchBuffer,
188 public CMyUnknownImp
189 {
190 // bool _overflow;
191 public:
192 UInt32 Pos;
193 // CBenchmarkOutStream(): _overflow(false) {}
Init()194 void Init()
195 {
196 // _overflow = false;
197 Pos = 0;
198 }
199 MY_UNKNOWN_IMP
200 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
201 };
202
Write(const void * data,UInt32 size,UInt32 * processedSize)203 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
204 {
205 size_t curSize = BufferSize - Pos;
206 if (curSize > size)
207 curSize = size;
208 memcpy(Buffer + Pos, data, curSize);
209 Pos += (UInt32)curSize;
210 if(processedSize != NULL)
211 *processedSize = (UInt32)curSize;
212 if (curSize != size)
213 {
214 // _overflow = true;
215 return E_FAIL;
216 }
217 return S_OK;
218 }
219
220 class CCrcOutStream:
221 public ISequentialOutStream,
222 public CMyUnknownImp
223 {
224 public:
225 UInt32 Crc;
226 MY_UNKNOWN_IMP
Init()227 void Init() { Crc = CRC_INIT_VAL; }
228 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
229 };
230
Write(const void * data,UInt32 size,UInt32 * processedSize)231 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
232 {
233 Crc = CrcUpdate(Crc, data, size);
234 if (processedSize != NULL)
235 *processedSize = size;
236 return S_OK;
237 }
238
GetTimeCount()239 static UInt64 GetTimeCount()
240 {
241 #ifdef USE_POSIX_TIME
242 #ifdef USE_POSIX_TIME2
243 timeval v;
244 if (gettimeofday(&v, 0) == 0)
245 return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
246 return (UInt64)time(NULL) * 1000000;
247 #else
248 return time(NULL);
249 #endif
250 #else
251 /*
252 LARGE_INTEGER value;
253 if (::QueryPerformanceCounter(&value))
254 return value.QuadPart;
255 */
256 return GetTickCount();
257 #endif
258 }
259
GetFreq()260 static UInt64 GetFreq()
261 {
262 #ifdef USE_POSIX_TIME
263 #ifdef USE_POSIX_TIME2
264 return 1000000;
265 #else
266 return 1;
267 #endif
268 #else
269 /*
270 LARGE_INTEGER value;
271 if (::QueryPerformanceFrequency(&value))
272 return value.QuadPart;
273 */
274 return 1000;
275 #endif
276 }
277
278 #ifndef USE_POSIX_TIME
GetTime64(const FILETIME & t)279 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
280 #endif
281
GetUserTime()282 static UInt64 GetUserTime()
283 {
284 #ifdef USE_POSIX_TIME
285 return clock();
286 #else
287 FILETIME creationTime, exitTime, kernelTime, userTime;
288 if (
289 #ifdef UNDER_CE
290 ::GetThreadTimes(::GetCurrentThread()
291 #else
292 ::GetProcessTimes(::GetCurrentProcess()
293 #endif
294 , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
295 return GetTime64(userTime) + GetTime64(kernelTime);
296 return (UInt64)GetTickCount() * 10000;
297 #endif
298 }
299
GetUserFreq()300 static UInt64 GetUserFreq()
301 {
302 #ifdef USE_POSIX_TIME
303 return CLOCKS_PER_SEC;
304 #else
305 return 10000000;
306 #endif
307 }
308
309 class CBenchProgressStatus
310 {
311 #ifndef _7ZIP_ST
312 NWindows::NSynchronization::CCriticalSection CS;
313 #endif
314 public:
315 HRESULT Res;
316 bool EncodeMode;
SetResult(HRESULT res)317 void SetResult(HRESULT res)
318 {
319 #ifndef _7ZIP_ST
320 NWindows::NSynchronization::CCriticalSectionLock lock(CS);
321 #endif
322 Res = res;
323 }
GetResult()324 HRESULT GetResult()
325 {
326 #ifndef _7ZIP_ST
327 NWindows::NSynchronization::CCriticalSectionLock lock(CS);
328 #endif
329 return Res;
330 }
331 };
332
333 class CBenchProgressInfo:
334 public ICompressProgressInfo,
335 public CMyUnknownImp
336 {
337 public:
338 CBenchProgressStatus *Status;
339 CBenchInfo BenchInfo;
340 HRESULT Res;
341 IBenchCallback *callback;
CBenchProgressInfo()342 CBenchProgressInfo(): callback(0) {}
343 MY_UNKNOWN_IMP
344 STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
345 };
346
SetStartTime(CBenchInfo & bi)347 static void SetStartTime(CBenchInfo &bi)
348 {
349 bi.GlobalFreq = GetFreq();
350 bi.UserFreq = GetUserFreq();
351 bi.GlobalTime = ::GetTimeCount();
352 bi.UserTime = ::GetUserTime();
353 }
354
SetFinishTime(const CBenchInfo & biStart,CBenchInfo & dest)355 static void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest)
356 {
357 dest.GlobalFreq = GetFreq();
358 dest.UserFreq = GetUserFreq();
359 dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime;
360 dest.UserTime = ::GetUserTime() - biStart.UserTime;
361 }
362
SetRatioInfo(const UInt64 * inSize,const UInt64 * outSize)363 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
364 {
365 HRESULT res = Status->GetResult();
366 if (res != S_OK)
367 return res;
368 if (!callback)
369 return res;
370 CBenchInfo info = BenchInfo;
371 SetFinishTime(BenchInfo, info);
372 if (Status->EncodeMode)
373 {
374 info.UnpackSize = *inSize;
375 info.PackSize = *outSize;
376 res = callback->SetEncodeResult(info, false);
377 }
378 else
379 {
380 info.PackSize = BenchInfo.PackSize + *inSize;
381 info.UnpackSize = BenchInfo.UnpackSize + *outSize;
382 res = callback->SetDecodeResult(info, false);
383 }
384 if (res != S_OK)
385 Status->SetResult(res);
386 return res;
387 }
388
389 static const int kSubBits = 8;
390
GetLogSize(UInt32 size)391 static UInt32 GetLogSize(UInt32 size)
392 {
393 for (int i = kSubBits; i < 32; i++)
394 for (UInt32 j = 0; j < (1 << kSubBits); j++)
395 if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
396 return (i << kSubBits) + j;
397 return (32 << kSubBits);
398 }
399
NormalizeVals(UInt64 & v1,UInt64 & v2)400 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
401 {
402 while (v1 > 1000000)
403 {
404 v1 >>= 1;
405 v2 >>= 1;
406 }
407 }
408
GetUsage(const CBenchInfo & info)409 UInt64 GetUsage(const CBenchInfo &info)
410 {
411 UInt64 userTime = info.UserTime;
412 UInt64 userFreq = info.UserFreq;
413 UInt64 globalTime = info.GlobalTime;
414 UInt64 globalFreq = info.GlobalFreq;
415 NormalizeVals(userTime, userFreq);
416 NormalizeVals(globalFreq, globalTime);
417 if (userFreq == 0)
418 userFreq = 1;
419 if (globalTime == 0)
420 globalTime = 1;
421 return userTime * globalFreq * 1000000 / userFreq / globalTime;
422 }
423
GetRatingPerUsage(const CBenchInfo & info,UInt64 rating)424 UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating)
425 {
426 UInt64 userTime = info.UserTime;
427 UInt64 userFreq = info.UserFreq;
428 UInt64 globalTime = info.GlobalTime;
429 UInt64 globalFreq = info.GlobalFreq;
430 NormalizeVals(userFreq, userTime);
431 NormalizeVals(globalTime, globalFreq);
432 if (globalFreq == 0)
433 globalFreq = 1;
434 if (userTime == 0)
435 userTime = 1;
436 return userFreq * globalTime / globalFreq * rating / userTime;
437 }
438
MyMultDiv64(UInt64 value,UInt64 elapsedTime,UInt64 freq)439 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
440 {
441 UInt64 elTime = elapsedTime;
442 NormalizeVals(freq, elTime);
443 if (elTime == 0)
444 elTime = 1;
445 return value * freq / elTime;
446 }
447
GetCompressRating(UInt32 dictionarySize,UInt64 elapsedTime,UInt64 freq,UInt64 size)448 UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
449 {
450 UInt64 t = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits);
451 UInt64 numCommandsForOne = 870 + ((t * t * 5) >> (2 * kSubBits));
452 UInt64 numCommands = (UInt64)(size) * numCommandsForOne;
453 return MyMultDiv64(numCommands, elapsedTime, freq);
454 }
455
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt32 numIterations)456 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations)
457 {
458 UInt64 numCommands = (inSize * 200 + outSize * 4) * numIterations;
459 return MyMultDiv64(numCommands, elapsedTime, freq);
460 }
461
462 struct CEncoderInfo;
463
464 struct CEncoderInfo
465 {
466 #ifndef _7ZIP_ST
467 NWindows::CThread thread[2];
468 #endif
469 CMyComPtr<ICompressCoder> encoder;
470 CBenchProgressInfo *progressInfoSpec[2];
471 CMyComPtr<ICompressProgressInfo> progressInfo[2];
472 UInt32 NumIterations;
473 #ifdef USE_ALLOCA
474 size_t AllocaSize;
475 #endif
476
477 struct CDecoderInfo
478 {
479 CEncoderInfo *Encoder;
480 UInt32 DecoderIndex;
481 #ifdef USE_ALLOCA
482 size_t AllocaSize;
483 #endif
484 bool CallbackMode;
485 };
486 CDecoderInfo decodersInfo[2];
487
488 CMyComPtr<ICompressCoder> decoders[2];
489 HRESULT Results[2];
490 CBenchmarkOutStream *outStreamSpec;
491 CMyComPtr<ISequentialOutStream> outStream;
492 IBenchCallback *callback;
493 UInt32 crc;
494 UInt32 kBufferSize;
495 UInt32 compressedSize;
496 CBenchRandomGenerator rg;
497 CBenchmarkOutStream *propStreamSpec;
498 CMyComPtr<ISequentialOutStream> propStream;
499 HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg);
500 HRESULT Encode();
501 HRESULT Decode(UInt32 decoderIndex);
502
CEncoderInfoCEncoderInfo503 CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {}
504
505 #ifndef _7ZIP_ST
EncodeThreadFunctionCEncoderInfo506 static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
507 {
508 CEncoderInfo *encoder = (CEncoderInfo *)param;
509 #ifdef USE_ALLOCA
510 alloca(encoder->AllocaSize);
511 #endif
512 HRESULT res = encoder->Encode();
513 encoder->Results[0] = res;
514 if (res != S_OK)
515 encoder->progressInfoSpec[0]->Status->SetResult(res);
516
517 return 0;
518 }
DecodeThreadFunctionCEncoderInfo519 static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
520 {
521 CDecoderInfo *decoder = (CDecoderInfo *)param;
522 #ifdef USE_ALLOCA
523 alloca(decoder->AllocaSize);
524 #endif
525 CEncoderInfo *encoder = decoder->Encoder;
526 encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
527 return 0;
528 }
529
CreateEncoderThreadCEncoderInfo530 HRESULT CreateEncoderThread()
531 {
532 return thread[0].Create(EncodeThreadFunction, this);
533 }
534
CreateDecoderThreadCEncoderInfo535 HRESULT CreateDecoderThread(int index, bool callbackMode
536 #ifdef USE_ALLOCA
537 , size_t allocaSize
538 #endif
539 )
540 {
541 CDecoderInfo &decoder = decodersInfo[index];
542 decoder.DecoderIndex = index;
543 decoder.Encoder = this;
544 #ifdef USE_ALLOCA
545 decoder.AllocaSize = allocaSize;
546 #endif
547 decoder.CallbackMode = callbackMode;
548 return thread[index].Create(DecodeThreadFunction, &decoder);
549 }
550 #endif
551 };
552
Init(UInt32 dictionarySize,UInt32 numThreads,CBaseRandomGenerator * rgLoc)553 HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc)
554 {
555 rg.Set(rgLoc);
556 kBufferSize = dictionarySize + kAdditionalSize;
557 UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize;
558 if (!rg.Alloc(kBufferSize))
559 return E_OUTOFMEMORY;
560 rg.Generate();
561 crc = CrcCalc(rg.Buffer, rg.BufferSize);
562
563 outStreamSpec = new CBenchmarkOutStream;
564 if (!outStreamSpec->Alloc(kCompressedBufferSize))
565 return E_OUTOFMEMORY;
566
567 outStream = outStreamSpec;
568
569 propStreamSpec = 0;
570 if (!propStream)
571 {
572 propStreamSpec = new CBenchmarkOutStream;
573 propStream = propStreamSpec;
574 }
575 if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
576 return E_OUTOFMEMORY;
577 propStreamSpec->Init();
578
579 PROPID propIDs[] =
580 {
581 NCoderPropID::kDictionarySize,
582 NCoderPropID::kNumThreads
583 };
584 const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]);
585 PROPVARIANT props[kNumProps];
586 props[0].vt = VT_UI4;
587 props[0].ulVal = dictionarySize;
588
589 props[1].vt = VT_UI4;
590 props[1].ulVal = numThreads;
591
592 {
593 CMyComPtr<ICompressSetCoderProperties> setCoderProperties;
594 RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties));
595 if (!setCoderProperties)
596 return E_FAIL;
597 RINOK(setCoderProperties->SetCoderProperties(propIDs, props, kNumProps));
598
599 CMyComPtr<ICompressWriteCoderProperties> writeCoderProperties;
600 encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties);
601 if (writeCoderProperties)
602 {
603 RINOK(writeCoderProperties->WriteCoderProperties(propStream));
604 }
605 }
606 return S_OK;
607 }
608
Encode()609 HRESULT CEncoderInfo::Encode()
610 {
611 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
612 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
613 inStreamSpec->Init(rg.Buffer, rg.BufferSize);
614 outStreamSpec->Init();
615
616 RINOK(encoder->Code(inStream, outStream, 0, 0, progressInfo[0]));
617 compressedSize = outStreamSpec->Pos;
618 encoder.Release();
619 return S_OK;
620 }
621
Decode(UInt32 decoderIndex)622 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
623 {
624 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
625 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
626 CMyComPtr<ICompressCoder> &decoder = decoders[decoderIndex];
627
628 CMyComPtr<ICompressSetDecoderProperties2> compressSetDecoderProperties;
629 decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &compressSetDecoderProperties);
630 if (!compressSetDecoderProperties)
631 return E_FAIL;
632
633 CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
634 CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
635
636 CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
637 pi->BenchInfo.UnpackSize = 0;
638 pi->BenchInfo.PackSize = 0;
639
640 for (UInt32 j = 0; j < NumIterations; j++)
641 {
642 inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
643 crcOutStreamSpec->Init();
644
645 RINOK(compressSetDecoderProperties->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos));
646 UInt64 outSize = kBufferSize;
647 RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
648 if (CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
649 return S_FALSE;
650 pi->BenchInfo.UnpackSize += kBufferSize;
651 pi->BenchInfo.PackSize += compressedSize;
652 }
653 decoder.Release();
654 return S_OK;
655 }
656
657 static const UInt32 kNumThreadsMax = (1 << 16);
658
659 struct CBenchEncoders
660 {
661 CEncoderInfo *encoders;
CBenchEncodersCBenchEncoders662 CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
~CBenchEncodersCBenchEncoders663 ~CBenchEncoders() { delete []encoders; }
664 };
665
LzmaBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt32 numThreads,UInt32 dictionarySize,IBenchCallback * callback)666 HRESULT LzmaBench(
667 DECL_EXTERNAL_CODECS_LOC_VARS
668 UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback)
669 {
670 UInt32 numEncoderThreads =
671 #ifndef _7ZIP_ST
672 (numThreads > 1 ? numThreads / 2 : 1);
673 #else
674 1;
675 #endif
676 UInt32 numSubDecoderThreads =
677 #ifndef _7ZIP_ST
678 (numThreads > 1 ? 2 : 1);
679 #else
680 1;
681 #endif
682 if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax)
683 {
684 return E_INVALIDARG;
685 }
686
687 CBenchEncoders encodersSpec(numEncoderThreads);
688 CEncoderInfo *encoders = encodersSpec.encoders;
689
690
691 UInt32 i;
692 for (i = 0; i < numEncoderThreads; i++)
693 {
694 CEncoderInfo &encoder = encoders[i];
695 encoder.callback = (i == 0) ? callback : 0;
696
697 const UInt32 kLzmaId = 0x030101;
698 RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS kLzmaId, encoder.encoder, true));
699 if (!encoder.encoder)
700 return E_NOTIMPL;
701 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
702 {
703 RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS kLzmaId, encoder.decoders[j], false));
704 if (!encoder.decoders[j])
705 return E_NOTIMPL;
706 }
707 }
708
709 CBaseRandomGenerator rg;
710 rg.Init();
711 for (i = 0; i < numEncoderThreads; i++)
712 {
713 RINOK(encoders[i].Init(dictionarySize, numThreads, &rg));
714 }
715
716 CBenchProgressStatus status;
717 status.Res = S_OK;
718 status.EncodeMode = true;
719
720 for (i = 0; i < numEncoderThreads; i++)
721 {
722 CEncoderInfo &encoder = encoders[i];
723 for (int j = 0; j < 2; j++)
724 {
725 encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo;
726 encoder.progressInfoSpec[j]->Status = &status;
727 }
728 if (i == 0)
729 {
730 encoder.progressInfoSpec[0]->callback = callback;
731 encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads;
732 SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
733 }
734
735 #ifndef _7ZIP_ST
736 if (numEncoderThreads > 1)
737 {
738 #ifdef USE_ALLOCA
739 encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
740 #endif
741 RINOK(encoder.CreateEncoderThread())
742 }
743 else
744 #endif
745 {
746 RINOK(encoder.Encode());
747 }
748 }
749 #ifndef _7ZIP_ST
750 if (numEncoderThreads > 1)
751 for (i = 0; i < numEncoderThreads; i++)
752 encoders[i].thread[0].Wait();
753 #endif
754
755 RINOK(status.Res);
756
757 CBenchInfo info;
758
759 SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
760 info.UnpackSize = 0;
761 info.PackSize = 0;
762 info.NumIterations = 1; // progressInfoSpec->NumIterations;
763 for (i = 0; i < numEncoderThreads; i++)
764 {
765 CEncoderInfo &encoder = encoders[i];
766 info.UnpackSize += encoder.kBufferSize;
767 info.PackSize += encoder.compressedSize;
768 }
769 RINOK(callback->SetEncodeResult(info, true));
770
771
772 status.Res = S_OK;
773 status.EncodeMode = false;
774
775 UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
776 for (i = 0; i < numEncoderThreads; i++)
777 {
778 CEncoderInfo &encoder = encoders[i];
779 encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize;
780
781 if (i == 0)
782 {
783 encoder.progressInfoSpec[0]->callback = callback;
784 encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads;
785 SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
786 }
787
788 #ifndef _7ZIP_ST
789 if (numDecoderThreads > 1)
790 {
791 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
792 {
793 HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
794 #ifdef USE_ALLOCA
795 , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
796 #endif
797 );
798 RINOK(res);
799 }
800 }
801 else
802 #endif
803 {
804 RINOK(encoder.Decode(0));
805 }
806 }
807 #ifndef _7ZIP_ST
808 HRESULT res = S_OK;
809 if (numDecoderThreads > 1)
810 for (i = 0; i < numEncoderThreads; i++)
811 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
812 {
813 CEncoderInfo &encoder = encoders[i];
814 encoder.thread[j].Wait();
815 if (encoder.Results[j] != S_OK)
816 res = encoder.Results[j];
817 }
818 RINOK(res);
819 #endif
820 RINOK(status.Res);
821 SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
822 #ifndef _7ZIP_ST
823 #ifdef UNDER_CE
824 if (numDecoderThreads > 1)
825 for (i = 0; i < numEncoderThreads; i++)
826 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
827 {
828 FILETIME creationTime, exitTime, kernelTime, userTime;
829 if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
830 info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
831 }
832 #endif
833 #endif
834 info.UnpackSize = 0;
835 info.PackSize = 0;
836 info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
837 for (i = 0; i < numEncoderThreads; i++)
838 {
839 CEncoderInfo &encoder = encoders[i];
840 info.UnpackSize += encoder.kBufferSize;
841 info.PackSize += encoder.compressedSize;
842 }
843 RINOK(callback->SetDecodeResult(info, false));
844 RINOK(callback->SetDecodeResult(info, true));
845 return S_OK;
846 }
847
848
GetLZMAUsage(bool multiThread,UInt32 dictionary)849 inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
850 {
851 UInt32 hs = dictionary - 1;
852 hs |= (hs >> 1);
853 hs |= (hs >> 2);
854 hs |= (hs >> 4);
855 hs |= (hs >> 8);
856 hs >>= 1;
857 hs |= 0xFFFF;
858 if (hs > (1 << 24))
859 hs >>= 1;
860 hs++;
861 return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 +
862 (1 << 20) + (multiThread ? (6 << 20) : 0);
863 }
864
GetBenchMemoryUsage(UInt32 numThreads,UInt32 dictionary)865 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary)
866 {
867 const UInt32 kBufferSize = dictionary;
868 const UInt32 kCompressedBufferSize = (kBufferSize / 2);
869 UInt32 numSubThreads = (numThreads > 1) ? 2 : 1;
870 UInt32 numBigThreads = numThreads / numSubThreads;
871 return (kBufferSize + kCompressedBufferSize +
872 GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads;
873 }
874
CrcBig(const void * data,UInt32 size,UInt32 numCycles,UInt32 crcBase)875 static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase)
876 {
877 for (UInt32 i = 0; i < numCycles; i++)
878 if (CrcCalc(data, size) != crcBase)
879 return false;
880 return true;
881 }
882
883 #ifndef _7ZIP_ST
884 struct CCrcInfo
885 {
886 NWindows::CThread Thread;
887 const Byte *Data;
888 UInt32 Size;
889 UInt32 NumCycles;
890 UInt32 Crc;
891 bool Res;
WaitCCrcInfo892 void Wait()
893 {
894 Thread.Wait();
895 Thread.Close();
896 }
897 };
898
CrcThreadFunction(void * param)899 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
900 {
901 CCrcInfo *p = (CCrcInfo *)param;
902 p->Res = CrcBig(p->Data, p->Size, p->NumCycles, p->Crc);
903 return 0;
904 }
905
906 struct CCrcThreads
907 {
908 UInt32 NumThreads;
909 CCrcInfo *Items;
CCrcThreadsCCrcThreads910 CCrcThreads(): Items(0), NumThreads(0) {}
WaitAllCCrcThreads911 void WaitAll()
912 {
913 for (UInt32 i = 0; i < NumThreads; i++)
914 Items[i].Wait();
915 NumThreads = 0;
916 }
~CCrcThreadsCCrcThreads917 ~CCrcThreads()
918 {
919 WaitAll();
920 delete []Items;
921 }
922 };
923 #endif
924
CrcCalc1(const Byte * buf,UInt32 size)925 static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
926 {
927 UInt32 crc = CRC_INIT_VAL;;
928 for (UInt32 i = 0; i < size; i++)
929 crc = CRC_UPDATE_BYTE(crc, buf[i]);
930 return CRC_GET_DIGEST(crc);
931 }
932
RandGen(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)933 static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
934 {
935 for (UInt32 i = 0; i < size; i++)
936 buf[i] = (Byte)RG.GetRnd();
937 }
938
RandGenCrc(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)939 static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
940 {
941 RandGen(buf, size, RG);
942 return CrcCalc1(buf, size);
943 }
944
CrcInternalTest()945 bool CrcInternalTest()
946 {
947 CBenchBuffer buffer;
948 const UInt32 kBufferSize0 = (1 << 8);
949 const UInt32 kBufferSize1 = (1 << 10);
950 const UInt32 kCheckSize = (1 << 5);
951 if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
952 return false;
953 Byte *buf = buffer.Buffer;
954 UInt32 i;
955 for (i = 0; i < kBufferSize0; i++)
956 buf[i] = (Byte)i;
957 UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
958 if (crc1 != 0x29058C73)
959 return false;
960 CBaseRandomGenerator RG;
961 RandGen(buf + kBufferSize0, kBufferSize1, RG);
962 for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
963 for (UInt32 j = 0; j < kCheckSize; j++)
964 if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
965 return false;
966 return true;
967 }
968
CrcBench(UInt32 numThreads,UInt32 bufferSize,UInt64 & speed)969 HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed)
970 {
971 if (numThreads == 0)
972 numThreads = 1;
973
974 CBenchBuffer buffer;
975 size_t totalSize = (size_t)bufferSize * numThreads;
976 if (totalSize / numThreads != bufferSize)
977 return E_OUTOFMEMORY;
978 if (!buffer.Alloc(totalSize))
979 return E_OUTOFMEMORY;
980
981 Byte *buf = buffer.Buffer;
982 CBaseRandomGenerator RG;
983 UInt32 numCycles = (kCrcBlockSize) / ((bufferSize >> 2) + 1) + 1;
984
985 UInt64 timeVal;
986 #ifndef _7ZIP_ST
987 CCrcThreads threads;
988 if (numThreads > 1)
989 {
990 threads.Items = new CCrcInfo[numThreads];
991 UInt32 i;
992 for (i = 0; i < numThreads; i++)
993 {
994 CCrcInfo &info = threads.Items[i];
995 Byte *data = buf + (size_t)bufferSize * i;
996 info.Data = data;
997 info.NumCycles = numCycles;
998 info.Size = bufferSize;
999 info.Crc = RandGenCrc(data, bufferSize, RG);
1000 }
1001 timeVal = GetTimeCount();
1002 for (i = 0; i < numThreads; i++)
1003 {
1004 CCrcInfo &info = threads.Items[i];
1005 RINOK(info.Thread.Create(CrcThreadFunction, &info));
1006 threads.NumThreads++;
1007 }
1008 threads.WaitAll();
1009 for (i = 0; i < numThreads; i++)
1010 if (!threads.Items[i].Res)
1011 return S_FALSE;
1012 }
1013 else
1014 #endif
1015 {
1016 UInt32 crc = RandGenCrc(buf, bufferSize, RG);
1017 timeVal = GetTimeCount();
1018 if (!CrcBig(buf, bufferSize, numCycles, crc))
1019 return S_FALSE;
1020 }
1021 timeVal = GetTimeCount() - timeVal;
1022 if (timeVal == 0)
1023 timeVal = 1;
1024
1025 UInt64 size = (UInt64)numCycles * totalSize;
1026 speed = MyMultDiv64(size, timeVal, GetFreq());
1027 return S_OK;
1028 }
1029