1 /* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2 2018-12-29 : Igor Pavlov : Public domain */
3 
4 #include "Precomp.h"
5 
6 #include "LzHash.h"
7 
8 #include "LzFindMt.h"
9 
MtSync_Construct(CMtSync * p)10 static void MtSync_Construct(CMtSync *p)
11 {
12   p->wasCreated = False;
13   p->csWasInitialized = False;
14   p->csWasEntered = False;
15   Thread_Construct(&p->thread);
16   Event_Construct(&p->canStart);
17   Event_Construct(&p->wasStarted);
18   Event_Construct(&p->wasStopped);
19   Semaphore_Construct(&p->freeSemaphore);
20   Semaphore_Construct(&p->filledSemaphore);
21 }
22 
MtSync_GetNextBlock(CMtSync * p)23 static void MtSync_GetNextBlock(CMtSync *p)
24 {
25   if (p->needStart)
26   {
27     p->numProcessedBlocks = 1;
28     p->needStart = False;
29     p->stopWriting = False;
30     p->exit = False;
31     Event_Reset(&p->wasStarted);
32     Event_Reset(&p->wasStopped);
33 
34     Event_Set(&p->canStart);
35     Event_Wait(&p->wasStarted);
36 
37     // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder);
38   }
39   else
40   {
41     CriticalSection_Leave(&p->cs);
42     p->csWasEntered = False;
43     p->numProcessedBlocks++;
44     Semaphore_Release1(&p->freeSemaphore);
45   }
46   Semaphore_Wait(&p->filledSemaphore);
47   CriticalSection_Enter(&p->cs);
48   p->csWasEntered = True;
49 }
50 
51 /* MtSync_StopWriting must be called if Writing was started */
52 
MtSync_StopWriting(CMtSync * p)53 static void MtSync_StopWriting(CMtSync *p)
54 {
55   UInt32 myNumBlocks = p->numProcessedBlocks;
56   if (!Thread_WasCreated(&p->thread) || p->needStart)
57     return;
58   p->stopWriting = True;
59   if (p->csWasEntered)
60   {
61     CriticalSection_Leave(&p->cs);
62     p->csWasEntered = False;
63   }
64   Semaphore_Release1(&p->freeSemaphore);
65 
66   Event_Wait(&p->wasStopped);
67 
68   while (myNumBlocks++ != p->numProcessedBlocks)
69   {
70     Semaphore_Wait(&p->filledSemaphore);
71     Semaphore_Release1(&p->freeSemaphore);
72   }
73   p->needStart = True;
74 }
75 
MtSync_Destruct(CMtSync * p)76 static void MtSync_Destruct(CMtSync *p)
77 {
78   if (Thread_WasCreated(&p->thread))
79   {
80     MtSync_StopWriting(p);
81     p->exit = True;
82     if (p->needStart)
83       Event_Set(&p->canStart);
84     Thread_Wait(&p->thread);
85     Thread_Close(&p->thread);
86   }
87   if (p->csWasInitialized)
88   {
89     CriticalSection_Delete(&p->cs);
90     p->csWasInitialized = False;
91   }
92 
93   Event_Close(&p->canStart);
94   Event_Close(&p->wasStarted);
95   Event_Close(&p->wasStopped);
96   Semaphore_Close(&p->freeSemaphore);
97   Semaphore_Close(&p->filledSemaphore);
98 
99   p->wasCreated = False;
100 }
101 
102 #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
103 
MtSync_Create2(CMtSync * p,THREAD_FUNC_TYPE startAddress,void * obj,UInt32 numBlocks)104 static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
105 {
106   if (p->wasCreated)
107     return SZ_OK;
108 
109   RINOK_THREAD(CriticalSection_Init(&p->cs));
110   p->csWasInitialized = True;
111 
112   RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
113   RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted));
114   RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
115 
116   RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks));
117   RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks));
118 
119   p->needStart = True;
120 
121   RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj));
122   p->wasCreated = True;
123   return SZ_OK;
124 }
125 
MtSync_Create(CMtSync * p,THREAD_FUNC_TYPE startAddress,void * obj,UInt32 numBlocks)126 static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
127 {
128   SRes res = MtSync_Create2(p, startAddress, obj, numBlocks);
129   if (res != SZ_OK)
130     MtSync_Destruct(p);
131   return res;
132 }
133 
MtSync_Init(CMtSync * p)134 void MtSync_Init(CMtSync *p) { p->needStart = True; }
135 
136 #define kMtMaxValForNormalize 0xFFFFFFFF
137 
138 #define DEF_GetHeads2(name, v, action) \
139   static void GetHeads ## name(const Byte *p, UInt32 pos, \
140       UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \
141     { action; for (; numHeads != 0; numHeads--) { \
142       const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++;  } }
143 
144 #define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
145 
146 DEF_GetHeads2(2,  (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
147 DEF_GetHeads(3,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask)
148 DEF_GetHeads(4,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask)
149 DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask)
150 /* DEF_GetHeads(5,  (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */
151 
HashThreadFunc(CMatchFinderMt * mt)152 static void HashThreadFunc(CMatchFinderMt *mt)
153 {
154   CMtSync *p = &mt->hashSync;
155   for (;;)
156   {
157     UInt32 numProcessedBlocks = 0;
158     Event_Wait(&p->canStart);
159     Event_Set(&p->wasStarted);
160 
161     MatchFinder_Init_HighHash(mt->MatchFinder);
162 
163     for (;;)
164     {
165       if (p->exit)
166         return;
167       if (p->stopWriting)
168       {
169         p->numProcessedBlocks = numProcessedBlocks;
170         Event_Set(&p->wasStopped);
171         break;
172       }
173 
174       {
175         CMatchFinder *mf = mt->MatchFinder;
176         if (MatchFinder_NeedMove(mf))
177         {
178           CriticalSection_Enter(&mt->btSync.cs);
179           CriticalSection_Enter(&mt->hashSync.cs);
180           {
181             const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
182             ptrdiff_t offset;
183             MatchFinder_MoveBlock(mf);
184             offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
185             mt->pointerToCurPos -= offset;
186             mt->buffer -= offset;
187           }
188           CriticalSection_Leave(&mt->btSync.cs);
189           CriticalSection_Leave(&mt->hashSync.cs);
190           continue;
191         }
192 
193         Semaphore_Wait(&p->freeSemaphore);
194 
195         MatchFinder_ReadIfRequired(mf);
196         if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize))
197         {
198           UInt32 subValue = (mf->pos - mf->historySize - 1);
199           MatchFinder_ReduceOffsets(mf, subValue);
200           MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
201         }
202         {
203           UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize;
204           UInt32 num = mf->streamPos - mf->pos;
205           heads[0] = 2;
206           heads[1] = num;
207           if (num >= mf->numHashBytes)
208           {
209             num = num - mf->numHashBytes + 1;
210             if (num > kMtHashBlockSize - 2)
211               num = kMtHashBlockSize - 2;
212             mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
213             heads[0] = 2 + num;
214           }
215           mf->pos += num;
216           mf->buffer += num;
217         }
218       }
219 
220       Semaphore_Release1(&p->filledSemaphore);
221     }
222   }
223 }
224 
MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt * p)225 static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
226 {
227   MtSync_GetNextBlock(&p->hashSync);
228   p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize;
229   p->hashBufPosLimit += p->hashBuf[p->hashBufPos++];
230   p->hashNumAvail = p->hashBuf[p->hashBufPos++];
231 }
232 
233 #define kEmptyHashValue 0
234 
235 #define MFMT_GM_INLINE
236 
237 #ifdef MFMT_GM_INLINE
238 
239 /*
240   we use size_t for _cyclicBufferPos instead of UInt32
241   to eliminate "movsx" BUG in old MSVC x64 compiler.
242 */
243 
244 MY_NO_INLINE
GetMatchesSpecN(UInt32 lenLimit,UInt32 pos,const Byte * cur,CLzRef * son,size_t _cyclicBufferPos,UInt32 _cyclicBufferSize,UInt32 _cutValue,UInt32 * distances,UInt32 _maxLen,const UInt32 * hash,const UInt32 * limit,UInt32 size,UInt32 * posRes)245 static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
246     size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
247     UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
248 {
249   do
250   {
251   UInt32 *_distances = ++distances;
252   UInt32 delta = *hash++;
253 
254   CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
255   CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
256   unsigned len0 = 0, len1 = 0;
257   UInt32 cutValue = _cutValue;
258   unsigned maxLen = (unsigned)_maxLen;
259 
260   /*
261   if (size > 1)
262   {
263     UInt32 delta = *hash;
264     if (delta < _cyclicBufferSize)
265     {
266       UInt32 cyc1 = _cyclicBufferPos + 1;
267       CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
268       Byte b = *(cur + 1 - delta);
269       _distances[0] = pair[0];
270       _distances[1] = b;
271     }
272   }
273   */
274   if (cutValue == 0 || delta >= _cyclicBufferSize)
275   {
276     *ptr0 = *ptr1 = kEmptyHashValue;
277   }
278   else
279   for(;;)
280   {
281     {
282       CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
283       const Byte *pb = cur - delta;
284       unsigned len = (len0 < len1 ? len0 : len1);
285       UInt32 pair0 = *pair;
286       if (pb[len] == cur[len])
287       {
288         if (++len != lenLimit && pb[len] == cur[len])
289           while (++len != lenLimit)
290             if (pb[len] != cur[len])
291               break;
292         if (maxLen < len)
293         {
294           maxLen = len;
295           *distances++ = (UInt32)len;
296           *distances++ = delta - 1;
297           if (len == lenLimit)
298           {
299             UInt32 pair1 = pair[1];
300             *ptr1 = pair0;
301             *ptr0 = pair1;
302             break;
303           }
304         }
305       }
306       {
307         UInt32 curMatch = pos - delta;
308         // delta = pos - *pair;
309         // delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31];
310         if (pb[len] < cur[len])
311         {
312           delta = pos - pair[1];
313           *ptr1 = curMatch;
314           ptr1 = pair + 1;
315           len1 = len;
316         }
317         else
318         {
319           delta = pos - *pair;
320           *ptr0 = curMatch;
321           ptr0 = pair;
322           len0 = len;
323         }
324       }
325     }
326     if (--cutValue == 0 || delta >= _cyclicBufferSize)
327     {
328       *ptr0 = *ptr1 = kEmptyHashValue;
329       break;
330     }
331   }
332   pos++;
333   _cyclicBufferPos++;
334   cur++;
335   {
336     UInt32 num = (UInt32)(distances - _distances);
337     _distances[-1] = num;
338   }
339   }
340   while (distances < limit && --size != 0);
341   *posRes = pos;
342   return distances;
343 }
344 
345 #endif
346 
347 
348 
BtGetMatches(CMatchFinderMt * p,UInt32 * distances)349 static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
350 {
351   UInt32 numProcessed = 0;
352   UInt32 curPos = 2;
353   UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); //  * 2
354 
355   distances[1] = p->hashNumAvail;
356 
357   while (curPos < limit)
358   {
359     if (p->hashBufPos == p->hashBufPosLimit)
360     {
361       MatchFinderMt_GetNextBlock_Hash(p);
362       distances[1] = numProcessed + p->hashNumAvail;
363       if (p->hashNumAvail >= p->numHashBytes)
364         continue;
365       distances[0] = curPos + p->hashNumAvail;
366       distances += curPos;
367       for (; p->hashNumAvail != 0; p->hashNumAvail--)
368         *distances++ = 0;
369       return;
370     }
371     {
372       UInt32 size = p->hashBufPosLimit - p->hashBufPos;
373       UInt32 lenLimit = p->matchMaxLen;
374       UInt32 pos = p->pos;
375       UInt32 cyclicBufferPos = p->cyclicBufferPos;
376       if (lenLimit >= p->hashNumAvail)
377         lenLimit = p->hashNumAvail;
378       {
379         UInt32 size2 = p->hashNumAvail - lenLimit + 1;
380         if (size2 < size)
381           size = size2;
382         size2 = p->cyclicBufferSize - cyclicBufferPos;
383         if (size2 < size)
384           size = size2;
385       }
386 
387       #ifndef MFMT_GM_INLINE
388       while (curPos < limit && size-- != 0)
389       {
390         UInt32 *startDistances = distances + curPos;
391         UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
392             pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
393             startDistances + 1, p->numHashBytes - 1) - startDistances);
394         *startDistances = num - 1;
395         curPos += num;
396         cyclicBufferPos++;
397         pos++;
398         p->buffer++;
399       }
400       #else
401       {
402         UInt32 posRes;
403         curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
404             distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
405             distances + limit,
406             size, &posRes) - distances);
407         p->hashBufPos += posRes - pos;
408         cyclicBufferPos += posRes - pos;
409         p->buffer += posRes - pos;
410         pos = posRes;
411       }
412       #endif
413 
414       numProcessed += pos - p->pos;
415       p->hashNumAvail -= pos - p->pos;
416       p->pos = pos;
417       if (cyclicBufferPos == p->cyclicBufferSize)
418         cyclicBufferPos = 0;
419       p->cyclicBufferPos = cyclicBufferPos;
420     }
421   }
422 
423   distances[0] = curPos;
424 }
425 
BtFillBlock(CMatchFinderMt * p,UInt32 globalBlockIndex)426 static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
427 {
428   CMtSync *sync = &p->hashSync;
429   if (!sync->needStart)
430   {
431     CriticalSection_Enter(&sync->cs);
432     sync->csWasEntered = True;
433   }
434 
435   BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize);
436 
437   if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize)
438   {
439     UInt32 subValue = p->pos - p->cyclicBufferSize;
440     MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
441     p->pos -= subValue;
442   }
443 
444   if (!sync->needStart)
445   {
446     CriticalSection_Leave(&sync->cs);
447     sync->csWasEntered = False;
448   }
449 }
450 
BtThreadFunc(CMatchFinderMt * mt)451 void BtThreadFunc(CMatchFinderMt *mt)
452 {
453   CMtSync *p = &mt->btSync;
454   for (;;)
455   {
456     UInt32 blockIndex = 0;
457     Event_Wait(&p->canStart);
458     Event_Set(&p->wasStarted);
459     for (;;)
460     {
461       if (p->exit)
462         return;
463       if (p->stopWriting)
464       {
465         p->numProcessedBlocks = blockIndex;
466         MtSync_StopWriting(&mt->hashSync);
467         Event_Set(&p->wasStopped);
468         break;
469       }
470       Semaphore_Wait(&p->freeSemaphore);
471       BtFillBlock(mt, blockIndex++);
472       Semaphore_Release1(&p->filledSemaphore);
473     }
474   }
475 }
476 
MatchFinderMt_Construct(CMatchFinderMt * p)477 void MatchFinderMt_Construct(CMatchFinderMt *p)
478 {
479   p->hashBuf = NULL;
480   MtSync_Construct(&p->hashSync);
481   MtSync_Construct(&p->btSync);
482 }
483 
MatchFinderMt_FreeMem(CMatchFinderMt * p,ISzAllocPtr alloc)484 static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
485 {
486   ISzAlloc_Free(alloc, p->hashBuf);
487   p->hashBuf = NULL;
488 }
489 
MatchFinderMt_Destruct(CMatchFinderMt * p,ISzAllocPtr alloc)490 void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
491 {
492   MtSync_Destruct(&p->hashSync);
493   MtSync_Destruct(&p->btSync);
494   MatchFinderMt_FreeMem(p, alloc);
495 }
496 
497 #define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
498 #define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
499 
HashThreadFunc2(void * p)500 static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p);  return 0; }
BtThreadFunc2(void * p)501 static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
502 {
503   Byte allocaDummy[0x180];
504   unsigned i = 0;
505   for (i = 0; i < 16; i++)
506     allocaDummy[i] = (Byte)0;
507   if (allocaDummy[0] == 0)
508     BtThreadFunc((CMatchFinderMt *)p);
509   return 0;
510 }
511 
MatchFinderMt_Create(CMatchFinderMt * p,UInt32 historySize,UInt32 keepAddBufferBefore,UInt32 matchMaxLen,UInt32 keepAddBufferAfter,ISzAllocPtr alloc)512 SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
513     UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
514 {
515   CMatchFinder *mf = p->MatchFinder;
516   p->historySize = historySize;
517   if (kMtBtBlockSize <= matchMaxLen * 4)
518     return SZ_ERROR_PARAM;
519   if (!p->hashBuf)
520   {
521     p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32));
522     if (!p->hashBuf)
523       return SZ_ERROR_MEM;
524     p->btBuf = p->hashBuf + kHashBufferSize;
525   }
526   keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
527   keepAddBufferAfter += kMtHashBlockSize;
528   if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
529     return SZ_ERROR_MEM;
530 
531   RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks));
532   RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks));
533   return SZ_OK;
534 }
535 
536 /* Call it after ReleaseStream / SetStream */
MatchFinderMt_Init(CMatchFinderMt * p)537 static void MatchFinderMt_Init(CMatchFinderMt *p)
538 {
539   CMatchFinder *mf = p->MatchFinder;
540 
541   p->btBufPos =
542   p->btBufPosLimit = 0;
543   p->hashBufPos =
544   p->hashBufPosLimit = 0;
545 
546   /* Init without data reading. We don't want to read data in this thread */
547   MatchFinder_Init_3(mf, False);
548   MatchFinder_Init_LowHash(mf);
549 
550   p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
551   p->btNumAvailBytes = 0;
552   p->lzPos = p->historySize + 1;
553 
554   p->hash = mf->hash;
555   p->fixedHashSize = mf->fixedHashSize;
556   p->crc = mf->crc;
557 
558   p->son = mf->son;
559   p->matchMaxLen = mf->matchMaxLen;
560   p->numHashBytes = mf->numHashBytes;
561   p->pos = mf->pos;
562   p->buffer = mf->buffer;
563   p->cyclicBufferPos = mf->cyclicBufferPos;
564   p->cyclicBufferSize = mf->cyclicBufferSize;
565   p->cutValue = mf->cutValue;
566 }
567 
568 /* ReleaseStream is required to finish multithreading */
MatchFinderMt_ReleaseStream(CMatchFinderMt * p)569 void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
570 {
571   MtSync_StopWriting(&p->btSync);
572   /* p->MatchFinder->ReleaseStream(); */
573 }
574 
MatchFinderMt_Normalize(CMatchFinderMt * p)575 static void MatchFinderMt_Normalize(CMatchFinderMt *p)
576 {
577   MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
578   p->lzPos = p->historySize + 1;
579 }
580 
MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt * p)581 static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
582 {
583   UInt32 blockIndex;
584   MtSync_GetNextBlock(&p->btSync);
585   blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
586   p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize;
587   p->btBufPosLimit += p->btBuf[p->btBufPos++];
588   p->btNumAvailBytes = p->btBuf[p->btBufPos++];
589   if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize)
590     MatchFinderMt_Normalize(p);
591 }
592 
MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt * p)593 static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
594 {
595   return p->pointerToCurPos;
596 }
597 
598 #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
599 
MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt * p)600 static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
601 {
602   GET_NEXT_BLOCK_IF_REQUIRED;
603   return p->btNumAvailBytes;
604 }
605 
MixMatches2(CMatchFinderMt * p,UInt32 matchMinPos,UInt32 * distances)606 static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
607 {
608   UInt32 h2, curMatch2;
609   UInt32 *hash = p->hash;
610   const Byte *cur = p->pointerToCurPos;
611   UInt32 lzPos = p->lzPos;
612   MT_HASH2_CALC
613 
614   curMatch2 = hash[h2];
615   hash[h2] = lzPos;
616 
617   if (curMatch2 >= matchMinPos)
618     if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
619     {
620       *distances++ = 2;
621       *distances++ = lzPos - curMatch2 - 1;
622     }
623 
624   return distances;
625 }
626 
MixMatches3(CMatchFinderMt * p,UInt32 matchMinPos,UInt32 * distances)627 static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
628 {
629   UInt32 h2, h3, curMatch2, curMatch3;
630   UInt32 *hash = p->hash;
631   const Byte *cur = p->pointerToCurPos;
632   UInt32 lzPos = p->lzPos;
633   MT_HASH3_CALC
634 
635   curMatch2 = hash[                h2];
636   curMatch3 = (hash + kFix3HashSize)[h3];
637 
638   hash[                h2] = lzPos;
639   (hash + kFix3HashSize)[h3] = lzPos;
640 
641   if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
642   {
643     distances[1] = lzPos - curMatch2 - 1;
644     if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
645     {
646       distances[0] = 3;
647       return distances + 2;
648     }
649     distances[0] = 2;
650     distances += 2;
651   }
652 
653   if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
654   {
655     *distances++ = 3;
656     *distances++ = lzPos - curMatch3 - 1;
657   }
658 
659   return distances;
660 }
661 
662 /*
663 static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
664 {
665   UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4;
666   UInt32 *hash = p->hash;
667   const Byte *cur = p->pointerToCurPos;
668   UInt32 lzPos = p->lzPos;
669   MT_HASH4_CALC
670 
671   curMatch2 = hash[                h2];
672   curMatch3 = (hash + kFix3HashSize)[h3];
673   curMatch4 = (hash + kFix4HashSize)[h4];
674 
675   hash[                h2] = lzPos;
676   (hash + kFix3HashSize)[h3] = lzPos;
677   (hash + kFix4HashSize)[h4] = lzPos;
678 
679   if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
680   {
681     distances[1] = lzPos - curMatch2 - 1;
682     if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
683     {
684       distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3;
685       return distances + 2;
686     }
687     distances[0] = 2;
688     distances += 2;
689   }
690 
691   if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
692   {
693     distances[1] = lzPos - curMatch3 - 1;
694     if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3])
695     {
696       distances[0] = 4;
697       return distances + 2;
698     }
699     distances[0] = 3;
700     distances += 2;
701   }
702 
703   if (curMatch4 >= matchMinPos)
704     if (
705       cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] &&
706       cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3]
707       )
708     {
709       *distances++ = 4;
710       *distances++ = lzPos - curMatch4 - 1;
711     }
712 
713   return distances;
714 }
715 */
716 
717 #define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
718 
MatchFinderMt2_GetMatches(CMatchFinderMt * p,UInt32 * distances)719 static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances)
720 {
721   const UInt32 *btBuf = p->btBuf + p->btBufPos;
722   UInt32 len = *btBuf++;
723   p->btBufPos += 1 + len;
724   p->btNumAvailBytes--;
725   {
726     UInt32 i;
727     for (i = 0; i < len; i += 2)
728     {
729       UInt32 v0 = btBuf[0];
730       UInt32 v1 = btBuf[1];
731       btBuf += 2;
732       distances[0] = v0;
733       distances[1] = v1;
734       distances += 2;
735     }
736   }
737   INCREASE_LZ_POS
738   return len;
739 }
740 
MatchFinderMt_GetMatches(CMatchFinderMt * p,UInt32 * distances)741 static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances)
742 {
743   const UInt32 *btBuf = p->btBuf + p->btBufPos;
744   UInt32 len = *btBuf++;
745   p->btBufPos += 1 + len;
746 
747   if (len == 0)
748   {
749     /* change for bt5 ! */
750     if (p->btNumAvailBytes-- >= 4)
751       len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances));
752   }
753   else
754   {
755     /* Condition: there are matches in btBuf with length < p->numHashBytes */
756     UInt32 *distances2;
757     p->btNumAvailBytes--;
758     distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances);
759     do
760     {
761       UInt32 v0 = btBuf[0];
762       UInt32 v1 = btBuf[1];
763       btBuf += 2;
764       distances2[0] = v0;
765       distances2[1] = v1;
766       distances2 += 2;
767     }
768     while ((len -= 2) != 0);
769     len = (UInt32)(distances2 - (distances));
770   }
771   INCREASE_LZ_POS
772   return len;
773 }
774 
775 #define SKIP_HEADER2_MT  do { GET_NEXT_BLOCK_IF_REQUIRED
776 #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
777 #define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0);
778 
MatchFinderMt0_Skip(CMatchFinderMt * p,UInt32 num)779 static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
780 {
781   SKIP_HEADER2_MT { p->btNumAvailBytes--;
782   SKIP_FOOTER_MT
783 }
784 
785 static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
786 {
787   SKIP_HEADER_MT(2)
788       UInt32 h2;
789       MT_HASH2_CALC
790       hash[h2] = p->lzPos;
791   SKIP_FOOTER_MT
792 }
793 
794 static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
795 {
796   SKIP_HEADER_MT(3)
797       UInt32 h2, h3;
798       MT_HASH3_CALC
799       (hash + kFix3HashSize)[h3] =
800       hash[                h2] =
801         p->lzPos;
802   SKIP_FOOTER_MT
803 }
804 
805 /*
806 static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
807 {
808   SKIP_HEADER_MT(4)
809       UInt32 h2, h3, h4;
810       MT_HASH4_CALC
811       (hash + kFix4HashSize)[h4] =
812       (hash + kFix3HashSize)[h3] =
813       hash[                h2] =
814         p->lzPos;
815   SKIP_FOOTER_MT
816 }
817 */
818 
819 void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
820 {
821   vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
822   vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
823   vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
824   vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
825 
826   switch (p->MatchFinder->numHashBytes)
827   {
828     case 2:
829       p->GetHeadsFunc = GetHeads2;
830       p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
831       vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
832       vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
833       break;
834     case 3:
835       p->GetHeadsFunc = GetHeads3;
836       p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
837       vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
838       break;
839     default:
840     /* case 4: */
841       p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
842       p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
843       vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
844       break;
845     /*
846     default:
847       p->GetHeadsFunc = GetHeads5;
848       p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
849       vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
850       break;
851     */
852   }
853 }
854