1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file arena.h
24 *
25 * @brief Arena memory manager
26 *        The arena is convenient and fast for managing allocations for any of
27 *        our allocations that are associated with operations and can all be freed
28 *        once when their operation has completed. Allocations are cheap since
29 *        most of the time its simply an increment of an offset. Also, no need to
30 *        free individual allocations. All of the arena memory can be freed at once.
31 *
32 ******************************************************************************/
33 #pragma once
34 
35 #include <mutex>
36 #include <algorithm>
37 #include <atomic>
38 #include "core/utils.h"
39 
40 static const size_t ARENA_BLOCK_ALIGN = 64;
41 
42 struct ArenaBlock
43 {
44     size_t      blockSize = 0;
45     ArenaBlock* pNext = nullptr;
46 };
47 static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
48     "Increase BLOCK_ALIGN size");
49 
50 class DefaultAllocator
51 {
52 public:
AllocateAligned(size_t size,size_t align)53     ArenaBlock* AllocateAligned(size_t size, size_t align)
54     {
55         SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
56 
57         ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
58         p->blockSize = size;
59         return p;
60     }
61 
Free(ArenaBlock * pMem)62     void Free(ArenaBlock* pMem)
63     {
64         if (pMem)
65         {
66             SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
67             AlignedFree(pMem);
68         }
69     }
70 };
71 
72 // Caching Allocator for Arena
73 template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
74 struct CachingAllocatorT : DefaultAllocator
75 {
AllocateAlignedCachingAllocatorT76     ArenaBlock* AllocateAligned(size_t size, size_t align)
77     {
78         SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
79         SWR_ASSUME_ASSERT(size <= uint32_t(-1));
80 
81         uint32_t bucket = GetBucketId(size);
82 
83         {
84             // search cached blocks
85             std::lock_guard<std::mutex> l(m_mutex);
86             ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
87             ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
88 
89             if (pBlock)
90             {
91                 m_cachedSize -= pBlock->blockSize;
92                 if (pBlock == m_pLastCachedBlocks[bucket])
93                 {
94                     m_pLastCachedBlocks[bucket] = pPrevBlock;
95                 }
96             }
97             else
98             {
99                 pPrevBlock = &m_oldCachedBlocks[bucket];
100                 pBlock = SearchBlocks(pPrevBlock, size, align);
101 
102                 if (pBlock)
103                 {
104                     m_oldCachedSize -= pBlock->blockSize;
105                     if (pBlock == m_pOldLastCachedBlocks[bucket])
106                     {
107                         m_pOldLastCachedBlocks[bucket] = pPrevBlock;
108                     }
109                 }
110             }
111 
112             if (pBlock)
113             {
114                 SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
115                 pPrevBlock->pNext = pBlock->pNext;
116                 pBlock->pNext = nullptr;
117 
118                 return pBlock;
119             }
120 
121             m_totalAllocated += size;
122 
123 #if 0
124             {
125                 static uint32_t count = 0;
126                 char buf[128];
127                 sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
128                 OutputDebugStringA(buf);
129             }
130 #endif
131         }
132 
133         if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
134         {
135             // Make all blocks in this bucket the same size
136             size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
137         }
138 
139         return this->DefaultAllocator::AllocateAligned(size, align);
140     }
141 
FreeCachingAllocatorT142     void Free(ArenaBlock* pMem)
143     {
144         if (pMem)
145         {
146             std::unique_lock<std::mutex> l(m_mutex);
147             InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
148         }
149     }
150 
FreeOldBlocksCachingAllocatorT151     void FreeOldBlocks()
152     {
153         if (!m_cachedSize) { return; }
154         std::lock_guard<std::mutex> l(m_mutex);
155 
156         bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);
157 
158         for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
159         {
160             if (doFree)
161             {
162                 ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
163                 while (pBlock)
164                 {
165                     ArenaBlock* pNext = pBlock->pNext;
166                     m_oldCachedSize -= pBlock->blockSize;
167                     m_totalAllocated -= pBlock->blockSize;
168                     this->DefaultAllocator::Free(pBlock);
169                     pBlock = pNext;
170                 }
171                 m_oldCachedBlocks[i].pNext = nullptr;
172                 m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
173             }
174 
175             if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
176             {
177                 if (i && i < (CACHE_NUM_BUCKETS - 1))
178                 {
179                     // We know that all blocks are the same size.
180                     // Just move the list over.
181                     m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
182                     m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
183                     m_cachedBlocks[i].pNext = nullptr;
184                     if (m_pOldLastCachedBlocks[i]->pNext)
185                     {
186                         m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
187                     }
188                     m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
189                 }
190                 else
191                 {
192                     // The end buckets can have variable sized lists.
193                     // Insert each block based on size
194                     ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
195                     while (pBlock)
196                     {
197                         ArenaBlock* pNext = pBlock->pNext;
198                         pBlock->pNext = nullptr;
199                         m_cachedSize -= pBlock->blockSize;
200                         InsertCachedBlock<true>(i, pBlock);
201                         pBlock = pNext;
202                     }
203 
204                     m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
205                     m_cachedBlocks[i].pNext = nullptr;
206                 }
207             }
208         }
209 
210         m_oldCachedSize += m_cachedSize;
211         m_cachedSize = 0;
212     }
213 
CachingAllocatorTCachingAllocatorT214     CachingAllocatorT()
215     {
216         for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
217         {
218             m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
219             m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
220         }
221     }
222 
~CachingAllocatorTCachingAllocatorT223     ~CachingAllocatorT()
224     {
225         // Free all cached blocks
226         for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
227         {
228             ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
229             while (pBlock)
230             {
231                 ArenaBlock* pNext = pBlock->pNext;
232                 this->DefaultAllocator::Free(pBlock);
233                 pBlock = pNext;
234             }
235             pBlock = m_oldCachedBlocks[i].pNext;
236             while (pBlock)
237             {
238                 ArenaBlock* pNext = pBlock->pNext;
239                 this->DefaultAllocator::Free(pBlock);
240                 pBlock = pNext;
241             }
242         }
243     }
244 
245 private:
GetBucketIdCachingAllocatorT246     static uint32_t GetBucketId(size_t blockSize)
247     {
248         uint32_t bucketId = 0;
249 
250 #if defined(BitScanReverseSizeT)
251         BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
252         bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
253 #endif
254 
255         return bucketId;
256     }
257 
258     template <bool OldBlockT = false>
InsertCachedBlockCachingAllocatorT259     void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
260     {
261         SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);
262 
263         ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
264         ArenaBlock* pBlock = pPrevBlock->pNext;
265 
266         while (pBlock)
267         {
268             if (pNewBlock->blockSize >= pBlock->blockSize)
269             {
270                 // Insert here
271                 break;
272             }
273             pPrevBlock = pBlock;
274             pBlock = pBlock->pNext;
275         }
276 
277         // Insert into list
278         SWR_ASSUME_ASSERT(pPrevBlock);
279         pPrevBlock->pNext = pNewBlock;
280         pNewBlock->pNext = pBlock;
281 
282         if (OldBlockT)
283         {
284             if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
285             {
286                 m_pOldLastCachedBlocks[bucketId] = pNewBlock;
287             }
288 
289             m_oldCachedSize += pNewBlock->blockSize;
290         }
291         else
292         {
293             if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
294             {
295                 m_pLastCachedBlocks[bucketId] = pNewBlock;
296             }
297 
298             m_cachedSize += pNewBlock->blockSize;
299         }
300     }
301 
SearchBlocksCachingAllocatorT302     static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
303     {
304         ArenaBlock* pBlock = pPrevBlock->pNext;
305         ArenaBlock* pPotentialBlock = nullptr;
306         ArenaBlock* pPotentialPrev = nullptr;
307 
308         while (pBlock)
309         {
310             if (pBlock->blockSize >= blockSize)
311             {
312                 if (pBlock == AlignUp(pBlock, align))
313                 {
314                     if (pBlock->blockSize == blockSize)
315                     {
316                         // Won't find a better match
317                         break;
318                     }
319 
320                     // We could use this as it is larger than we wanted, but
321                     // continue to search for a better match
322                     pPotentialBlock = pBlock;
323                     pPotentialPrev = pPrevBlock;
324                 }
325             }
326             else
327             {
328                 // Blocks are sorted by size (biggest first)
329                 // So, if we get here, there are no blocks
330                 // large enough, fall through to allocation.
331                 pBlock = nullptr;
332                 break;
333             }
334 
335             pPrevBlock = pBlock;
336             pBlock = pBlock->pNext;
337         }
338 
339         if (!pBlock)
340         {
341             // Couldn't find an exact match, use next biggest size
342             pBlock = pPotentialBlock;
343             pPrevBlock = pPotentialPrev;
344         }
345 
346         return pBlock;
347     }
348 
349     // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
350     static const uint32_t   CACHE_NUM_BUCKETS       = NumBucketsT;
351     static const uint32_t   CACHE_START_BUCKET_BIT  = StartBucketBitT;
352     static const size_t     MAX_UNUSED_SIZE         = sizeof(MEGABYTE);
353 
354     ArenaBlock              m_cachedBlocks[CACHE_NUM_BUCKETS];
355     ArenaBlock*             m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
356     ArenaBlock              m_oldCachedBlocks[CACHE_NUM_BUCKETS];
357     ArenaBlock*             m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
358     std::mutex              m_mutex;
359 
360     size_t                  m_totalAllocated = 0;
361 
362     size_t                  m_cachedSize = 0;
363     size_t                  m_oldCachedSize = 0;
364 };
365 typedef CachingAllocatorT<> CachingAllocator;
366 
367 template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
368 class TArena
369 {
370 public:
TArena(T & in_allocator)371     TArena(T& in_allocator)  : m_allocator(in_allocator) {}
TArena()372     TArena()                 : m_allocator(m_defAllocator) {}
~TArena()373     ~TArena()
374     {
375         Reset(true);
376     }
377 
AllocAligned(size_t size,size_t align)378     void* AllocAligned(size_t size, size_t  align)
379     {
380         if (0 == size)
381         {
382             return nullptr;
383         }
384 
385         SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);
386 
387         if (m_pCurBlock)
388         {
389             ArenaBlock* pCurBlock = m_pCurBlock;
390             size_t offset = AlignUp(m_offset, align);
391 
392             if ((offset + size) <= pCurBlock->blockSize)
393             {
394                 void* pMem = PtrAdd(pCurBlock, offset);
395                 m_offset = offset + size;
396                 return pMem;
397             }
398 
399             // Not enough memory in this block, fall through to allocate
400             // a new block
401         }
402 
403         static const size_t ArenaBlockSize = BlockSizeT;
404         size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
405 
406         // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
407         blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
408 
409         ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN);    // Arena blocks are always simd byte aligned.
410         SWR_ASSERT(pNewBlock != nullptr);
411 
412         if (pNewBlock != nullptr)
413         {
414             m_offset = ARENA_BLOCK_ALIGN;
415             pNewBlock->pNext = m_pCurBlock;
416 
417             m_pCurBlock = pNewBlock;
418         }
419 
420         return AllocAligned(size, align);
421     }
422 
Alloc(size_t size)423     void* Alloc(size_t  size)
424     {
425         return AllocAligned(size, 1);
426     }
427 
AllocAlignedSync(size_t size,size_t align)428     void* AllocAlignedSync(size_t size, size_t align)
429     {
430         void* pAlloc = nullptr;
431 
432         m_mutex.lock();
433         pAlloc = AllocAligned(size, align);
434         m_mutex.unlock();
435 
436         return pAlloc;
437     }
438 
AllocSync(size_t size)439     void* AllocSync(size_t size)
440     {
441         void* pAlloc = nullptr;
442 
443         m_mutex.lock();
444         pAlloc = Alloc(size);
445         m_mutex.unlock();
446 
447         return pAlloc;
448     }
449 
450     void Reset(bool removeAll = false)
451     {
452         m_offset = ARENA_BLOCK_ALIGN;
453 
454         if (m_pCurBlock)
455         {
456             ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
457             m_pCurBlock->pNext = nullptr;
458             while (pUsedBlocks)
459             {
460                 ArenaBlock* pBlock = pUsedBlocks;
461                 pUsedBlocks = pBlock->pNext;
462 
463                 m_allocator.Free(pBlock);
464             }
465 
466             if (removeAll)
467             {
468                 m_allocator.Free(m_pCurBlock);
469                 m_pCurBlock = nullptr;
470             }
471         }
472     }
473 
IsEmpty()474     bool IsEmpty()
475     {
476         return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
477     }
478 
479 private:
480 
481     ArenaBlock*         m_pCurBlock = nullptr;
482     size_t              m_offset    = ARENA_BLOCK_ALIGN;
483 
484     /// @note Mutex is only used by sync allocation functions.
485     std::mutex          m_mutex;
486 
487     DefaultAllocator    m_defAllocator;
488     T&                  m_allocator;
489 };
490 
491 using StdArena      = TArena<DefaultAllocator>;
492 using CachingArena  = TArena<CachingAllocator>;
493