1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file fifo.hpp
24 *
25 * @brief Definitions for our fifos used for thread communication.
26 *
27 ******************************************************************************/
28 #pragma once
29 
30 
31 #include "common/os.h"
32 #include "arena.h"
33 
34 #include <vector>
35 #include <cassert>
36 
37 template<class T>
38 struct QUEUE
39 {
OSALIGNLINEQUEUE40     OSALIGNLINE(volatile uint32_t) mLock{ 0 };
OSALIGNLINEQUEUE41     OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
42     std::vector<T*> mBlocks;
43     T* mCurBlock{ nullptr };
44     uint32_t mHead{ 0 };
45     uint32_t mTail{ 0 };
46     uint32_t mCurBlockIdx{ 0 };
47 
48     // power of 2
49     static const uint32_t mBlockSizeShift = 6;
50     static const uint32_t mBlockSize = 1 << mBlockSizeShift;
51 
52     template <typename ArenaT>
clearQUEUE53     void clear(ArenaT& arena)
54     {
55         mHead = 0;
56         mTail = 0;
57         mBlocks.clear();
58         T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
59         mBlocks.push_back(pNewBlock);
60         mCurBlock = pNewBlock;
61         mCurBlockIdx = 0;
62         mNumEntries = 0;
63         mLock = 0;
64     }
65 
getNumQueuedQUEUE66     uint32_t getNumQueued()
67     {
68         return mNumEntries;
69     }
70 
tryLockQUEUE71     bool tryLock()
72     {
73         if (mLock)
74         {
75             return false;
76         }
77 
78         // try to lock the FIFO
79         long initial = InterlockedCompareExchange(&mLock, 1, 0);
80         return (initial == 0);
81     }
82 
unlockQUEUE83     void unlock()
84     {
85         mLock = 0;
86     }
87 
peekQUEUE88     T* peek()
89     {
90         if (mNumEntries == 0)
91         {
92             return nullptr;
93         }
94         uint32_t block = mHead >> mBlockSizeShift;
95         return &mBlocks[block][mHead & (mBlockSize-1)];
96     }
97 
dequeue_noincQUEUE98     void dequeue_noinc()
99     {
100         mHead ++;
101         mNumEntries --;
102     }
103 
104     template <typename ArenaT>
enqueue_try_nosyncQUEUE105     bool enqueue_try_nosync(ArenaT& arena, const T* entry)
106     {
107         const float* pSrc = (const float*)entry;
108         float* pDst = (float*)&mCurBlock[mTail];
109 
110         auto lambda = [&](int32_t i)
111         {
112             __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH);
113             _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc);
114         };
115 
116         const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4);
117         static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
118             "FIFO element size should be multiple of SIMD width.");
119 
120         UnrollerL<0, numSimdLines, 1>::step(lambda);
121 
122         mTail ++;
123         if (mTail == mBlockSize)
124         {
125             if (++mCurBlockIdx < mBlocks.size())
126             {
127                 mCurBlock = mBlocks[mCurBlockIdx];
128             }
129             else
130             {
131                 T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
132                 SWR_ASSERT(newBlock);
133 
134                 mBlocks.push_back(newBlock);
135                 mCurBlock = newBlock;
136             }
137 
138             mTail = 0;
139         }
140 
141         mNumEntries ++;
142         return true;
143     }
144 
destroyQUEUE145     void destroy()
146     {
147     }
148 
149 };
150