1 /*-------------------------------------------------------------------------
2  * drawElements C++ Base Library
3  * -----------------------------
4  *
5  * Copyright 2015 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Cross-thread barrier.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "deSpinBarrier.hpp"
25 #include "deThread.hpp"
26 #include "deRandom.hpp"
27 #include "deInt32.h"
28 
29 #include <vector>
30 
31 namespace de
32 {
33 
SpinBarrier(deInt32 numThreads)34 SpinBarrier::SpinBarrier (deInt32 numThreads)
35 	: m_numCores	(deGetNumAvailableLogicalCores())
36 	, m_numThreads	(numThreads)
37 	, m_numEntered	(0)
38 	, m_numLeaving	(0)
39 	, m_numRemoved	(0)
40 {
41 	DE_ASSERT(numThreads > 0);
42 }
43 
~SpinBarrier(void)44 SpinBarrier::~SpinBarrier (void)
45 {
46 	DE_ASSERT(m_numEntered == 0 && m_numLeaving == 0);
47 }
48 
reset(deUint32 numThreads)49 void SpinBarrier::reset (deUint32 numThreads)
50 {
51 	// If last threads were removed, m_numEntered > 0 && m_numRemoved > 0
52 	DE_ASSERT(m_numLeaving == 0);
53 	DE_ASSERT(numThreads > 0);
54 	m_numThreads = numThreads;
55 	m_numEntered = 0;
56 	m_numLeaving = 0;
57 	m_numRemoved = 0;
58 }
59 
getWaitMode(SpinBarrier::WaitMode requested,deUint32 numCores,deInt32 numThreads)60 inline SpinBarrier::WaitMode getWaitMode (SpinBarrier::WaitMode requested, deUint32 numCores, deInt32 numThreads)
61 {
62 	if (requested == SpinBarrier::WAIT_MODE_AUTO)
63 		return ((deUint32)numThreads <= numCores) ? SpinBarrier::WAIT_MODE_BUSY : SpinBarrier::WAIT_MODE_YIELD;
64 	else
65 		return requested;
66 }
67 
wait(SpinBarrier::WaitMode mode)68 inline void wait (SpinBarrier::WaitMode mode)
69 {
70 	DE_ASSERT(mode == SpinBarrier::WAIT_MODE_YIELD || mode == SpinBarrier::WAIT_MODE_BUSY);
71 
72 	if (mode == SpinBarrier::WAIT_MODE_YIELD)
73 		deYield();
74 }
75 
sync(WaitMode requestedMode)76 void SpinBarrier::sync (WaitMode requestedMode)
77 {
78 	const deInt32	cachedNumThreads	= m_numThreads;
79 	const WaitMode	waitMode			= getWaitMode(requestedMode, m_numCores, cachedNumThreads);
80 
81 	deMemoryReadWriteFence();
82 
83 	// m_numEntered must not be touched until all threads have had
84 	// a chance to observe it being 0.
85 	if (m_numLeaving > 0)
86 	{
87 		for (;;)
88 		{
89 			if (m_numLeaving == 0)
90 				break;
91 
92 			wait(waitMode);
93 		}
94 	}
95 
96 	// If m_numRemoved > 0, m_numThreads will decrease. If m_numThreads is decreased
97 	// just after atomicOp and before comparison, the branch could be taken by multiple
98 	// threads. Since m_numThreads only changes if all threads are inside the spinbarrier,
99 	// cached value at snapshotted at the beginning of the function will be equal for
100 	// all threads.
101 	if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
102 	{
103 		// Release all waiting threads. Since this thread has not been removed, m_numLeaving will
104 		// be >= 1 until m_numLeaving is decremented at the end of this function.
105 		m_numThreads -= m_numRemoved;
106 		m_numLeaving  = m_numThreads;
107 		m_numRemoved  = 0;
108 
109 		deMemoryReadWriteFence();
110 		m_numEntered  = 0;
111 	}
112 	else
113 	{
114 		for (;;)
115 		{
116 			if (m_numEntered == 0)
117 				break;
118 
119 			wait(waitMode);
120 		}
121 	}
122 
123 	deAtomicDecrement32(&m_numLeaving);
124 	deMemoryReadWriteFence();
125 }
126 
removeThread(WaitMode requestedMode)127 void SpinBarrier::removeThread (WaitMode requestedMode)
128 {
129 	const deInt32	cachedNumThreads	= m_numThreads;
130 	const WaitMode	waitMode			= getWaitMode(requestedMode, m_numCores, cachedNumThreads);
131 
132 	// Wait for other threads exiting previous barrier
133 	if (m_numLeaving > 0)
134 	{
135 		for (;;)
136 		{
137 			if (m_numLeaving == 0)
138 				break;
139 
140 			wait(waitMode);
141 		}
142 	}
143 
144 	// Ask for last thread entering barrier to adjust thread count
145 	deAtomicIncrement32(&m_numRemoved);
146 
147 	// See sync() - use cached value
148 	if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
149 	{
150 		// Release all waiting threads.
151 		m_numThreads -= m_numRemoved;
152 		m_numLeaving  = m_numThreads;
153 		m_numRemoved  = 0;
154 
155 		deMemoryReadWriteFence();
156 		m_numEntered  = 0;
157 	}
158 }
159 
160 namespace
161 {
162 
singleThreadTest(SpinBarrier::WaitMode mode)163 void singleThreadTest (SpinBarrier::WaitMode mode)
164 {
165 	SpinBarrier barrier(1);
166 
167 	barrier.sync(mode);
168 	barrier.sync(mode);
169 	barrier.sync(mode);
170 }
171 
172 class TestThread : public de::Thread
173 {
174 public:
TestThread(SpinBarrier & barrier,volatile deInt32 * sharedVar,int numThreads,int threadNdx)175 	TestThread (SpinBarrier& barrier, volatile deInt32* sharedVar, int numThreads, int threadNdx)
176 		: m_barrier		(barrier)
177 		, m_sharedVar	(sharedVar)
178 		, m_numThreads	(numThreads)
179 		, m_threadNdx	(threadNdx)
180 		, m_busyOk		((deUint32)m_numThreads <= deGetNumAvailableLogicalCores())
181 	{
182 	}
183 
run(void)184 	void run (void)
185 	{
186 		const int	numIters	= 10000;
187 		de::Random	rnd			(deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx));
188 
189 		for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
190 		{
191 			// Phase 1: count up
192 			deAtomicIncrement32(m_sharedVar);
193 
194 			// Verify
195 			m_barrier.sync(getWaitMode(rnd));
196 
197 			DE_TEST_ASSERT(*m_sharedVar == m_numThreads);
198 
199 			m_barrier.sync(getWaitMode(rnd));
200 
201 			// Phase 2: count down
202 			deAtomicDecrement32(m_sharedVar);
203 
204 			// Verify
205 			m_barrier.sync(getWaitMode(rnd));
206 
207 			DE_TEST_ASSERT(*m_sharedVar == 0);
208 
209 			m_barrier.sync(getWaitMode(rnd));
210 		}
211 	}
212 
213 private:
214 	SpinBarrier&			m_barrier;
215 	volatile deInt32* const	m_sharedVar;
216 	const int				m_numThreads;
217 	const int				m_threadNdx;
218 	const bool				m_busyOk;
219 
getWaitMode(de::Random & rnd)220 	SpinBarrier::WaitMode getWaitMode (de::Random& rnd)
221 	{
222 		static const SpinBarrier::WaitMode	s_allModes[]	=
223 		{
224 			SpinBarrier::WAIT_MODE_YIELD,
225 			SpinBarrier::WAIT_MODE_AUTO,
226 			SpinBarrier::WAIT_MODE_BUSY,
227 		};
228 		const int							numModes		= DE_LENGTH_OF_ARRAY(s_allModes) - (m_busyOk ? 0 : 1);
229 
230 		return rnd.choose<SpinBarrier::WaitMode>(DE_ARRAY_BEGIN(s_allModes), DE_ARRAY_BEGIN(s_allModes) + numModes);
231 	}
232 };
233 
multiThreadTest(int numThreads)234 void multiThreadTest (int numThreads)
235 {
236 	SpinBarrier					barrier		(numThreads);
237 	volatile deInt32			sharedVar	= 0;
238 	std::vector<TestThread*>	threads		(numThreads, static_cast<TestThread*>(DE_NULL));
239 
240 	for (int ndx = 0; ndx < numThreads; ndx++)
241 	{
242 		threads[ndx] = new TestThread(barrier, &sharedVar, numThreads, ndx);
243 		DE_TEST_ASSERT(threads[ndx]);
244 		threads[ndx]->start();
245 	}
246 
247 	for (int ndx = 0; ndx < numThreads; ndx++)
248 	{
249 		threads[ndx]->join();
250 		delete threads[ndx];
251 	}
252 
253 	DE_TEST_ASSERT(sharedVar == 0);
254 }
255 
singleThreadRemoveTest(SpinBarrier::WaitMode mode)256 void singleThreadRemoveTest (SpinBarrier::WaitMode mode)
257 {
258 	SpinBarrier barrier(3);
259 
260 	barrier.removeThread(mode);
261 	barrier.removeThread(mode);
262 	barrier.sync(mode);
263 	barrier.removeThread(mode);
264 
265 	barrier.reset(1);
266 	barrier.sync(mode);
267 
268 	barrier.reset(2);
269 	barrier.removeThread(mode);
270 	barrier.sync(mode);
271 }
272 
273 class TestExitThread : public de::Thread
274 {
275 public:
TestExitThread(SpinBarrier & barrier,int numThreads,int threadNdx,SpinBarrier::WaitMode waitMode)276 	TestExitThread (SpinBarrier& barrier, int numThreads, int threadNdx, SpinBarrier::WaitMode waitMode)
277 		: m_barrier		(barrier)
278 		, m_numThreads	(numThreads)
279 		, m_threadNdx	(threadNdx)
280 		, m_waitMode	(waitMode)
281 	{
282 	}
283 
run(void)284 	void run (void)
285 	{
286 		const int	numIters	= 10000;
287 		de::Random	rnd			(deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx) ^ deInt32Hash((deInt32)m_waitMode));
288 		const int	invExitProb	= 1000;
289 
290 		for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
291 		{
292 			if (rnd.getInt(0, invExitProb) == 0)
293 			{
294 				m_barrier.removeThread(m_waitMode);
295 				break;
296 			}
297 			else
298 				m_barrier.sync(m_waitMode);
299 		}
300 	}
301 
302 private:
303 	SpinBarrier&				m_barrier;
304 	const int					m_numThreads;
305 	const int					m_threadNdx;
306 	const SpinBarrier::WaitMode	m_waitMode;
307 };
308 
multiThreadRemoveTest(int numThreads,SpinBarrier::WaitMode waitMode)309 void multiThreadRemoveTest (int numThreads, SpinBarrier::WaitMode waitMode)
310 {
311 	SpinBarrier						barrier		(numThreads);
312 	std::vector<TestExitThread*>	threads		(numThreads, static_cast<TestExitThread*>(DE_NULL));
313 
314 	for (int ndx = 0; ndx < numThreads; ndx++)
315 	{
316 		threads[ndx] = new TestExitThread(barrier, numThreads, ndx, waitMode);
317 		DE_TEST_ASSERT(threads[ndx]);
318 		threads[ndx]->start();
319 	}
320 
321 	for (int ndx = 0; ndx < numThreads; ndx++)
322 	{
323 		threads[ndx]->join();
324 		delete threads[ndx];
325 	}
326 }
327 
328 } // anonymous
329 
SpinBarrier_selfTest(void)330 void SpinBarrier_selfTest (void)
331 {
332 	singleThreadTest(SpinBarrier::WAIT_MODE_YIELD);
333 	singleThreadTest(SpinBarrier::WAIT_MODE_BUSY);
334 	singleThreadTest(SpinBarrier::WAIT_MODE_AUTO);
335 	multiThreadTest(1);
336 	multiThreadTest(2);
337 	multiThreadTest(4);
338 	multiThreadTest(8);
339 	multiThreadTest(16);
340 
341 	singleThreadRemoveTest(SpinBarrier::WAIT_MODE_YIELD);
342 	singleThreadRemoveTest(SpinBarrier::WAIT_MODE_BUSY);
343 	singleThreadRemoveTest(SpinBarrier::WAIT_MODE_AUTO);
344 	multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_BUSY);
345 	multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_AUTO);
346 	multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_AUTO);
347 	multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_AUTO);
348 	multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_AUTO);
349 	multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_YIELD);
350 	multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_YIELD);
351 	multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_YIELD);
352 	multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_YIELD);
353 	multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_YIELD);
354 }
355 
356 } // de
357