1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Buffer data upload performance tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es3pBufferDataUploadTests.hpp"
25 #include "glsCalibration.hpp"
26 #include "tcuTestLog.hpp"
27 #include "tcuVectorUtil.hpp"
28 #include "tcuSurface.hpp"
29 #include "tcuCPUWarmup.hpp"
30 #include "tcuRenderTarget.hpp"
31 #include "gluRenderContext.hpp"
32 #include "gluShaderProgram.hpp"
33 #include "gluStrUtil.hpp"
34 #include "gluPixelTransfer.hpp"
35 #include "gluObjectWrapper.hpp"
36 #include "glwFunctions.hpp"
37 #include "glwEnums.hpp"
38 #include "deClock.h"
39 #include "deMath.h"
40 #include "deStringUtil.hpp"
41 #include "deRandom.hpp"
42 #include "deMemory.h"
43 #include "deThread.h"
44 #include "deMeta.hpp"
45 
46 #include <algorithm>
47 #include <iomanip>
48 #include <limits>
49 
50 namespace deqp
51 {
52 namespace gles3
53 {
54 namespace Performance
55 {
56 namespace
57 {
58 
59 using gls::theilSenSiegelLinearRegression;
60 using gls::LineParametersWithConfidence;
61 using de::meta::EnableIf;
62 using de::meta::Not;
63 
// GLSL ES 3.00 vertex shader source: copies the a_position attribute
// straight into gl_Position.
static const char* const s_dummyVertexShader =
	"#version 300 es\n"
	"in highp vec4 a_position;\n"
	"void main (void)\n"
	"{\n"
	"	gl_Position = a_position;\n"
	"}\n";
70 
// GLSL ES 3.00 fragment shader source: writes constant opaque red to output
// location 0.
// NOTE(review): the identifier is misspelled ("Fragnent"); it is kept as-is
// because code outside this section presumably refers to it by this name.
static const char* const s_dummyFragnentShader =
	"#version 300 es\n"
	"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
	"void main (void)\n"
	"{\n"
	"	dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
	"}\n";
77 
// GLSL ES 3.00 vertex shader source: forwards a_position to gl_Position and
// passes the a_color attribute on to the fragment stage as v_color.
static const char* const s_colorVertexShader =
	"#version 300 es\n"
	"in highp vec4 a_position;\n"
	"in highp vec4 a_color;\n"
	"out highp vec4 v_color;\n"
	"void main (void)\n"
	"{\n"
	"	gl_Position = a_position;\n"
	"	v_color = a_color;\n"
	"}\n";
87 
// GLSL ES 3.00 fragment shader source: writes the interpolated v_color to
// output location 0.
static const char* const s_colorFragmentShader =
	"#version 300 es\n"
	"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
	"in mediump vec4 v_color;\n"
	"void main (void)\n"
	"{\n"
	"	dEQP_FragColor = v_color;\n"
	"}\n";
95 
96 struct SingleOperationDuration
97 {
98 	deUint64 totalDuration;
99 	deUint64 fitResponseDuration; // used for fitting
100 };
101 
102 struct MapBufferRangeDuration
103 {
104 	deUint64 mapDuration;
105 	deUint64 unmapDuration;
106 	deUint64 writeDuration;
107 	deUint64 allocDuration;
108 	deUint64 totalDuration;
109 
110 	deUint64 fitResponseDuration;
111 };
112 
113 struct MapBufferRangeDurationNoAlloc
114 {
115 	deUint64 mapDuration;
116 	deUint64 unmapDuration;
117 	deUint64 writeDuration;
118 	deUint64 totalDuration;
119 
120 	deUint64 fitResponseDuration;
121 };
122 
123 struct MapBufferRangeFlushDuration
124 {
125 	deUint64 mapDuration;
126 	deUint64 unmapDuration;
127 	deUint64 writeDuration;
128 	deUint64 flushDuration;
129 	deUint64 allocDuration;
130 	deUint64 totalDuration;
131 
132 	deUint64 fitResponseDuration;
133 };
134 
135 struct MapBufferRangeFlushDurationNoAlloc
136 {
137 	deUint64 mapDuration;
138 	deUint64 unmapDuration;
139 	deUint64 writeDuration;
140 	deUint64 flushDuration;
141 	deUint64 totalDuration;
142 
143 	deUint64 fitResponseDuration;
144 };
145 
146 struct RenderReadDuration
147 {
148 	deUint64 renderDuration;
149 	deUint64 readDuration;
150 	deUint64 renderReadDuration;
151 	deUint64 totalDuration;
152 
153 	deUint64 fitResponseDuration;
154 };
155 
156 struct UnrelatedUploadRenderReadDuration
157 {
158 	deUint64 renderDuration;
159 	deUint64 readDuration;
160 	deUint64 renderReadDuration;
161 	deUint64 totalDuration;
162 
163 	deUint64 fitResponseDuration;
164 };
165 
166 struct UploadRenderReadDuration
167 {
168 	deUint64 uploadDuration;
169 	deUint64 renderDuration;
170 	deUint64 readDuration;
171 	deUint64 totalDuration;
172 	deUint64 renderReadDuration;
173 
174 	deUint64 fitResponseDuration;
175 };
176 
177 struct UploadRenderReadDurationWithUnrelatedUploadSize
178 {
179 	deUint64 uploadDuration;
180 	deUint64 renderDuration;
181 	deUint64 readDuration;
182 	deUint64 totalDuration;
183 	deUint64 renderReadDuration;
184 
185 	deUint64 fitResponseDuration;
186 };
187 
188 struct RenderUploadRenderReadDuration
189 {
190 	deUint64 firstRenderDuration;
191 	deUint64 uploadDuration;
192 	deUint64 secondRenderDuration;
193 	deUint64 readDuration;
194 	deUint64 totalDuration;
195 	deUint64 renderReadDuration;
196 
197 	deUint64 fitResponseDuration;
198 };
199 
// One upload measurement: the sizes involved plus the timing record.
// SampleT is one of the *Duration structs; it is re-exported as SampleType so
// generic code can name it (see calculateSingleOperationStatistics()).
template <typename SampleT>
struct UploadSampleResult
{
	typedef SampleT SampleType;

	int		bufferSize;
	int		allocatedSize;
	int		writtenSize;	// predictor (x value) used by the transfer statistics and line fitting
	SampleT	duration;
};
210 
// One render measurement: the sizes involved plus the timing record.
// SampleT is one of the *Duration structs; it is re-exported as SampleType so
// generic code can name it (see calculateSingleOperationStatistics()).
template <typename SampleT>
struct RenderSampleResult
{
	typedef SampleT SampleType;

	int			uploadedDataSize;
	int			renderDataSize;		// predictor (x value) used by the render statistics and line fitting
	int			unrelatedDataSize;
	int			numVertices;
	SampleType	duration;
};
222 
// Distribution summary of one timed quantity over all samples, as produced by
// calculateSingleOperationStatistics() from the sorted durations.
struct SingleOperationStatistics
{
	float	minTime;
	float	maxTime;
	float	medianTime;
	float	min2DecileTime;	// !< minimum value in the 2nd decile (sampled at position 0.1)
	float	max9DecileTime;	// !< maximum value in the 9th decile (sampled at position 0.9)
};
231 
232 struct SingleCallStatistics
233 {
234 	SingleOperationStatistics	result;
235 
236 	float						medianRate;
237 	float						maxDiffTime;
238 	float						maxDiff9DecileTime;
239 	float						medianDiffTime;
240 
241 	float						maxRelDiffTime;
242 	float						max9DecileRelDiffTime;
243 	float						medianRelDiffTime;
244 };
245 
246 struct MapCallStatistics
247 {
248 	SingleOperationStatistics	map;
249 	SingleOperationStatistics	unmap;
250 	SingleOperationStatistics	write;
251 	SingleOperationStatistics	alloc;
252 	SingleOperationStatistics	result;
253 
254 	float						medianRate;
255 	float						maxDiffTime;
256 	float						maxDiff9DecileTime;
257 	float						medianDiffTime;
258 
259 	float						maxRelDiffTime;
260 	float						max9DecileRelDiffTime;
261 	float						medianRelDiffTime;
262 };
263 
264 struct MapFlushCallStatistics
265 {
266 	SingleOperationStatistics	map;
267 	SingleOperationStatistics	unmap;
268 	SingleOperationStatistics	write;
269 	SingleOperationStatistics	flush;
270 	SingleOperationStatistics	alloc;
271 	SingleOperationStatistics	result;
272 
273 	float						medianRate;
274 	float						maxDiffTime;
275 	float						maxDiff9DecileTime;
276 	float						medianDiffTime;
277 
278 	float						maxRelDiffTime;
279 	float						max9DecileRelDiffTime;
280 	float						medianRelDiffTime;
281 };
282 
283 struct RenderReadStatistics
284 {
285 	SingleOperationStatistics	render;
286 	SingleOperationStatistics	read;
287 	SingleOperationStatistics	result;
288 	SingleOperationStatistics	total;
289 
290 	float						medianRate;
291 	float						maxDiffTime;
292 	float						maxDiff9DecileTime;
293 	float						medianDiffTime;
294 
295 	float						maxRelDiffTime;
296 	float						max9DecileRelDiffTime;
297 	float						medianRelDiffTime;
298 };
299 
300 struct UploadRenderReadStatistics
301 {
302 	SingleOperationStatistics	upload;
303 	SingleOperationStatistics	render;
304 	SingleOperationStatistics	read;
305 	SingleOperationStatistics	result;
306 	SingleOperationStatistics	total;
307 
308 	float						medianRate;
309 	float						maxDiffTime;
310 	float						maxDiff9DecileTime;
311 	float						medianDiffTime;
312 
313 	float						maxRelDiffTime;
314 	float						max9DecileRelDiffTime;
315 	float						medianRelDiffTime;
316 };
317 
318 struct RenderUploadRenderReadStatistics
319 {
320 	SingleOperationStatistics	firstRender;
321 	SingleOperationStatistics	upload;
322 	SingleOperationStatistics	secondRender;
323 	SingleOperationStatistics	read;
324 	SingleOperationStatistics	result;
325 	SingleOperationStatistics	total;
326 
327 	float						medianRate;
328 	float						maxDiffTime;
329 	float						maxDiff9DecileTime;
330 	float						medianDiffTime;
331 
332 	float						maxRelDiffTime;
333 	float						max9DecileRelDiffTime;
334 	float						medianRelDiffTime;
335 };
336 
// Compile-time description of a duration (sample) type. The primary template
// is intentionally empty; each supported duration struct provides an explicit
// specialization with a StatsType typedef and a set of 0/1 flags.
template <typename T>
struct SampleTypeTraits
{
	// no members: only the specializations carry information
};
341 
342 template <>
343 struct SampleTypeTraits<SingleOperationDuration>
344 {
345 	typedef SingleCallStatistics StatsType;
346 
347 	enum { HAS_MAP_STATS		= 0	};
348 	enum { HAS_UNMAP_STATS		= 0	};
349 	enum { HAS_WRITE_STATS		= 0	};
350 	enum { HAS_FLUSH_STATS		= 0	};
351 	enum { HAS_ALLOC_STATS		= 0	};
352 	enum { LOG_CONTRIBUTIONS	= 0	};
353 };
354 
355 template <>
356 struct SampleTypeTraits<MapBufferRangeDuration>
357 {
358 	typedef MapCallStatistics StatsType;
359 
360 	enum { HAS_MAP_STATS		= 1	};
361 	enum { HAS_UNMAP_STATS		= 1	};
362 	enum { HAS_WRITE_STATS		= 1	};
363 	enum { HAS_FLUSH_STATS		= 0	};
364 	enum { HAS_ALLOC_STATS		= 1	};
365 	enum { LOG_CONTRIBUTIONS	= 1	};
366 };
367 
368 template <>
369 struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
370 {
371 	typedef MapCallStatistics StatsType;
372 
373 	enum { HAS_MAP_STATS		= 1	};
374 	enum { HAS_UNMAP_STATS		= 1	};
375 	enum { HAS_WRITE_STATS		= 1	};
376 	enum { HAS_FLUSH_STATS		= 0	};
377 	enum { HAS_ALLOC_STATS		= 0	};
378 	enum { LOG_CONTRIBUTIONS	= 1	};
379 };
380 
381 template <>
382 struct SampleTypeTraits<MapBufferRangeFlushDuration>
383 {
384 	typedef MapFlushCallStatistics StatsType;
385 
386 	enum { HAS_MAP_STATS		= 1	};
387 	enum { HAS_UNMAP_STATS		= 1	};
388 	enum { HAS_WRITE_STATS		= 1	};
389 	enum { HAS_FLUSH_STATS		= 1	};
390 	enum { HAS_ALLOC_STATS		= 1	};
391 	enum { LOG_CONTRIBUTIONS	= 1	};
392 };
393 
394 template <>
395 struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
396 {
397 	typedef MapFlushCallStatistics StatsType;
398 
399 	enum { HAS_MAP_STATS		= 1	};
400 	enum { HAS_UNMAP_STATS		= 1	};
401 	enum { HAS_WRITE_STATS		= 1	};
402 	enum { HAS_FLUSH_STATS		= 1	};
403 	enum { HAS_ALLOC_STATS		= 0	};
404 	enum { LOG_CONTRIBUTIONS	= 1	};
405 };
406 
407 template <>
408 struct SampleTypeTraits<RenderReadDuration>
409 {
410 	typedef RenderReadStatistics StatsType;
411 
412 	enum { HAS_RENDER_STATS			= 1	};
413 	enum { HAS_READ_STATS			= 1	};
414 	enum { HAS_UPLOAD_STATS			= 0	};
415 	enum { HAS_TOTAL_STATS			= 1	};
416 	enum { HAS_FIRST_RENDER_STATS	= 0	};
417 	enum { HAS_SECOND_RENDER_STATS	= 0	};
418 
419 	enum { LOG_CONTRIBUTIONS	= 1	};
420 };
421 
422 template <>
423 struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
424 {
425 	typedef RenderReadStatistics StatsType;
426 
427 	enum { HAS_RENDER_STATS			= 1	};
428 	enum { HAS_READ_STATS			= 1	};
429 	enum { HAS_UPLOAD_STATS			= 0	};
430 	enum { HAS_TOTAL_STATS			= 1	};
431 	enum { HAS_FIRST_RENDER_STATS	= 0	};
432 	enum { HAS_SECOND_RENDER_STATS	= 0	};
433 
434 	enum { LOG_CONTRIBUTIONS	= 1	};
435 };
436 
437 template <>
438 struct SampleTypeTraits<UploadRenderReadDuration>
439 {
440 	typedef UploadRenderReadStatistics StatsType;
441 
442 	enum { HAS_RENDER_STATS			= 1	};
443 	enum { HAS_READ_STATS			= 1	};
444 	enum { HAS_UPLOAD_STATS			= 1	};
445 	enum { HAS_TOTAL_STATS			= 1	};
446 	enum { HAS_FIRST_RENDER_STATS	= 0	};
447 	enum { HAS_SECOND_RENDER_STATS	= 0	};
448 
449 	enum { LOG_CONTRIBUTIONS			= 1	};
450 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 0 };
451 };
452 
453 template <>
454 struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
455 {
456 	typedef UploadRenderReadStatistics StatsType;
457 
458 	enum { HAS_RENDER_STATS			= 1	};
459 	enum { HAS_READ_STATS			= 1	};
460 	enum { HAS_UPLOAD_STATS			= 1	};
461 	enum { HAS_TOTAL_STATS			= 1	};
462 	enum { HAS_FIRST_RENDER_STATS	= 0	};
463 	enum { HAS_SECOND_RENDER_STATS	= 0	};
464 
465 	enum { LOG_CONTRIBUTIONS			= 1	};
466 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
467 };
468 
469 template <>
470 struct SampleTypeTraits<RenderUploadRenderReadDuration>
471 {
472 	typedef RenderUploadRenderReadStatistics StatsType;
473 
474 	enum { HAS_RENDER_STATS			= 0	};
475 	enum { HAS_READ_STATS			= 1	};
476 	enum { HAS_UPLOAD_STATS			= 1	};
477 	enum { HAS_TOTAL_STATS			= 1	};
478 	enum { HAS_FIRST_RENDER_STATS	= 1	};
479 	enum { HAS_SECOND_RENDER_STATS	= 1	};
480 
481 	enum { LOG_CONTRIBUTIONS			= 1	};
482 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
483 };
484 
// Transfer rates reported by upload sample analysis. The code that computes
// them is outside this section.
struct UploadSampleAnalyzeResult
{
	float	transferRateMedian;
	float	transferRateAtRange;
	float	transferRateAtInfinity;
};
491 
// Render rates reported by render sample analysis. The code that computes
// them is outside this section.
struct RenderSampleAnalyzeResult
{
	float	renderRateMedian;
	float	renderRateAtRange;
	float	renderRateAtInfinity;
};
498 
// Payload-free exception type derived from std::exception. Judging by the
// name it signals a failed buffer unmap; the throw and catch sites are
// outside this section.
class UnmapFailureError : public std::exception
{
public:
	UnmapFailureError (void)
		: std::exception()
	{
	}
};
504 
getHumanReadableByteSize(int numBytes)505 static std::string getHumanReadableByteSize (int numBytes)
506 {
507 	std::ostringstream buf;
508 
509 	if (numBytes < 1024)
510 		buf << numBytes << " byte(s)";
511 	else if (numBytes < 1024 * 1024)
512 		buf << de::floatToString(numBytes/1024.0f, 1) << " KiB";
513 	else
514 		buf << de::floatToString(numBytes/1024.0f/1024.0f, 1) << " MiB";
515 
516 	return buf.str();
517 }
518 
medianTimeMemcpy(void * dst,const void * src,int numBytes)519 static deUint64 medianTimeMemcpy (void* dst, const void* src, int numBytes)
520 {
521 	// Time used by memcpy is assumed to be asymptotically linear
522 
523 	// With large numBytes, the probability of context switch or other random
524 	// event is high. Apply memcpy in parts and report how much time would
525 	// memcpy have used with the median transfer rate.
526 
527 	// Less than 1MiB, no need to do anything special
528 	if (numBytes < 1048576)
529 	{
530 		deUint64 startTime;
531 		deUint64 endTime;
532 
533 		deYield();
534 
535 		startTime = deGetMicroseconds();
536 		deMemcpy(dst, src, numBytes);
537 		endTime = deGetMicroseconds();
538 
539 		return endTime - startTime;
540 	}
541 	else
542 	{
543 		// Do memcpy in multiple parts
544 
545 		const int	numSections		= 5;
546 		const int	sectionAlign	= 16;
547 
548 		int			sectionStarts[numSections+1];
549 		int			sectionLens[numSections];
550 		deUint64	sectionTimes[numSections];
551 		deUint64	medianTime;
552 		deUint64	bestTime		= 0;
553 
554 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
555 			sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign);
556 		sectionStarts[numSections] = numBytes;
557 
558 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
559 			sectionLens[sectionNdx] = sectionStarts[sectionNdx+1] - sectionStarts[sectionNdx];
560 
561 		// Memcpy is usually called after mapbuffer range which may take
562 		// a lot of time. To prevent power management from kicking in during
563 		// copy, warm up more.
564 		{
565 			deYield();
566 			tcu::warmupCPU();
567 			deYield();
568 		}
569 
570 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
571 		{
572 			deUint64 startTime;
573 			deUint64 endTime;
574 
575 			startTime = deGetMicroseconds();
576 			deMemcpy((deUint8*)dst + sectionStarts[sectionNdx], (const deUint8*)src + sectionStarts[sectionNdx], sectionLens[sectionNdx]);
577 			endTime = deGetMicroseconds();
578 
579 			sectionTimes[sectionNdx] = endTime - startTime;
580 
581 			if (!bestTime || sectionTimes[sectionNdx] < bestTime)
582 				bestTime = sectionTimes[sectionNdx];
583 
584 			// Detect if write takes 50% longer than it should, and warm up if that happened
585 			if (sectionNdx != numSections-1 && (float)sectionTimes[sectionNdx] > 1.5f * bestTime)
586 			{
587 				deYield();
588 				tcu::warmupCPU();
589 				deYield();
590 			}
591 		}
592 
593 		std::sort(sectionTimes, sectionTimes + numSections);
594 
595 		if ((numSections % 2) == 0)
596 			medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
597 		else
598 			medianTime = sectionTimes[numSections / 2];
599 
600 		return medianTime*numSections;
601 	}
602 }
603 
dummyCalculation(float initial,int workSize)604 static float dummyCalculation (float initial, int workSize)
605 {
606 	float	a = initial;
607 	int		b = 123;
608 
609 	for (int ndx = 0; ndx < workSize; ++ndx)
610 	{
611 		a = deFloatCos(a + (float)b);
612 		b = (b + 63) % 107 + de::abs((int)(a*10.0f));
613 	}
614 
615 	return a + (float)b;
616 }
617 
busyWait(int microseconds)618 static void busyWait (int microseconds)
619 {
620 	const deUint64	maxSingleWaitTime	= 1000; // 1ms
621 	const deUint64	endTime				= deGetMicroseconds() + microseconds;
622 	float			dummy				= *tcu::warmupCPUInternal::g_dummy.m_v;
623 	int				workSize			= 500;
624 
625 	// exponentially increase work, cap to 1ms
626 	while (deGetMicroseconds() < endTime)
627 	{
628 		const deUint64	startTime		= deGetMicroseconds();
629 		deUint64		totalTime;
630 
631 		dummy = dummyCalculation(dummy, workSize);
632 
633 		totalTime = deGetMicroseconds() - startTime;
634 
635 		if (totalTime >= maxSingleWaitTime)
636 			break;
637 		else
638 			workSize *= 2;
639 	}
640 
641 	// "wait"
642 	while (deGetMicroseconds() < endTime)
643 		dummy = dummyCalculation(dummy, workSize);
644 
645 	*tcu::warmupCPUInternal::g_dummy.m_v = dummy;
646 }
647 
648 // Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
649 template <typename T>
linearSample(const std::vector<T> & values,float position)650 static float linearSample (const std::vector<T>& values, float position)
651 {
652 	DE_ASSERT(position >= 0.0f);
653 	DE_ASSERT(position <= 1.0f);
654 
655 	const float	floatNdx			= ((int)values.size() - 1) * position;
656 	const int	lowerNdx			= (int)deFloatFloor(floatNdx);
657 	const int	higherNdx			= lowerNdx + 1;
658 	const float	interpolationFactor = floatNdx - (float)lowerNdx;
659 
660 	DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size());
661 	DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size());
662 	DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);
663 
664 	return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
665 }
666 
667 template <typename T>
calculateSingleOperationStatistics(const std::vector<T> & samples,deUint64 T::SampleType::* target)668 SingleOperationStatistics calculateSingleOperationStatistics (const std::vector<T>& samples, deUint64 T::SampleType::*target)
669 {
670 	SingleOperationStatistics	stats;
671 	std::vector<deUint64>		values(samples.size());
672 
673 	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
674 		values[ndx] = samples[ndx].duration.*target;
675 
676 	std::sort(values.begin(), values.end());
677 
678 	stats.minTime			= (float)values.front();
679 	stats.maxTime			= (float)values.back();
680 	stats.medianTime		= linearSample(values, 0.5f);
681 	stats.min2DecileTime	= linearSample(values, 0.1f);
682 	stats.max9DecileTime	= linearSample(values, 0.9f);
683 
684 	return stats;
685 }
686 
687 template <typename StatisticsType, typename SampleType>
calculateBasicStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples,int SampleType::* predictor)688 void calculateBasicStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples, int SampleType::*predictor)
689 {
690 	std::vector<deUint64> values(samples.size());
691 
692 	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
693 		values[ndx] = samples[ndx].duration.fitResponseDuration;
694 
695 	// median rate
696 	{
697 		std::vector<float> processingRates(samples.size());
698 
699 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
700 		{
701 			const float timeInSeconds = values[ndx] / 1000.0f / 1000.0f;
702 			processingRates[ndx] = samples[ndx].*predictor / timeInSeconds;
703 		}
704 
705 		std::sort(processingRates.begin(), processingRates.end());
706 
707 		stats.medianRate = linearSample(processingRates, 0.5f);
708 	}
709 
710 	// results compared to the approximation
711 	{
712 		std::vector<float> timeDiffs(samples.size());
713 
714 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
715 		{
716 			const float prediction	= samples[ndx].*predictor * fit.coefficient + fit.offset;
717 			const float actual		= (float)values[ndx];
718 			timeDiffs[ndx] = actual - prediction;
719 		}
720 		std::sort(timeDiffs.begin(), timeDiffs.end());
721 
722 		stats.maxDiffTime			= timeDiffs.back();
723 		stats.maxDiff9DecileTime	= linearSample(timeDiffs, 0.9f);
724 		stats.medianDiffTime		= linearSample(timeDiffs, 0.5f);
725 	}
726 
727 	// relative comparison to the approximation
728 	{
729 		std::vector<float> relativeDiffs(samples.size());
730 
731 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
732 		{
733 			const float prediction	= samples[ndx].*predictor * fit.coefficient + fit.offset;
734 			const float actual		= (float)values[ndx];
735 
736 			// Ignore cases where we predict negative times, or if
737 			// ratio would be (nearly) infinite: ignore if predicted
738 			// time is less than 1 microsecond
739 			if (prediction < 1.0f)
740 				relativeDiffs[ndx] = 0.0f;
741 			else
742 				relativeDiffs[ndx] = (actual - prediction) / prediction;
743 		}
744 		std::sort(relativeDiffs.begin(), relativeDiffs.end());
745 
746 		stats.maxRelDiffTime		= relativeDiffs.back();
747 		stats.max9DecileRelDiffTime	= linearSample(relativeDiffs, 0.9f);
748 		stats.medianRelDiffTime		= linearSample(relativeDiffs, 0.5f);
749 	}
750 
751 	// values calculated using sorted timings
752 
753 	std::sort(values.begin(), values.end());
754 
755 	stats.result.minTime = (float)values.front();
756 	stats.result.maxTime = (float)values.back();
757 	stats.result.medianTime = linearSample(values, 0.5f);
758 	stats.result.min2DecileTime = linearSample(values, 0.1f);
759 	stats.result.max9DecileTime = linearSample(values, 0.9f);
760 }
761 
762 template <typename StatisticsType, typename SampleType>
calculateBasicTransferStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples)763 void calculateBasicTransferStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
764 {
765 	calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize);
766 }
767 
768 template <typename StatisticsType, typename SampleType>
calculateBasicRenderStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples)769 void calculateBasicRenderStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
770 {
771 	calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize);
772 }
773 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<SingleOperationDuration>> & samples)774 static SingleCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
775 {
776 	SingleCallStatistics stats;
777 
778 	calculateBasicTransferStatistics(stats, fit, samples);
779 
780 	return stats;
781 }
782 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeDuration>> & samples)783 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
784 {
785 	MapCallStatistics stats;
786 
787 	calculateBasicTransferStatistics(stats, fit, samples);
788 
789 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration);
790 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration);
791 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration);
792 	stats.alloc	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration);
793 
794 	return stats;
795 }
796 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> & samples)797 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
798 {
799 	MapFlushCallStatistics stats;
800 
801 	calculateBasicTransferStatistics(stats, fit, samples);
802 
803 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration);
804 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration);
805 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration);
806 	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration);
807 	stats.alloc	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration);
808 
809 	return stats;
810 }
811 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> & samples)812 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
813 {
814 	MapCallStatistics stats;
815 
816 	calculateBasicTransferStatistics(stats, fit, samples);
817 
818 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
819 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
820 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);
821 
822 	return stats;
823 }
824 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> & samples)825 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
826 {
827 	MapFlushCallStatistics stats;
828 
829 	calculateBasicTransferStatistics(stats, fit, samples);
830 
831 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
832 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
833 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
834 	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);
835 
836 	return stats;
837 }
838 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<RenderReadDuration>> & samples)839 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
840 {
841 	RenderReadStatistics stats;
842 
843 	calculateBasicRenderStatistics(stats, fit, samples);
844 
845 	stats.render	= calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration);
846 	stats.read		= calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration);
847 	stats.total		= calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration);
848 
849 	return stats;
850 }
851 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> & samples)852 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
853 {
854 	RenderReadStatistics stats;
855 
856 	calculateBasicRenderStatistics(stats, fit, samples);
857 
858 	stats.render	= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration);
859 	stats.read		= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration);
860 	stats.total		= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration);
861 
862 	return stats;
863 }
864 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UploadRenderReadDuration>> & samples)865 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
866 {
867 	UploadRenderReadStatistics stats;
868 
869 	calculateBasicRenderStatistics(stats, fit, samples);
870 
871 	stats.upload	= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration);
872 	stats.render	= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration);
873 	stats.read		= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration);
874 	stats.total		= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration);
875 
876 	return stats;
877 }
878 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> & samples)879 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
880 {
881 	UploadRenderReadStatistics stats;
882 
883 	calculateBasicRenderStatistics(stats, fit, samples);
884 
885 	stats.upload	= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration);
886 	stats.render	= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration);
887 	stats.read		= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration);
888 	stats.total		= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration);
889 
890 	return stats;
891 }
892 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> & samples)893 static RenderUploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
894 {
895 	RenderUploadRenderReadStatistics stats;
896 
897 	calculateBasicRenderStatistics(stats, fit, samples);
898 
899 	stats.firstRender	= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration);
900 	stats.upload		= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration);
901 	stats.secondRender	= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration);
902 	stats.read			= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration);
903 	stats.total			= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration);
904 
905 	return stats;
906 }
907 
908 template <typename DurationType>
fitLineToSamples(const std::vector<UploadSampleResult<DurationType>> & samples,int beginNdx,int endNdx,int step,deUint64 DurationType::* target=& DurationType::fitResponseDuration)909 static LineParametersWithConfidence fitLineToSamples (const std::vector<UploadSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
910 {
911 	std::vector<tcu::Vec2> samplePoints;
912 
913 	for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
914 	{
915 		tcu::Vec2 point;
916 
917 		point.x() = (float)(samples[sampleNdx].writtenSize);
918 		point.y() = (float)(samples[sampleNdx].duration.*target);
919 
920 		samplePoints.push_back(point);
921 	}
922 
923 	return theilSenSiegelLinearRegression(samplePoints, 0.6f);
924 }
925 
926 template <typename DurationType>
fitLineToSamples(const std::vector<RenderSampleResult<DurationType>> & samples,int beginNdx,int endNdx,int step,deUint64 DurationType::* target=& DurationType::fitResponseDuration)927 static LineParametersWithConfidence fitLineToSamples (const std::vector<RenderSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
928 {
929 	std::vector<tcu::Vec2> samplePoints;
930 
931 	for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
932 	{
933 		tcu::Vec2 point;
934 
935 		point.x() = (float)(samples[sampleNdx].renderDataSize);
936 		point.y() = (float)(samples[sampleNdx].duration.*target);
937 
938 		samplePoints.push_back(point);
939 	}
940 
941 	return theilSenSiegelLinearRegression(samplePoints, 0.6f);
942 }
943 
944 template <typename T>
fitLineToSamples(const std::vector<T> & samples,int beginNdx,int endNdx,deUint64 T::SampleType::* target=& T::SampleType::fitResponseDuration)945 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, int beginNdx, int endNdx, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
946 {
947 	return fitLineToSamples(samples, beginNdx, endNdx, 1, target);
948 }
949 
950 template <typename T>
fitLineToSamples(const std::vector<T> & samples,deUint64 T::SampleType::* target=& T::SampleType::fitResponseDuration)951 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
952 {
953 	return fitLineToSamples(samples, 0, (int)samples.size(), target);
954 }
955 
// Returns the area enclosed between two lines over the x range [xmin, xmax].
//
// Each line is given as y = offset + coefficient * x.
//
// - If the same line is on top at both ends (no crossing inside the range),
//   the region is a trapezoid: width times the gap at the midpoint.
// - Otherwise the lines cross inside the range and the region is two
//   triangles that meet at the crossing point. The crossing point splits the
//   range in proportion to the gaps at the two ends, so the triangle bases are
//   derived from those gaps. This avoids dividing by the slope difference,
//   which can be tiny even when the gaps are large (wide x ranges), and
//   therefore needs no absolute slope epsilon.
static float getAreaBetweenLines (float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset, float lineBCoefficient)
{
	const float lineAMin		= lineAOffset + lineACoefficient * xmin;
	const float lineAMax		= lineAOffset + lineACoefficient * xmax;
	const float lineBMin		= lineBOffset + lineBCoefficient * xmin;
	const float lineBMax		= lineBOffset + lineBCoefficient * xmax;
	const bool	aOverBAtBegin	= (lineAMin > lineBMin);
	const bool	aOverBAtEnd		= (lineAMax > lineBMax);
	const float width			= (xmax - xmin);

	if (aOverBAtBegin == aOverBAtEnd)
	{
		// lines do not intersect

		const float midpoint	= (xmin + xmax) / 2.0f;

		const float lineAHeight	= lineAOffset + lineACoefficient * midpoint;
		const float lineBHeight	= lineBOffset + lineBCoefficient * midpoint;
		const float midGap		= lineAHeight - lineBHeight;

		return width * ((midGap < 0.0f) ? (-midGap) : (midGap));
	}
	else
	{
		// lines intersect

		const float leftGap		= lineAMin - lineBMin;
		const float rightGap	= lineAMax - lineBMax;
		const float leftHeight	= (leftGap < 0.0f) ? (-leftGap) : (leftGap);
		const float rightHeight	= (rightGap < 0.0f) ? (-rightGap) : (rightGap);
		const float heightSum	= leftHeight + rightHeight;

		// The triangle bases are lengths, so use the magnitude of the range.
		const float span		= (width < 0.0f) ? (-width) : (width);

		if (heightSum <= 0.0f)
			return 0.0f;

		// base of each triangle = its share of the range
		const float leftBase	= span * (leftHeight / heightSum);
		const float rightBase	= span * (rightHeight / heightSum);

		return (0.5f * leftHeight * leftBase) + (0.5f * rightHeight * rightBase);
	}
}
993 
994 template <typename T>
calculateSampleFitLinearity(const std::vector<T> & samples,int T::* predictor)995 static float calculateSampleFitLinearity (const std::vector<T>& samples, int T::*predictor)
996 {
997 	// Compare the fitted line of first half of the samples to the fitted line of
998 	// the second half of the samples. Calculate a AABB that fully contains every
999 	// sample's x component and both fit lines in this range. Calculate the ratio
1000 	// of the area between the lines and the AABB.
1001 
1002 	const float				epsilon				= 1.e-6f;
1003 	const int				midPoint			= (int)samples.size() / 2;
1004 	const LineParametersWithConfidence	startApproximation	= fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
1005 	const LineParametersWithConfidence	endApproximation	= fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);
1006 
1007 	const float				aabbMinX			= (float)(samples.front().*predictor);
1008 	const float				aabbMinY			= de::min(startApproximation.offset + startApproximation.coefficient*aabbMinX, endApproximation.offset + endApproximation.coefficient*aabbMinX);
1009 	const float				aabbMaxX			= (float)(samples.back().*predictor);
1010 	const float				aabbMaxY			= de::max(startApproximation.offset + startApproximation.coefficient*aabbMaxX, endApproximation.offset + endApproximation.coefficient*aabbMaxX);
1011 
1012 	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1013 	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient, endApproximation.offset, endApproximation.coefficient);
1014 	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1015 
1016 	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1017 }
1018 
1019 template <typename DurationType>
calculateSampleFitLinearity(const std::vector<UploadSampleResult<DurationType>> & samples)1020 static float calculateSampleFitLinearity (const std::vector<UploadSampleResult<DurationType> >& samples)
1021 {
1022 	return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize);
1023 }
1024 
1025 template <typename DurationType>
calculateSampleFitLinearity(const std::vector<RenderSampleResult<DurationType>> & samples)1026 static float calculateSampleFitLinearity (const std::vector<RenderSampleResult<DurationType> >& samples)
1027 {
1028 	return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize);
1029 }
1030 
1031 template <typename T>
calculateSampleTemporalStability(const std::vector<T> & samples,int T::* predictor)1032 static float calculateSampleTemporalStability (const std::vector<T>& samples, int T::*predictor)
1033 {
1034 	// Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
1035 	// Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
1036 	// contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
1037 	// the lines and the AABB.
1038 
1039 	const float				epsilon				= 1.e-6f;
1040 	const LineParametersWithConfidence	evenApproximation	= fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1041 	const LineParametersWithConfidence	oddApproximation	= fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1042 
1043 	const float				aabbMinX			= (float)(samples.front().*predictor);
1044 	const float				aabbMinY			= de::min(evenApproximation.offset + evenApproximation.coefficient*aabbMinX, oddApproximation.offset + oddApproximation.coefficient*aabbMinX);
1045 	const float				aabbMaxX			= (float)(samples.back().*predictor);
1046 	const float				aabbMaxY			= de::max(evenApproximation.offset + evenApproximation.coefficient*aabbMaxX, oddApproximation.offset + oddApproximation.coefficient*aabbMaxX);
1047 
1048 	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1049 	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient, oddApproximation.offset, oddApproximation.coefficient);
1050 	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1051 
1052 	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1053 }
1054 
1055 template <typename DurationType>
calculateSampleTemporalStability(const std::vector<UploadSampleResult<DurationType>> & samples)1056 static float calculateSampleTemporalStability (const std::vector<UploadSampleResult<DurationType> >& samples)
1057 {
1058 	return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize);
1059 }
1060 
1061 template <typename DurationType>
calculateSampleTemporalStability(const std::vector<RenderSampleResult<DurationType>> & samples)1062 static float calculateSampleTemporalStability (const std::vector<RenderSampleResult<DurationType> >& samples)
1063 {
1064 	return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize);
1065 }
1066 
1067 template <typename DurationType>
bucketizeSamplesUniformly(const std::vector<UploadSampleResult<DurationType>> & samples,std::vector<UploadSampleResult<DurationType>> * buckets,int numBuckets,int & minBufferSize,int & maxBufferSize)1068 static void bucketizeSamplesUniformly (const std::vector<UploadSampleResult<DurationType> >& samples, std::vector<UploadSampleResult<DurationType> >* buckets, int numBuckets, int& minBufferSize, int& maxBufferSize)
1069 {
1070 	minBufferSize = 0;
1071 	maxBufferSize = 0;
1072 
1073 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1074 	{
1075 		DE_ASSERT(samples[sampleNdx].allocatedSize != 0);
1076 
1077 		if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
1078 			minBufferSize = samples[sampleNdx].allocatedSize;
1079 		if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
1080 			maxBufferSize = samples[sampleNdx].allocatedSize;
1081 	}
1082 
1083 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1084 	{
1085 		const float bucketNdxFloat	= (samples[sampleNdx].allocatedSize - minBufferSize) / (float)(maxBufferSize - minBufferSize) * numBuckets;
1086 		const int bucketNdx			= de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets-1);
1087 
1088 		buckets[bucketNdx].push_back(samples[sampleNdx]);
1089 	}
1090 }
1091 
1092 template <typename SampleType>
logMapRangeStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1093 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1094 {
1095 	log	<< tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime)
1096 		<< tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime)
1097 		<< tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.map.min2DecileTime)
1098 		<< tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.map.max9DecileTime)
1099 		<< tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1100 }
1101 
1102 template <typename SampleType>
logUnmapStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1103 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1104 {
1105 	log	<< tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime)
1106 		<< tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime)
1107 		<< tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime)
1108 		<< tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime)
1109 		<< tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1110 }
1111 
1112 template <typename SampleType>
logWriteStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1113 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1114 {
1115 	log	<< tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime)
1116 		<< tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime)
1117 		<< tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime)
1118 		<< tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime)
1119 		<< tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1120 }
1121 
1122 template <typename SampleType>
logFlushStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1123 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1124 {
1125 	log	<< tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime)
1126 		<< tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime)
1127 		<< tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime)
1128 		<< tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime)
1129 		<< tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1130 }
1131 
1132 template <typename SampleType>
logAllocStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1133 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1134 {
1135 	log	<< tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime)
1136 		<< tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime)
1137 		<< tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime)
1138 		<< tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime)
1139 		<< tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1140 }
1141 
1142 template <typename SampleType>
logMapRangeStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1143 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1144 {
1145 	DE_UNREF(log);
1146 	DE_UNREF(stats);
1147 }
1148 
1149 template <typename SampleType>
logUnmapStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1150 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1151 {
1152 	DE_UNREF(log);
1153 	DE_UNREF(stats);
1154 }
1155 
1156 template <typename SampleType>
logWriteStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1157 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1158 {
1159 	DE_UNREF(log);
1160 	DE_UNREF(stats);
1161 }
1162 
1163 template <typename SampleType>
logFlushStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1164 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1165 {
1166 	DE_UNREF(log);
1167 	DE_UNREF(stats);
1168 }
1169 
1170 template <typename SampleType>
logAllocStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1171 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1172 {
1173 	DE_UNREF(log);
1174 	DE_UNREF(stats);
1175 }
1176 
1177 template <typename SampleType>
logMapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1178 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1179 {
1180 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration);
1181 	log	<< tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1182 		<< tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1183 		<< tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1184 }
1185 
1186 template <typename SampleType>
logUnmapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1187 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1188 {
1189 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration);
1190 	log	<< tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1191 		<< tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1192 		<< tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1193 }
1194 
1195 template <typename SampleType>
logWriteContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1196 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1197 {
1198 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration);
1199 	log	<< tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1200 		<< tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1201 		<< tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1202 }
1203 
1204 template <typename SampleType>
logFlushContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1205 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1206 {
1207 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration);
1208 	log	<< tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1209 		<< tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1210 		<< tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1211 }
1212 
1213 template <typename SampleType>
logAllocContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1214 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1215 {
1216 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration);
1217 	log	<< tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1218 		<< tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1219 		<< tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1220 }
1221 
1222 template <typename SampleType>
logRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1223 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1224 {
1225 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration);
1226 	log	<< tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1227 		<< tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1228 		<< tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.render.medianTime);
1229 }
1230 
1231 template <typename SampleType>
logReadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1232 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1233 {
1234 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration);
1235 	log	<< tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1236 		<< tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1237 		<< tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
1238 }
1239 
1240 template <typename SampleType>
logUploadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1241 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1242 {
1243 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration);
1244 	log	<< tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1245 		<< tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1246 		<< tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME, stats.upload.medianTime);
1247 }
1248 
1249 template <typename SampleType>
logTotalContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1250 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1251 {
1252 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration);
1253 	log	<< tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1254 		<< tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1255 		<< tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
1256 }
1257 
1258 template <typename SampleType>
logFirstRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1259 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1260 {
1261 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::firstRenderDuration);
1262 	log	<< tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1263 		<< tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1264 		<< tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.firstRender.medianTime);
1265 }
1266 
1267 template <typename SampleType>
logSecondRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1268 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1269 {
1270 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::secondRenderDuration);
1271 	log	<< tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1272 		<< tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1273 		<< tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.secondRender.medianTime);
1274 }
1275 
1276 template <typename SampleType>
logMapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1277 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1278 {
1279 	DE_UNREF(log);
1280 	DE_UNREF(samples);
1281 	DE_UNREF(stats);
1282 }
1283 
1284 template <typename SampleType>
logUnmapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1285 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1286 {
1287 	DE_UNREF(log);
1288 	DE_UNREF(samples);
1289 	DE_UNREF(stats);
1290 }
1291 
1292 template <typename SampleType>
logWriteContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1293 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1294 {
1295 	DE_UNREF(log);
1296 	DE_UNREF(samples);
1297 	DE_UNREF(stats);
1298 }
1299 
1300 template <typename SampleType>
logFlushContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1301 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1302 {
1303 	DE_UNREF(log);
1304 	DE_UNREF(samples);
1305 	DE_UNREF(stats);
1306 }
1307 
1308 template <typename SampleType>
logAllocContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1309 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1310 {
1311 	DE_UNREF(log);
1312 	DE_UNREF(samples);
1313 	DE_UNREF(stats);
1314 }
1315 
1316 template <typename SampleType>
logRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1317 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Value>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1318 {
1319 	DE_UNREF(log);
1320 	DE_UNREF(samples);
1321 	DE_UNREF(stats);
1322 }
1323 
1324 template <typename SampleType>
logReadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1325 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_READ_STATS>::Value>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1326 {
1327 	DE_UNREF(log);
1328 	DE_UNREF(samples);
1329 	DE_UNREF(stats);
1330 }
1331 
1332 template <typename SampleType>
logUploadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1333 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Value>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1334 {
1335 	DE_UNREF(log);
1336 	DE_UNREF(samples);
1337 	DE_UNREF(stats);
1338 }
1339 
1340 template <typename SampleType>
logTotalContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1341 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Value>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1342 {
1343 	DE_UNREF(log);
1344 	DE_UNREF(samples);
1345 	DE_UNREF(stats);
1346 }
1347 
1348 template <typename SampleType>
logFirstRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1349 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Value>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1350 {
1351 	DE_UNREF(log);
1352 	DE_UNREF(samples);
1353 	DE_UNREF(stats);
1354 }
1355 
1356 template <typename SampleType>
logSecondRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1357 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Value>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1358 {
1359 	DE_UNREF(log);
1360 	DE_UNREF(samples);
1361 	DE_UNREF(stats);
1362 }
1363 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<SingleOperationDuration>> & samples)1364 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
1365 {
1366 	log << tcu::TestLog::SampleList("Samples", "Samples")
1367 		<< tcu::TestLog::SampleInfo
1368 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1369 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1370 		<< tcu::TestLog::ValueInfo("UploadTime",		"Upload time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1371 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1372 		<< tcu::TestLog::EndSampleInfo;
1373 
1374 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1375 	{
1376 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1377 		log	<< tcu::TestLog::Sample
1378 			<< samples[sampleNdx].writtenSize
1379 			<< samples[sampleNdx].bufferSize
1380 			<< (int)samples[sampleNdx].duration.totalDuration
1381 			<< fitResidual
1382 			<< tcu::TestLog::EndSample;
1383 	}
1384 
1385 	log << tcu::TestLog::EndSampleList;
1386 }
1387 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeDuration>> & samples)1388 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
1389 {
1390 	log << tcu::TestLog::SampleList("Samples", "Samples")
1391 		<< tcu::TestLog::SampleInfo
1392 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1393 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1394 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1395 		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1396 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1397 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1398 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1399 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1400 		<< tcu::TestLog::EndSampleInfo;
1401 
1402 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1403 	{
1404 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1405 		log	<< tcu::TestLog::Sample
1406 			<< samples[sampleNdx].writtenSize
1407 			<< samples[sampleNdx].bufferSize
1408 			<< (int)samples[sampleNdx].duration.totalDuration
1409 			<< (int)samples[sampleNdx].duration.allocDuration
1410 			<< (int)samples[sampleNdx].duration.mapDuration
1411 			<< (int)samples[sampleNdx].duration.unmapDuration
1412 			<< (int)samples[sampleNdx].duration.writeDuration
1413 			<< fitResidual
1414 			<< tcu::TestLog::EndSample;
1415 	}
1416 
1417 	log << tcu::TestLog::EndSampleList;
1418 }
1419 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> & samples)1420 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
1421 {
1422 	log << tcu::TestLog::SampleList("Samples", "Samples")
1423 		<< tcu::TestLog::SampleInfo
1424 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1425 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1426 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1427 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1428 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1429 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1430 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1431 		<< tcu::TestLog::EndSampleInfo;
1432 
1433 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1434 	{
1435 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1436 		log	<< tcu::TestLog::Sample
1437 			<< samples[sampleNdx].writtenSize
1438 			<< samples[sampleNdx].bufferSize
1439 			<< (int)samples[sampleNdx].duration.totalDuration
1440 			<< (int)samples[sampleNdx].duration.mapDuration
1441 			<< (int)samples[sampleNdx].duration.unmapDuration
1442 			<< (int)samples[sampleNdx].duration.writeDuration
1443 			<< fitResidual
1444 			<< tcu::TestLog::EndSample;
1445 	}
1446 
1447 	log << tcu::TestLog::EndSampleList;
1448 }
1449 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> & samples)1450 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
1451 {
1452 	log << tcu::TestLog::SampleList("Samples", "Samples")
1453 		<< tcu::TestLog::SampleInfo
1454 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1455 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1456 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1457 		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1458 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1459 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1460 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1461 		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1462 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1463 		<< tcu::TestLog::EndSampleInfo;
1464 
1465 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1466 	{
1467 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1468 		log	<< tcu::TestLog::Sample
1469 			<< samples[sampleNdx].writtenSize
1470 			<< samples[sampleNdx].bufferSize
1471 			<< (int)samples[sampleNdx].duration.totalDuration
1472 			<< (int)samples[sampleNdx].duration.allocDuration
1473 			<< (int)samples[sampleNdx].duration.mapDuration
1474 			<< (int)samples[sampleNdx].duration.unmapDuration
1475 			<< (int)samples[sampleNdx].duration.writeDuration
1476 			<< (int)samples[sampleNdx].duration.flushDuration
1477 			<< fitResidual
1478 			<< tcu::TestLog::EndSample;
1479 	}
1480 
1481 	log << tcu::TestLog::EndSampleList;
1482 }
1483 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> & samples)1484 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
1485 {
1486 	log << tcu::TestLog::SampleList("Samples", "Samples")
1487 		<< tcu::TestLog::SampleInfo
1488 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1489 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1490 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1491 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1492 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1493 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1494 		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1495 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1496 		<< tcu::TestLog::EndSampleInfo;
1497 
1498 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1499 	{
1500 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1501 		log	<< tcu::TestLog::Sample
1502 			<< samples[sampleNdx].writtenSize
1503 			<< samples[sampleNdx].bufferSize
1504 			<< (int)samples[sampleNdx].duration.totalDuration
1505 			<< (int)samples[sampleNdx].duration.mapDuration
1506 			<< (int)samples[sampleNdx].duration.unmapDuration
1507 			<< (int)samples[sampleNdx].duration.writeDuration
1508 			<< (int)samples[sampleNdx].duration.flushDuration
1509 			<< fitResidual
1510 			<< tcu::TestLog::EndSample;
1511 	}
1512 
1513 	log << tcu::TestLog::EndSampleList;
1514 }
1515 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<RenderReadDuration>> & samples)1516 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
1517 {
1518 	log << tcu::TestLog::SampleList("Samples", "Samples")
1519 		<< tcu::TestLog::SampleInfo
1520 		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",		"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1521 		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",	"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1522 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1523 		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1524 		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1525 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1526 		<< tcu::TestLog::EndSampleInfo;
1527 
1528 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1529 	{
1530 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1531 		log	<< tcu::TestLog::Sample
1532 			<< samples[sampleNdx].renderDataSize
1533 			<< samples[sampleNdx].numVertices
1534 			<< (int)samples[sampleNdx].duration.renderReadDuration
1535 			<< (int)samples[sampleNdx].duration.renderDuration
1536 			<< (int)samples[sampleNdx].duration.readDuration
1537 			<< fitResidual
1538 			<< tcu::TestLog::EndSample;
1539 	}
1540 
1541 	log << tcu::TestLog::EndSampleList;
1542 }
1543 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> & samples)1544 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
1545 {
1546 	log << tcu::TestLog::SampleList("Samples", "Samples")
1547 		<< tcu::TestLog::SampleInfo
1548 		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1549 		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",		"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1550 		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize",	"Unrelated upload size",	"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1551 		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1552 		<< tcu::TestLog::ValueInfo("DrawCallTime",			"Draw call time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1553 		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1554 		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1555 		<< tcu::TestLog::EndSampleInfo;
1556 
1557 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1558 	{
1559 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1560 		log	<< tcu::TestLog::Sample
1561 			<< samples[sampleNdx].renderDataSize
1562 			<< samples[sampleNdx].numVertices
1563 			<< samples[sampleNdx].unrelatedDataSize
1564 			<< (int)samples[sampleNdx].duration.renderReadDuration
1565 			<< (int)samples[sampleNdx].duration.renderDuration
1566 			<< (int)samples[sampleNdx].duration.readDuration
1567 			<< fitResidual
1568 			<< tcu::TestLog::EndSample;
1569 	}
1570 
1571 	log << tcu::TestLog::EndSampleList;
1572 }
1573 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UploadRenderReadDuration>> & samples)1574 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
1575 {
1576 	log << tcu::TestLog::SampleList("Samples", "Samples")
1577 		<< tcu::TestLog::SampleInfo
1578 		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1579 		<< tcu::TestLog::ValueInfo("UploadSize",		"Data uploaded",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1580 		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",				"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1581 		<< tcu::TestLog::ValueInfo("DrawReadTime",		"Draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1582 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1583 		<< tcu::TestLog::ValueInfo("Upload time",		"Upload time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1584 		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1585 		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1586 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1587 		<< tcu::TestLog::EndSampleInfo;
1588 
1589 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1590 	{
1591 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1592 		log	<< tcu::TestLog::Sample
1593 			<< samples[sampleNdx].renderDataSize
1594 			<< samples[sampleNdx].uploadedDataSize
1595 			<< samples[sampleNdx].numVertices
1596 			<< (int)samples[sampleNdx].duration.renderReadDuration
1597 			<< (int)samples[sampleNdx].duration.totalDuration
1598 			<< (int)samples[sampleNdx].duration.uploadDuration
1599 			<< (int)samples[sampleNdx].duration.renderDuration
1600 			<< (int)samples[sampleNdx].duration.readDuration
1601 			<< fitResidual
1602 			<< tcu::TestLog::EndSample;
1603 	}
1604 
1605 	log << tcu::TestLog::EndSampleList;
1606 }
1607 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> & samples)1608 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
1609 {
1610 	log << tcu::TestLog::SampleList("Samples", "Samples")
1611 		<< tcu::TestLog::SampleInfo
1612 		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1613 		<< tcu::TestLog::ValueInfo("UploadSize",			"Data uploaded",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1614 		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",				"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1615 		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize",	"Unrelated upload size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1616 		<< tcu::TestLog::ValueInfo("DrawReadTime",			"Draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1617 		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1618 		<< tcu::TestLog::ValueInfo("Upload time",			"Upload time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1619 		<< tcu::TestLog::ValueInfo("DrawCallTime",			"Draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1620 		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1621 		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1622 		<< tcu::TestLog::EndSampleInfo;
1623 
1624 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1625 	{
1626 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1627 		log	<< tcu::TestLog::Sample
1628 			<< samples[sampleNdx].renderDataSize
1629 			<< samples[sampleNdx].uploadedDataSize
1630 			<< samples[sampleNdx].numVertices
1631 			<< samples[sampleNdx].unrelatedDataSize
1632 			<< (int)samples[sampleNdx].duration.renderReadDuration
1633 			<< (int)samples[sampleNdx].duration.totalDuration
1634 			<< (int)samples[sampleNdx].duration.uploadDuration
1635 			<< (int)samples[sampleNdx].duration.renderDuration
1636 			<< (int)samples[sampleNdx].duration.readDuration
1637 			<< fitResidual
1638 			<< tcu::TestLog::EndSample;
1639 	}
1640 
1641 	log << tcu::TestLog::EndSampleList;
1642 }
1643 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> & samples)1644 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
1645 {
1646 	log << tcu::TestLog::SampleList("Samples", "Samples")
1647 		<< tcu::TestLog::SampleInfo
1648 		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",						"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1649 		<< tcu::TestLog::ValueInfo("UploadSize",			"Data uploaded",						"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1650 		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",					"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1651 		<< tcu::TestLog::ValueInfo("DrawReadTime",			"Second draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1652 		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1653 		<< tcu::TestLog::ValueInfo("FirstDrawCallTime",		"First draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1654 		<< tcu::TestLog::ValueInfo("Upload time",			"Upload time",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1655 		<< tcu::TestLog::ValueInfo("SecondDrawCallTime",	"Second draw call time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1656 		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1657 		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1658 		<< tcu::TestLog::EndSampleInfo;
1659 
1660 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1661 	{
1662 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1663 		log	<< tcu::TestLog::Sample
1664 			<< samples[sampleNdx].renderDataSize
1665 			<< samples[sampleNdx].uploadedDataSize
1666 			<< samples[sampleNdx].numVertices
1667 			<< (int)samples[sampleNdx].duration.renderReadDuration
1668 			<< (int)samples[sampleNdx].duration.totalDuration
1669 			<< (int)samples[sampleNdx].duration.firstRenderDuration
1670 			<< (int)samples[sampleNdx].duration.uploadDuration
1671 			<< (int)samples[sampleNdx].duration.secondRenderDuration
1672 			<< (int)samples[sampleNdx].duration.readDuration
1673 			<< fitResidual
1674 			<< tcu::TestLog::EndSample;
1675 	}
1676 
1677 	log << tcu::TestLog::EndSampleList;
1678 }
1679 
1680 template <typename SampleType>
analyzeSampleResults(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,bool logBucketPerformance)1681 static UploadSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, bool logBucketPerformance)
1682 {
1683 	// Assume data is linear with some outliers, fit a line
1684 	const LineParametersWithConfidence									theilSenFitting						= fitLineToSamples(samples);
1685 	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
1686 	float													approximatedTransferRate;
1687 	float													approximatedTransferRateNoConstant;
1688 
1689 	// Output raw samples
1690 	{
1691 		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
1692 		logSampleList(log, theilSenFitting, samples);
1693 	}
1694 
1695 	// Calculate results for different ranges
1696 	if (logBucketPerformance)
1697 	{
1698 		const int										numBuckets				= 4;
1699 		int												minBufferSize			= 0;
1700 		int												maxBufferSize			= 0;
1701 		std::vector<UploadSampleResult<SampleType> >	buckets[numBuckets];
1702 
1703 		bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);
1704 
1705 		for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
1706 		{
1707 			if (buckets[bucketNdx].empty())
1708 				continue;
1709 
1710 			// Print a nice result summary
1711 
1712 			const int												bucketRangeMin	= minBufferSize + (int)(( bucketNdx    / (float)numBuckets) * (maxBufferSize - minBufferSize));
1713 			const int												bucketRangeMax	= minBufferSize + (int)(((bucketNdx+1) / (float)numBuckets) * (maxBufferSize - minBufferSize));
1714 			const typename SampleTypeTraits<SampleType>::StatsType	stats			= calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
1715 			const tcu::ScopedLogSection								section			(log, "BufferSizeRange", std::string("Transfer performance with buffer size in range [").append(getHumanReadableByteSize(bucketRangeMin).append(", ").append(getHumanReadableByteSize(bucketRangeMax).append("]"))));
1716 
1717 			logMapRangeStats<SampleType>(log, stats);
1718 			logUnmapStats<SampleType>(log, stats);
1719 			logWriteStats<SampleType>(log, stats);
1720 			logFlushStats<SampleType>(log, stats);
1721 			logAllocStats<SampleType>(log, stats);
1722 
1723 			log	<< tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
1724 				<< tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
1725 				<< tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.result.min2DecileTime)
1726 				<< tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.result.max9DecileTime)
1727 				<< tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
1728 				<< tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, stats.medianRate / 1024.0f / 1024.0f)
1729 				<< tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiffTime)
1730 				<< tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiff9DecileTime)
1731 				<< tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME, stats.medianDiffTime)
1732 				<< tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.maxRelDiffTime * 100.0f)
1733 				<< tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
1734 				<< tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
1735 		}
1736 	}
1737 
1738 	// Contributions
1739 	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1740 	{
1741 		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");
1742 
1743 		logMapContribution(log, samples, resultStats);
1744 		logUnmapContribution(log, samples, resultStats);
1745 		logWriteContribution(log, samples, resultStats);
1746 		logFlushContribution(log, samples, resultStats);
1747 		logAllocContribution(log, samples, resultStats);
1748 	}
1749 
1750 	// Print results
1751 	{
1752 		const tcu::ScopedLogSection	section(log, "Results", "Results");
1753 
1754 		const int	medianBufferSize					= (samples.front().bufferSize + samples.back().bufferSize) / 2;
1755 		const float	approximatedTransferTime			= (theilSenFitting.offset + theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f;
1756 		const float	approximatedTransferTimeNoConstant	= (theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f;
1757 		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
1758 		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);
1759 
1760 		approximatedTransferRateNoConstant				= medianBufferSize / approximatedTransferTimeNoConstant;
1761 		approximatedTransferRate						= medianBufferSize / approximatedTransferTime;
1762 
1763 		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1764 			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1765 			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1766 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1767 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1768 			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1769 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1770 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1771 			<< tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
1772 			<< tcu::TestLog::Float("ApproximatedTransferRateNoConstant", "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
1773 			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1774 			<< tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1775 	}
1776 
1777 	// return approximated transfer rate
1778 	{
1779 		UploadSampleAnalyzeResult result;
1780 
1781 		result.transferRateMedian = resultStats.medianRate;
1782 		result.transferRateAtRange = approximatedTransferRate;
1783 		result.transferRateAtInfinity = approximatedTransferRateNoConstant;
1784 
1785 		return result;
1786 	}
1787 }
1788 
1789 template <typename SampleType>
analyzeSampleResults(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples)1790 static RenderSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples)
1791 {
1792 	// Assume data is linear with some outliers, fit a line
1793 	const LineParametersWithConfidence									theilSenFitting						= fitLineToSamples(samples);
1794 	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
1795 	float													approximatedProcessingRate;
1796 	float													approximatedProcessingRateNoConstant;
1797 
1798 	// output raw samples
1799 	{
1800 		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
1801 		logSampleList(log, theilSenFitting, samples);
1802 	}
1803 
1804 	// Contributions
1805 	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1806 	{
1807 		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");
1808 
1809 		logFirstRenderContribution(log, samples, resultStats);
1810 		logUploadContribution(log, samples, resultStats);
1811 		logRenderContribution(log, samples, resultStats);
1812 		logSecondRenderContribution(log, samples, resultStats);
1813 		logReadContribution(log, samples, resultStats);
1814 		logTotalContribution(log, samples, resultStats);
1815 	}
1816 
1817 	// print results
1818 	{
1819 		const tcu::ScopedLogSection	section(log, "Results", "Results");
1820 
1821 		const int	medianDataSize						= (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
1822 		const float	approximatedRenderTime				= (theilSenFitting.offset + theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f;
1823 		const float	approximatedRenderTimeNoConstant	= (theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f;
1824 		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
1825 		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);
1826 
1827 		approximatedProcessingRateNoConstant			= medianDataSize / approximatedRenderTimeNoConstant;
1828 		approximatedProcessingRate						= medianDataSize / approximatedRenderTime;
1829 
1830 		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1831 			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1832 			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1833 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1834 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1835 			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1836 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1837 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1838 			<< tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
1839 			<< tcu::TestLog::Float("ApproximatedProcessRateNoConstant", "Approximated processing rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
1840 			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1841 			<< tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1842 	}
1843 
1844 	// return approximated render rate
1845 	{
1846 		RenderSampleAnalyzeResult result;
1847 
1848 		result.renderRateMedian		= resultStats.medianRate;
1849 		result.renderRateAtRange	= approximatedProcessingRate;
1850 		result.renderRateAtInfinity = approximatedProcessingRateNoConstant;
1851 
1852 		return result;
1853 	}
1854 	return RenderSampleAnalyzeResult();
1855 }
1856 
generateTwoPassRandomIterationOrder(std::vector<int> & iterationOrder,int numSamples)1857 static void generateTwoPassRandomIterationOrder (std::vector<int>& iterationOrder, int numSamples)
1858 {
1859 	de::Random	rnd			(0xabc);
1860 	const int	midPoint	= (numSamples+1) / 2;		// !< ceil(m_numSamples / 2)
1861 
1862 	DE_ASSERT((int)iterationOrder.size() == numSamples);
1863 
1864 	// Two "passes" over range, randomize order in both passes
1865 	// This allows to us detect if iterations are not independent
1866 	// (first run and later run samples differ significantly?)
1867 
1868 	for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx)
1869 		iterationOrder[sampleNdx] = sampleNdx * 2;
1870 	for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx)
1871 		iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1;
1872 
1873 	for (int ndx = 0; ndx < midPoint; ++ndx)
1874 		std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]);
1875 	for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx)
1876 		std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size()-1)]);
1877 }
1878 
1879 template <typename SampleType>
1880 class BasicBufferCase : public TestCase
1881 {
1882 public:
1883 
1884 	enum Flags
1885 	{
1886 		FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
1887 	};
1888 							BasicBufferCase		(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags);
1889 							~BasicBufferCase	(void);
1890 
1891 	virtual void			init				(void);
1892 	virtual void			deinit				(void);
1893 
1894 protected:
1895 	IterateResult			iterate				(void);
1896 
1897 	virtual bool			runSample			(int iteration, UploadSampleResult<SampleType>& sample) = 0;
1898 	virtual void			logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results) = 0;
1899 
1900 	void					disableGLWarmup		(void);
1901 	void					waitGLResults		(void);
1902 
1903 	enum
1904 	{
1905 		DUMMY_RENDER_AREA_SIZE = 32
1906 	};
1907 
1908 	glu::ShaderProgram*		m_dummyProgram;
1909 	deInt32					m_dummyProgramPosLoc;
1910 	deUint32				m_bufferID;
1911 
1912 	const int				m_numSamples;
1913 	const int				m_bufferSizeMin;
1914 	const int				m_bufferSizeMax;
1915 	const bool				m_allocateLargerBuffer;
1916 
1917 private:
1918 	int						m_iteration;
1919 	std::vector<int>		m_iterationOrder;
1920 	std::vector<UploadSampleResult<SampleType> > m_results;
1921 
1922 	bool					m_useGL;
1923 	int						m_bufferRandomizerTimer;
1924 };
1925 
1926 template <typename SampleType>
BasicBufferCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,int numSamples,int flags)1927 BasicBufferCase<SampleType>::BasicBufferCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags)
1928 	: TestCase					(context, tcu::NODETYPE_PERFORMANCE, name, desc)
1929 	, m_dummyProgram			(DE_NULL)
1930 	, m_dummyProgramPosLoc		(-1)
1931 	, m_bufferID				(0)
1932 	, m_numSamples				(numSamples)
1933 	, m_bufferSizeMin			(bufferSizeMin)
1934 	, m_bufferSizeMax			(bufferSizeMax)
1935 	, m_allocateLargerBuffer	((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
1936 	, m_iteration				(0)
1937 	, m_iterationOrder			(numSamples)
1938 	, m_results					(numSamples)
1939 	, m_useGL					(true)
1940 	, m_bufferRandomizerTimer	(0)
1941 {
1942 	// "randomize" iteration order. Deterministic, patternless
1943 	generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);
1944 
1945 	// choose buffer sizes
1946 	for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
1947 	{
1948 		const int rawBufferSize			= (int)deFloatFloor(bufferSizeMin + (bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / m_numSamples));
1949 		const int bufferSize			= deAlign32(rawBufferSize, 16);
1950 		const int allocatedBufferSize	= deAlign32((m_allocateLargerBuffer) ? ((int)(bufferSize * 1.5f)) : (bufferSize), 16);
1951 
1952 		m_results[sampleNdx].bufferSize		= bufferSize;
1953 		m_results[sampleNdx].allocatedSize	= allocatedBufferSize;
1954 		m_results[sampleNdx].writtenSize	= -1;
1955 	}
1956 }
1957 
1958 template <typename SampleType>
~BasicBufferCase(void)1959 BasicBufferCase<SampleType>::~BasicBufferCase (void)
1960 {
1961 	deinit();
1962 }
1963 
1964 template <typename SampleType>
init(void)1965 void BasicBufferCase<SampleType>::init (void)
1966 {
1967 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1968 
1969 	if (!m_useGL)
1970 		return;
1971 
1972 	// \note Viewport size is not checked, it won't matter if the render target actually is smaller hhan DUMMY_RENDER_AREA_SIZE
1973 
1974 	// dummy shader
1975 
1976 	m_dummyProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_dummyVertexShader) << glu::FragmentSource(s_dummyFragnentShader));
1977 	if (!m_dummyProgram->isOk())
1978 	{
1979 		m_testCtx.getLog() << *m_dummyProgram;
1980 		throw tcu::TestError("failed to build shader program");
1981 	}
1982 
1983 	m_dummyProgramPosLoc = gl.getAttribLocation(m_dummyProgram->getProgram(), "a_position");
1984 	if (m_dummyProgramPosLoc == -1)
1985 		throw tcu::TestError("a_position location was -1");
1986 }
1987 
1988 template <typename SampleType>
deinit(void)1989 void BasicBufferCase<SampleType>::deinit (void)
1990 {
1991 	if (m_bufferID)
1992 	{
1993 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
1994 		m_bufferID = 0;
1995 	}
1996 
1997 	delete m_dummyProgram;
1998 	m_dummyProgram = DE_NULL;
1999 }
2000 
2001 template <typename SampleType>
iterate(void)2002 TestCase::IterateResult BasicBufferCase<SampleType>::iterate (void)
2003 {
2004 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2005 	static bool				buffersWarmedUp	= false;
2006 
2007 	static const deUint32	usages[] =
2008 	{
2009 		GL_STREAM_DRAW, GL_STREAM_READ, GL_STREAM_COPY,
2010 		GL_STATIC_DRAW, GL_STATIC_READ, GL_STATIC_COPY,
2011 		GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
2012 	};
2013 
2014 	// Allocate some random sized buffers and remove them to
2015 	// make sure the first samples too have some buffers removed
2016 	// just before their allocation. This is only needed by the
2017 	// the first test.
2018 
2019 	if (m_useGL && !buffersWarmedUp)
2020 	{
2021 		const int					numRandomBuffers				= 6;
2022 		const int					numRepeats						= 10;
2023 		const int					maxBufferSize					= 16777216;
2024 		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
2025 		de::Random					rnd								(0x1234);
2026 		deUint32					bufferIDs[numRandomBuffers]		= {0};
2027 
2028 		gl.useProgram(m_dummyProgram->getProgram());
2029 		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2030 		gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2031 
2032 		for (int ndx = 0; ndx < numRepeats; ++ndx)
2033 		{
2034 			// Create buffer and maybe draw from it
2035 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2036 			{
2037 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2038 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2039 
2040 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2041 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2042 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2043 
2044 				if (rnd.getBool())
2045 				{
2046 					gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2047 					gl.drawArrays(GL_POINTS, 0, 1);
2048 					gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2049 				}
2050 			}
2051 
2052 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2053 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2054 
2055 			waitGLResults();
2056 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2057 
2058 			m_testCtx.touchWatchdog();
2059 		}
2060 
2061 		buffersWarmedUp = true;
2062 		return CONTINUE;
2063 	}
2064 	else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
2065 	{
2066 		// Do some random buffer operations to every now and then
2067 		// to make sure the previous test iterations won't affect
2068 		// following test runs.
2069 
2070 		const int					numRandomBuffers				= 3;
2071 		const int					maxBufferSize					= 16777216;
2072 		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
2073 		de::Random					rnd								(0x1234 + 0xabc * m_bufferRandomizerTimer);
2074 
2075 		// BufferData
2076 		{
2077 			deUint32 bufferIDs[numRandomBuffers] = {0};
2078 
2079 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2080 			{
2081 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2082 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2083 
2084 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2085 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2086 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2087 			}
2088 
2089 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2090 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2091 		}
2092 
2093 		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");
2094 
2095 		// Do some memory mappings
2096 		{
2097 			deUint32 bufferIDs[numRandomBuffers] = {0};
2098 
2099 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2100 			{
2101 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2102 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2103 				void*			ptr;
2104 
2105 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2106 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2107 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2108 
2109 				gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2110 				gl.drawArrays(GL_POINTS, 0, 1);
2111 				gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2112 
2113 				if (rnd.getBool())
2114 					waitGLResults();
2115 
2116 				ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
2117 				if (ptr)
2118 				{
2119 					medianTimeMemcpy(ptr, &zeroData[0], randomSize);
2120 					gl.unmapBuffer(GL_ARRAY_BUFFER);
2121 				}
2122 			}
2123 
2124 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2125 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2126 
2127 			waitGLResults();
2128 		}
2129 
2130 		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
2131 		return CONTINUE;
2132 	}
2133 	else
2134 	{
2135 		const int	currentIteration	= m_iteration;
2136 		const int	sampleNdx			= m_iterationOrder[currentIteration];
2137 		const bool	sampleRunSuccessful	= runSample(currentIteration, m_results[sampleNdx]);
2138 
2139 		GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");
2140 
2141 		// Retry failed samples
2142 		if (!sampleRunSuccessful)
2143 			return CONTINUE;
2144 
2145 		if (++m_iteration >= m_numSamples)
2146 		{
2147 			logAndSetTestResult(m_results);
2148 			return STOP;
2149 		}
2150 		else
2151 			return CONTINUE;
2152 	}
2153 }
2154 
2155 template <typename SampleType>
disableGLWarmup(void)2156 void BasicBufferCase<SampleType>::disableGLWarmup (void)
2157 {
2158 	m_useGL = false;
2159 }
2160 
2161 template <typename SampleType>
waitGLResults(void)2162 void BasicBufferCase<SampleType>::waitGLResults (void)
2163 {
2164 	tcu::Surface dummySurface(DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2165 	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
2166 }
2167 
2168 template <typename SampleType>
2169 class BasicUploadCase : public BasicBufferCase<SampleType>
2170 {
2171 public:
2172 	enum CaseType
2173 	{
2174 		CASE_NO_BUFFERS = 0,
2175 		CASE_NEW_BUFFER,
2176 		CASE_UNSPECIFIED_BUFFER,
2177 		CASE_SPECIFIED_BUFFER,
2178 		CASE_USED_BUFFER,
2179 		CASE_USED_LARGER_BUFFER,
2180 
2181 		CASE_LAST
2182 	};
2183 
2184 	enum CaseFlags
2185 	{
2186 		FLAG_DONT_LOG_BUFFER_INFO				= 0x01,
2187 		FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT	= 0x02,
2188 	};
2189 
2190 	enum ResultType
2191 	{
2192 		RESULT_MEDIAN_TRANSFER_RATE = 0,
2193 		RESULT_ASYMPTOTIC_TRANSFER_RATE,
2194 	};
2195 
2196 						BasicUploadCase		(Context& context,
2197 											 const char* name,
2198 											 const char* desc,
2199 											 int bufferSizeMin,
2200 											 int bufferSizeMax,
2201 											 int numSamples,
2202 											 deUint32 bufferUsage,
2203 											 CaseType caseType,
2204 											 ResultType resultType,
2205 											 int flags = 0);
2206 
2207 						~BasicUploadCase	(void);
2208 
2209 	virtual void		init				(void);
2210 	virtual void		deinit				(void);
2211 
2212 private:
2213 	bool				runSample			(int iteration, UploadSampleResult<SampleType>& sample);
2214 	void				createBuffer		(int bufferSize, int iteration);
2215 	void				deleteBuffer		(int bufferSize);
2216 	void				useBuffer			(int bufferSize);
2217 
2218 	virtual void		testBufferUpload	(UploadSampleResult<SampleType>& result, int writeSize) = 0;
2219 	void				logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results);
2220 
2221 	deUint32			m_dummyBufferID;
2222 
2223 protected:
2224 	const CaseType		m_caseType;
2225 	const ResultType	m_resultType;
2226 	const deUint32		m_bufferUsage;
2227 	const bool			m_logBufferInfo;
2228 	const bool			m_bufferUnspecifiedContent;
2229 	std::vector<deUint8> m_zeroData;
2230 
2231 	using BasicBufferCase<SampleType>::m_testCtx;
2232 	using BasicBufferCase<SampleType>::m_context;
2233 
2234 	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
2235 	using BasicBufferCase<SampleType>::m_dummyProgram;
2236 	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
2237 	using BasicBufferCase<SampleType>::m_bufferID;
2238 	using BasicBufferCase<SampleType>::m_numSamples;
2239 	using BasicBufferCase<SampleType>::m_bufferSizeMin;
2240 	using BasicBufferCase<SampleType>::m_bufferSizeMax;
2241 	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
2242 };
2243 
2244 template <typename SampleType>
BasicUploadCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,int numSamples,deUint32 bufferUsage,CaseType caseType,ResultType resultType,int flags)2245 BasicUploadCase<SampleType>::BasicUploadCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, deUint32 bufferUsage, CaseType caseType, ResultType resultType, int flags)
2246 	: BasicBufferCase<SampleType>	(context, name, desc, bufferSizeMin, bufferSizeMax, numSamples, (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0))
2247 	, m_dummyBufferID				(0)
2248 	, m_caseType					(caseType)
2249 	, m_resultType					(resultType)
2250 	, m_bufferUsage					(bufferUsage)
2251 	, m_logBufferInfo				((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0)
2252 	, m_bufferUnspecifiedContent	((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0)
2253 	, m_zeroData					()
2254 {
2255 	DE_ASSERT(m_caseType < CASE_LAST);
2256 }
2257 
2258 template <typename SampleType>
~BasicUploadCase(void)2259 BasicUploadCase<SampleType>::~BasicUploadCase (void)
2260 {
2261 	deinit();
2262 }
2263 
2264 template <typename SampleType>
init(void)2265 void BasicUploadCase<SampleType>::init (void)
2266 {
2267 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2268 
2269 	BasicBufferCase<SampleType>::init();
2270 
2271 	// zero buffer as upload source
2272 	m_zeroData.resize(m_bufferSizeMax, 0x00);
2273 
2274 	// dummy buffer
2275 
2276 	gl.genBuffers(1, &m_dummyBufferID);
2277 	GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");
2278 
2279 	// log basic info
2280 
2281 	m_testCtx.getLog()
2282 		<< tcu::TestLog::Message
2283 		<< "Testing performance with " << m_numSamples << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
2284 		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
2285 		<< tcu::TestLog::EndMessage;
2286 
2287 	if (m_logBufferInfo)
2288 	{
2289 		switch (m_caseType)
2290 		{
2291 			case CASE_NO_BUFFERS:
2292 				break;
2293 
2294 			case CASE_NEW_BUFFER:
2295 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is generated but not specified (i.e glBufferData() not called)." << tcu::TestLog::EndMessage;
2296 				break;
2297 
2298 			case CASE_UNSPECIFIED_BUFFER:
2299 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)." << tcu::TestLog::EndMessage;
2300 				break;
2301 
2302 			case CASE_SPECIFIED_BUFFER:
2303 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer contents are specified prior testing with glBufferData(data)." << tcu::TestLog::EndMessage;
2304 				break;
2305 
2306 			case CASE_USED_BUFFER:
2307 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing." << tcu::TestLog::EndMessage;
2308 				break;
2309 
2310 			case CASE_USED_LARGER_BUFFER:
2311 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is larger and has been used in drawing before testing." << tcu::TestLog::EndMessage;
2312 				break;
2313 
2314 			default:
2315 				DE_ASSERT(false);
2316 				break;
2317 		}
2318 	}
2319 
2320 	if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE)
2321 		m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples." << tcu::TestLog::EndMessage;
2322 	else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE)
2323 		m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the asymptotic transfer rate as the buffer size approaches infinity." << tcu::TestLog::EndMessage;
2324 	else
2325 		DE_ASSERT(false);
2326 }
2327 
2328 template <typename SampleType>
deinit(void)2329 void BasicUploadCase<SampleType>::deinit (void)
2330 {
2331 	if (m_dummyBufferID)
2332 	{
2333 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_dummyBufferID);
2334 		m_dummyBufferID = 0;
2335 	}
2336 
2337 	m_zeroData.clear();
2338 
2339 	BasicBufferCase<SampleType>::deinit();
2340 }
2341 
2342 template <typename SampleType>
runSample(int iteration,UploadSampleResult<SampleType> & sample)2343 bool BasicUploadCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
2344 {
2345 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2346 	const int				allocatedBufferSize	= sample.allocatedSize;
2347 	const int				bufferSize			= sample.bufferSize;
2348 
2349 	if (m_caseType != CASE_NO_BUFFERS)
2350 		createBuffer(iteration, allocatedBufferSize);
2351 
2352 	// warmup CPU before the test to make sure the power management governor
2353 	// keeps us in the "high performance" mode
2354 	{
2355 		deYield();
2356 		tcu::warmupCPU();
2357 		deYield();
2358 	}
2359 
2360 	testBufferUpload(sample, bufferSize);
2361 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
2362 
2363 	if (m_caseType != CASE_NO_BUFFERS)
2364 		deleteBuffer(bufferSize);
2365 
2366 	return true;
2367 }
2368 
2369 template <typename SampleType>
createBuffer(int iteration,int bufferSize)2370 void BasicUploadCase<SampleType>::createBuffer (int iteration, int bufferSize)
2371 {
2372 	DE_ASSERT(!m_bufferID);
2373 	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2374 
2375 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2376 
2377 	// create buffer
2378 
2379 	if (m_caseType == CASE_NO_BUFFERS)
2380 		return;
2381 
2382 	// create empty buffer
2383 
2384 	gl.genBuffers(1, &m_bufferID);
2385 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2386 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2387 
2388 	if (m_caseType == CASE_NEW_BUFFER)
2389 	{
2390 		// upload something else first, this should reduce noise in samples
2391 
2392 		de::Random					rng				(0xbadc * iteration);
2393 		const int					sizeDelta		= rng.getInt(0, 2097140);
2394 		const int					dummyUploadSize = deAlign32(1048576 + sizeDelta, 4*4); // Vary buffer size to make sure it is always reallocated
2395 		const std::vector<deUint8>	dummyData		(dummyUploadSize, 0x20);
2396 
2397 		gl.bindBuffer(GL_ARRAY_BUFFER, m_dummyBufferID);
2398 		gl.bufferData(GL_ARRAY_BUFFER, dummyUploadSize, &dummyData[0], m_bufferUsage);
2399 
2400 		// make sure upload won't interfere with the test
2401 		useBuffer(dummyUploadSize);
2402 
2403 		// don't kill the buffer so that the following upload cannot potentially reuse the buffer
2404 
2405 		return;
2406 	}
2407 
2408 	// specify it
2409 
2410 	if (m_caseType == CASE_UNSPECIFIED_BUFFER)
2411 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2412 	else
2413 	{
2414 		const std::vector<deUint8> dummyData(bufferSize, 0x20);
2415 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2416 	}
2417 
2418 	if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
2419 		return;
2420 
2421 	// use it and make sure it is uploaded
2422 
2423 	useBuffer(bufferSize);
2424 	DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
2425 }
2426 
2427 template <typename SampleType>
deleteBuffer(int bufferSize)2428 void BasicUploadCase<SampleType>::deleteBuffer (int bufferSize)
2429 {
2430 	DE_ASSERT(m_bufferID);
2431 	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2432 
2433 	// render from the buffer to make sure it actually made it to the gpu. This is to
2434 	// make sure that if the upload actually happens later or is happening right now in
2435 	// the background, it will not interfere with further test runs
2436 
2437 	// if buffer contains unspecified content, sourcing data from it results in undefined
2438 	// results, possibly including program termination. Specify all data to prevent such
2439 	// case from happening
2440 
2441 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2442 
2443 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2444 
2445 	if (m_bufferUnspecifiedContent)
2446 	{
2447 		const std::vector<deUint8> dummyData(bufferSize, 0x20);
2448 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2449 
2450 		GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
2451 	}
2452 
2453 	useBuffer(bufferSize);
2454 
2455 	gl.deleteBuffers(1, &m_bufferID);
2456 	m_bufferID = 0;
2457 }
2458 
2459 template <typename SampleType>
useBuffer(int bufferSize)2460 void BasicUploadCase<SampleType>::useBuffer (int bufferSize)
2461 {
2462 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2463 
2464 	gl.useProgram(m_dummyProgram->getProgram());
2465 
2466 	gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2467 	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2468 	gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2469 
2470 	// use whole buffer to make sure buffer is uploaded by drawing first and last
2471 	DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
2472 	gl.drawArrays(GL_POINTS, 0, 1);
2473 	gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);
2474 
2475 	BasicBufferCase<SampleType>::waitGLResults();
2476 }
2477 
2478 template <typename SampleType>
logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> & results)2479 void BasicUploadCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
2480 {
2481 	const UploadSampleAnalyzeResult	analysis	= analyzeSampleResults(m_testCtx.getLog(), results, true);
2482 
2483 	// with small buffers, report the median transfer rate of the samples
2484 	// with large buffers, report the expected preformance of infinitely large buffers
2485 	const float						rate		= (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) : (analysis.transferRateMedian);
2486 
2487 	if (rate == std::numeric_limits<float>::infinity())
2488 	{
2489 		// sample times are 1) invalid or 2) timer resolution too low
2490 		// report speed 0 bytes / s since real value cannot be determined
2491 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
2492 	}
2493 	else
2494 	{
2495 		// report transfer rate in MB / s
2496 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
2497 	}
2498 }
2499 
2500 class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
2501 {
2502 public:
2503 				ReferenceMemcpyCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase);
2504 				~ReferenceMemcpyCase	(void);
2505 
2506 	void		init					(void);
2507 	void		deinit					(void);
2508 private:
2509 	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2510 
2511 	std::vector<deUint8> m_dstBuf;
2512 };
2513 
ReferenceMemcpyCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,bool largeBuffersCase)2514 ReferenceMemcpyCase::ReferenceMemcpyCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase)
2515 	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS, (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
2516 	, m_dstBuf									()
2517 {
2518 	disableGLWarmup();
2519 }
2520 
~ReferenceMemcpyCase(void)2521 ReferenceMemcpyCase::~ReferenceMemcpyCase (void)
2522 {
2523 }
2524 
init(void)2525 void ReferenceMemcpyCase::init (void)
2526 {
2527 	// Describe what the test tries to do
2528 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;
2529 
2530 	m_dstBuf.resize(m_bufferSizeMax, 0x00);
2531 
2532 	BasicUploadCase<SingleOperationDuration>::init();
2533 }
2534 
deinit(void)2535 void ReferenceMemcpyCase::deinit (void)
2536 {
2537 	m_dstBuf.clear();
2538 	BasicUploadCase<SingleOperationDuration>::deinit();
2539 }
2540 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2541 void ReferenceMemcpyCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2542 {
2543 	// write
2544 	result.duration.totalDuration = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
2545 	result.duration.fitResponseDuration = result.duration.totalDuration;
2546 
2547 	result.writtenSize = bufferSize;
2548 }
2549 
2550 class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2551 {
2552 public:
2553 				BufferDataUploadCase	(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType);
2554 				~BufferDataUploadCase	(void);
2555 
2556 	void		init					(void);
2557 private:
2558 	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2559 };
2560 
BufferDataUploadCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,deUint32 bufferUsage,CaseType caseType)2561 BufferDataUploadCase::BufferDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType)
2562 	: BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, caseType, RESULT_MEDIAN_TRANSFER_RATE)
2563 {
2564 }
2565 
~BufferDataUploadCase(void)2566 BufferDataUploadCase::~BufferDataUploadCase (void)
2567 {
2568 }
2569 
init(void)2570 void BufferDataUploadCase::init (void)
2571 {
2572 	// Describe what the test tries to do
2573 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;
2574 
2575 	BasicUploadCase<SingleOperationDuration>::init();
2576 }
2577 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2578 void BufferDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2579 {
2580 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2581 
2582 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2583 
2584 	// upload
2585 	{
2586 		deUint64 startTime;
2587 		deUint64 endTime;
2588 
2589 		startTime = deGetMicroseconds();
2590 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2591 		endTime = deGetMicroseconds();
2592 
2593 		result.duration.totalDuration = endTime - startTime;
2594 		result.duration.fitResponseDuration = result.duration.totalDuration;
2595 		result.writtenSize = bufferSize;
2596 	}
2597 }
2598 
2599 class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2600 {
2601 public:
2602 	enum Flags
2603 	{
2604 		FLAG_FULL_UPLOAD			= 0x01,
2605 		FLAG_PARTIAL_UPLOAD			= 0x02,
2606 		FLAG_INVALIDATE_BEFORE_USE	= 0x04,
2607 	};
2608 
2609 				BufferSubDataUploadCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags);
2610 				~BufferSubDataUploadCase	(void);
2611 
2612 	void		init						(void);
2613 private:
2614 	void		testBufferUpload			(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2615 
2616 	const bool	m_fullUpload;
2617 	const bool	m_invalidateBeforeUse;
2618 };
2619 
BufferSubDataUploadCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,deUint32 bufferUsage,CaseType parentCase,int flags)2620 BufferSubDataUploadCase::BufferSubDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags)
2621 	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, parentCase, RESULT_MEDIAN_TRANSFER_RATE)
2622 	, m_fullUpload								((flags & FLAG_FULL_UPLOAD) != 0)
2623 	, m_invalidateBeforeUse						((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
2624 {
2625 	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
2626 	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
2627 }
2628 
~BufferSubDataUploadCase(void)2629 BufferSubDataUploadCase::~BufferSubDataUploadCase (void)
2630 {
2631 }
2632 
init(void)2633 void BufferSubDataUploadCase::init (void)
2634 {
2635 	// Describe what the test tries to do
2636 	m_testCtx.getLog()
2637 		<< tcu::TestLog::Message
2638 		<< "Testing glBufferSubData() function call performance. "
2639 		<< ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") : ("Half of the buffer data is updated with glBufferSubData. "))
2640 		<< ((m_invalidateBeforeUse) ? ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") : ("")) << "\n"
2641 		<< tcu::TestLog::EndMessage;
2642 
2643 	BasicUploadCase<SingleOperationDuration>::init();
2644 }
2645 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2646 void BufferSubDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2647 {
2648 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2649 
2650 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2651 
2652 	// "invalidate", upload null
2653 	if (m_invalidateBeforeUse)
2654 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2655 
2656 	// upload
2657 	{
2658 		deUint64 startTime;
2659 		deUint64 endTime;
2660 
2661 		startTime = deGetMicroseconds();
2662 
2663 		if (m_fullUpload)
2664 			gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
2665 		else
2666 		{
2667 			// upload to buffer center
2668 			gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
2669 		}
2670 
2671 		endTime = deGetMicroseconds();
2672 
2673 		result.duration.totalDuration = endTime - startTime;
2674 		result.duration.fitResponseDuration = result.duration.totalDuration;
2675 
2676 		if (m_fullUpload)
2677 			result.writtenSize = bufferSize;
2678 		else
2679 			result.writtenSize = bufferSize / 2;
2680 	}
2681 }
2682 
2683 class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
2684 {
2685 public:
2686 	enum Flags
2687 	{
2688 		FLAG_PARTIAL						= 0x01,
2689 		FLAG_MANUAL_INVALIDATION			= 0x02,
2690 		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,
2691 		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,
2692 	};
2693 
2694 					MapBufferRangeCase			(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2695 					~MapBufferRangeCase			(void);
2696 
2697 	void			init						(void);
2698 private:
2699 	static CaseType getBaseCaseType				(int caseFlags);
2700 	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);
2701 
2702 	void			testBufferUpload			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2703 	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2704 
2705 	const bool		m_manualInvalidation;
2706 	const bool		m_fullUpload;
2707 	const bool		m_useUnusedUnspecifiedBuffer;
2708 	const bool		m_useUnusedSpecifiedBuffer;
2709 	const deUint32	m_mapFlags;
2710 	int				m_unmapFailures;
2711 };
2712 
MapBufferRangeCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,deUint32 bufferUsage,deUint32 mapFlags,int caseFlags)2713 MapBufferRangeCase::MapBufferRangeCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2714 	: BasicUploadCase<MapBufferRangeDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2715 	, m_manualInvalidation						((caseFlags&FLAG_MANUAL_INVALIDATION) != 0)
2716 	, m_fullUpload								((caseFlags&FLAG_PARTIAL) == 0)
2717 	, m_useUnusedUnspecifiedBuffer				((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2718 	, m_useUnusedSpecifiedBuffer				((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2719 	, m_mapFlags								(mapFlags)
2720 	, m_unmapFailures							(0)
2721 {
2722 	DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
2723 	DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
2724 }
2725 
~MapBufferRangeCase(void)2726 MapBufferRangeCase::~MapBufferRangeCase (void)
2727 {
2728 }
2729 
init(void)2730 void MapBufferRangeCase::init (void)
2731 {
2732 	// Describe what the test tries to do
2733 	m_testCtx.getLog()
2734 		<< tcu::TestLog::Message
2735 		<< "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
2736 		<< ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2737 		<< ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2738 		<< ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2739 		<< ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2740 		<< ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : (""))
2741 		<< "Map bits:\n"
2742 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2743 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2744 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2745 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2746 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2747 		<< tcu::TestLog::EndMessage;
2748 
2749 	BasicUploadCase<MapBufferRangeDuration>::init();
2750 }
2751 
getBaseCaseType(int caseFlags)2752 MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType (int caseFlags)
2753 {
2754 	if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2755 		return CASE_USED_BUFFER;
2756 	else
2757 		return CASE_NEW_BUFFER;
2758 }
2759 
getBaseFlags(deUint32 mapFlags,int caseFlags)2760 int MapBufferRangeCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2761 {
2762 	int flags = FLAG_DONT_LOG_BUFFER_INFO;
2763 
2764 	// If buffer contains unspecified data when it is sourced (i.e drawn)
2765 	// results are undefined, and system errors may occur. Signal parent
2766 	// class to take this into account
2767 	if (caseFlags & FLAG_PARTIAL)
2768 	{
2769 		if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0			||
2770 			(caseFlags & FLAG_MANUAL_INVALIDATION) != 0				||
2771 			(caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2772 		{
2773 			flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
2774 		}
2775 	}
2776 
2777 	return flags;
2778 }
2779 
testBufferUpload(UploadSampleResult<MapBufferRangeDuration> & result,int bufferSize)2780 void MapBufferRangeCase::testBufferUpload (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2781 {
2782 	const int unmapFailureThreshold = 4;
2783 
2784 	for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
2785 	{
2786 		try
2787 		{
2788 			attemptBufferMap(result, bufferSize);
2789 			return;
2790 		}
2791 		catch (UnmapFailureError&)
2792 		{
2793 		}
2794 	}
2795 
2796 	throw tcu::TestError("Unmapping failures exceeded limit");
2797 }
2798 
attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> & result,int bufferSize)2799 void MapBufferRangeCase::attemptBufferMap (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2800 {
2801 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2802 
2803 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2804 
2805 	if (m_fullUpload)
2806 		result.writtenSize = bufferSize;
2807 	else
2808 		result.writtenSize = bufferSize / 2;
2809 
2810 	// Create unused buffer
2811 
2812 	if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
2813 	{
2814 		deUint64 startTime;
2815 		deUint64 endTime;
2816 
2817 		// "invalidate" or allocate, upload null
2818 		startTime = deGetMicroseconds();
2819 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2820 		endTime = deGetMicroseconds();
2821 
2822 		result.duration.allocDuration = endTime - startTime;
2823 	}
2824 	else if (m_useUnusedSpecifiedBuffer)
2825 	{
2826 		deUint64 startTime;
2827 		deUint64 endTime;
2828 
2829 		// Specify buffer contents
2830 		startTime = deGetMicroseconds();
2831 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2832 		endTime = deGetMicroseconds();
2833 
2834 		result.duration.allocDuration = endTime - startTime;
2835 	}
2836 	else
2837 	{
2838 		// No alloc, no time
2839 		result.duration.allocDuration = 0;
2840 	}
2841 
2842 	// upload
2843 	{
2844 		void* mapPtr;
2845 
2846 		// Map
2847 		{
2848 			deUint64 startTime;
2849 			deUint64 endTime;
2850 
2851 			startTime = deGetMicroseconds();
2852 			if (m_fullUpload)
2853 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
2854 			else
2855 			{
2856 				// upload to buffer center
2857 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
2858 			}
2859 			endTime = deGetMicroseconds();
2860 
2861 			if (!mapPtr)
2862 				throw tcu::Exception("MapBufferRange returned NULL");
2863 
2864 			result.duration.mapDuration = endTime - startTime;
2865 		}
2866 
2867 		// Write
2868 		{
2869 			result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
2870 		}
2871 
2872 		// Unmap
2873 		{
2874 			deUint64		startTime;
2875 			deUint64		endTime;
2876 			glw::GLboolean	unmapSuccessful;
2877 
2878 			startTime = deGetMicroseconds();
2879 			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
2880 			endTime = deGetMicroseconds();
2881 
2882 			// if unmapping fails, just try again later
2883 			if (!unmapSuccessful)
2884 				throw UnmapFailureError();
2885 
2886 			result.duration.unmapDuration = endTime - startTime;
2887 		}
2888 
2889 		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.allocDuration;
2890 		result.duration.fitResponseDuration = result.duration.totalDuration;
2891 	}
2892 }
2893 
2894 class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
2895 {
2896 public:
2897 	enum Flags
2898 	{
2899 		FLAG_PARTIAL						= 0x01,
2900 		FLAG_FLUSH_IN_PARTS					= 0x02,
2901 		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,
2902 		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,
2903 		FLAG_FLUSH_PARTIAL					= 0x10,
2904 	};
2905 
2906 					MapBufferRangeFlushCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2907 					~MapBufferRangeFlushCase	(void);
2908 
2909 	void			init						(void);
2910 private:
2911 	static CaseType getBaseCaseType				(int caseFlags);
2912 	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);
2913 
2914 	void			testBufferUpload			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2915 	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2916 
2917 	const bool		m_fullUpload;
2918 	const bool		m_flushInParts;
2919 	const bool		m_flushPartial;
2920 	const bool		m_useUnusedUnspecifiedBuffer;
2921 	const bool		m_useUnusedSpecifiedBuffer;
2922 	const deUint32	m_mapFlags;
2923 	int				m_unmapFailures;
2924 };
2925 
MapBufferRangeFlushCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,deUint32 bufferUsage,deUint32 mapFlags,int caseFlags)2926 MapBufferRangeFlushCase::MapBufferRangeFlushCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2927 	: BasicUploadCase<MapBufferRangeFlushDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2928 	, m_fullUpload									((caseFlags&FLAG_PARTIAL) == 0)
2929 	, m_flushInParts								((caseFlags&FLAG_FLUSH_IN_PARTS) != 0)
2930 	, m_flushPartial								((caseFlags&FLAG_FLUSH_PARTIAL) != 0)
2931 	, m_useUnusedUnspecifiedBuffer					((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2932 	, m_useUnusedSpecifiedBuffer					((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2933 	, m_mapFlags									(mapFlags)
2934 	, m_unmapFailures								(0)
2935 {
2936 	DE_ASSERT(!(m_flushPartial && m_flushInParts));
2937 	DE_ASSERT(!(m_flushPartial && !m_fullUpload));
2938 }
2939 
~MapBufferRangeFlushCase(void)2940 MapBufferRangeFlushCase::~MapBufferRangeFlushCase (void)
2941 {
2942 }
2943 
init(void)2944 void MapBufferRangeFlushCase::init (void)
2945 {
2946 	// Describe what the test tries to do
2947 	m_testCtx.getLog()
2948 		<< tcu::TestLog::Message
2949 		<< "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
2950 		<< ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2951 		<< ((m_flushInParts) ?
2952 			("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") :
2953 			(m_flushPartial) ?
2954 				("Half of the buffer range is flushed.") :
2955 				("The whole mapped range is flushed in one flush call.")) << "\n"
2956 		<< ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2957 		<< ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2958 		<< ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2959 		<< "Map bits:\n"
2960 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2961 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2962 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2963 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2964 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2965 		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
2966 		<< tcu::TestLog::EndMessage;
2967 
2968 	BasicUploadCase<MapBufferRangeFlushDuration>::init();
2969 }
2970 
getBaseCaseType(int caseFlags)2971 MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType (int caseFlags)
2972 {
2973 	if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2974 		return CASE_USED_BUFFER;
2975 	else
2976 		return CASE_NEW_BUFFER;
2977 }
2978 
getBaseFlags(deUint32 mapFlags,int caseFlags)2979 int MapBufferRangeFlushCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2980 {
2981 	int flags = FLAG_DONT_LOG_BUFFER_INFO;
2982 
2983 	// If buffer contains unspecified data when it is sourced (i.e drawn)
2984 	// results are undefined, and system errors may occur. Signal parent
2985 	// class to take this into account
2986 	if (caseFlags & FLAG_PARTIAL)
2987 	{
2988 		if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0			||
2989 			(caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0	||
2990 			(caseFlags & FLAG_FLUSH_PARTIAL) != 0)
2991 		{
2992 			flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
2993 		}
2994 	}
2995 
2996 	return flags;
2997 }
2998 
testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> & result,int bufferSize)2999 void MapBufferRangeFlushCase::testBufferUpload (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3000 {
3001 	const int unmapFailureThreshold = 4;
3002 
3003 	for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3004 	{
3005 		try
3006 		{
3007 			attemptBufferMap(result, bufferSize);
3008 			return;
3009 		}
3010 		catch (UnmapFailureError&)
3011 		{
3012 		}
3013 	}
3014 
3015 	throw tcu::TestError("Unmapping failures exceeded limit");
3016 }
3017 
attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> & result,int bufferSize)3018 void MapBufferRangeFlushCase::attemptBufferMap (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3019 {
3020 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
3021 	const int				mappedSize	= (m_fullUpload) ? (bufferSize) : (bufferSize / 2);
3022 
3023 	if (m_fullUpload && !m_flushPartial)
3024 		result.writtenSize = bufferSize;
3025 	else
3026 		result.writtenSize = bufferSize / 2;
3027 
3028 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3029 
3030 	// Create unused buffer
3031 
3032 	if (m_useUnusedUnspecifiedBuffer)
3033 	{
3034 		deUint64 startTime;
3035 		deUint64 endTime;
3036 
3037 		// Don't specify contents
3038 		startTime = deGetMicroseconds();
3039 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3040 		endTime = deGetMicroseconds();
3041 
3042 		result.duration.allocDuration = endTime - startTime;
3043 	}
3044 	else if (m_useUnusedSpecifiedBuffer)
3045 	{
3046 		deUint64 startTime;
3047 		deUint64 endTime;
3048 
3049 		// Specify buffer contents
3050 		startTime = deGetMicroseconds();
3051 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3052 		endTime = deGetMicroseconds();
3053 
3054 		result.duration.allocDuration = endTime - startTime;
3055 	}
3056 	else
3057 	{
3058 		// No alloc, no time
3059 		result.duration.allocDuration = 0;
3060 	}
3061 
3062 	// upload
3063 	{
3064 		void* mapPtr;
3065 
3066 		// Map
3067 		{
3068 			deUint64 startTime;
3069 			deUint64 endTime;
3070 
3071 			startTime = deGetMicroseconds();
3072 			if (m_fullUpload)
3073 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
3074 			else
3075 			{
3076 				// upload to buffer center
3077 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
3078 			}
3079 			endTime = deGetMicroseconds();
3080 
3081 			if (!mapPtr)
3082 				throw tcu::Exception("MapBufferRange returned NULL");
3083 
3084 			result.duration.mapDuration = endTime - startTime;
3085 		}
3086 
3087 		// Write
3088 		{
3089 			if (!m_flushPartial)
3090 				result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3091 			else
3092 				result.duration.writeDuration = medianTimeMemcpy((deUint8*)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
3093 		}
3094 
3095 		// Flush
3096 		{
3097 			deUint64	startTime;
3098 			deUint64	endTime;
3099 
3100 			startTime = deGetMicroseconds();
3101 
3102 			if (m_flushPartial)
3103 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize/4, mappedSize/2);
3104 			else if (!m_flushInParts)
3105 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
3106 			else
3107 			{
3108 				const int p1 = 0;
3109 				const int p2 = mappedSize / 3;
3110 				const int p3 = mappedSize / 2;
3111 				const int p4 = mappedSize * 2 / 4;
3112 				const int p5 = mappedSize;
3113 
3114 				// flush in mixed order
3115 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2,	p3-p2);
3116 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1,	p2-p1);
3117 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4,	p5-p4);
3118 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3,	p4-p3);
3119 			}
3120 
3121 			endTime = deGetMicroseconds();
3122 
3123 			result.duration.flushDuration = endTime - startTime;
3124 		}
3125 
3126 		// Unmap
3127 		{
3128 			deUint64		startTime;
3129 			deUint64		endTime;
3130 			glw::GLboolean	unmapSuccessful;
3131 
3132 			startTime = deGetMicroseconds();
3133 			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3134 			endTime = deGetMicroseconds();
3135 
3136 			// if unmapping fails, just try again later
3137 			if (!unmapSuccessful)
3138 				throw UnmapFailureError();
3139 
3140 			result.duration.unmapDuration = endTime - startTime;
3141 		}
3142 
3143 		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.flushDuration + result.duration.unmapDuration + result.duration.allocDuration;
3144 		result.duration.fitResponseDuration = result.duration.totalDuration;
3145 	}
3146 }
3147 
3148 template <typename SampleType>
3149 class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
3150 {
3151 public:
3152 						ModifyAfterBasicCase	(Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest);
3153 						~ModifyAfterBasicCase	(void);
3154 
3155 	void				init					(void);
3156 	void				deinit					(void);
3157 
3158 protected:
3159 	void				drawBufferRange			(int begin, int end);
3160 
3161 private:
3162 	enum
3163 	{
3164 		NUM_SAMPLES = 20,
3165 	};
3166 
3167 
3168 	bool				runSample				(int iteration, UploadSampleResult<SampleType>& sample);
3169 	bool				prepareAndRunTest		(int iteration, UploadSampleResult<SampleType>& result, int bufferSize);
3170 	void				logAndSetTestResult		(const std::vector<UploadSampleResult<SampleType> >& results);
3171 
3172 	virtual void		testWithBufferSize		(UploadSampleResult<SampleType>& result, int bufferSize) = 0;
3173 
3174 	int					m_unmappingErrors;
3175 
3176 protected:
3177 	const bool			m_bufferUnspecifiedAfterTest;
3178 	const deUint32		m_bufferUsage;
3179 	std::vector<deUint8> m_zeroData;
3180 
3181 	using BasicBufferCase<SampleType>::m_testCtx;
3182 	using BasicBufferCase<SampleType>::m_context;
3183 
3184 	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
3185 	using BasicBufferCase<SampleType>::m_dummyProgram;
3186 	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
3187 	using BasicBufferCase<SampleType>::m_bufferID;
3188 	using BasicBufferCase<SampleType>::m_numSamples;
3189 	using BasicBufferCase<SampleType>::m_bufferSizeMin;
3190 	using BasicBufferCase<SampleType>::m_bufferSizeMax;
3191 	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
3192 };
3193 
3194 template <typename SampleType>
ModifyAfterBasicCase(Context & context,const char * name,const char * description,int bufferSizeMin,int bufferSizeMax,deUint32 usage,bool bufferUnspecifiedAfterTest)3195 ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest)
3196 	: BasicBufferCase<SampleType>	(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
3197 	, m_unmappingErrors				(0)
3198 	, m_bufferUnspecifiedAfterTest	(bufferUnspecifiedAfterTest)
3199 	, m_bufferUsage					(usage)
3200 	, m_zeroData					()
3201 {
3202 }
3203 
3204 template <typename SampleType>
~ModifyAfterBasicCase(void)3205 ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase (void)
3206 {
3207 	BasicBufferCase<SampleType>::deinit();
3208 }
3209 
3210 template <typename SampleType>
init(void)3211 void ModifyAfterBasicCase<SampleType>::init (void)
3212 {
3213 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3214 
3215 	// init parent
3216 
3217 	BasicBufferCase<SampleType>::init();
3218 
3219 	// upload source
3220 	m_zeroData.resize(m_bufferSizeMax, 0x00);
3221 
3222 	// log basic info
3223 
3224 	m_testCtx.getLog()
3225 		<< tcu::TestLog::Message
3226 		<< "Testing performance with " << (int)NUM_SAMPLES << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
3227 		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
3228 		<< tcu::TestLog::EndMessage;
3229 
3230 	// log which transfer rate is the test result and buffer info
3231 
3232 	m_testCtx.getLog()
3233 		<< tcu::TestLog::Message
3234 		<< "Test result is the median transfer rate of the test samples.\n"
3235 		<< "Buffer usage = " << glu::getUsageName(m_bufferUsage)
3236 		<< tcu::TestLog::EndMessage;
3237 
3238 	// Set state for drawing so that we don't have to change these during the iteration
3239 	{
3240 		gl.useProgram(m_dummyProgram->getProgram());
3241 		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
3242 		gl.enableVertexAttribArray(m_dummyProgramPosLoc);
3243 	}
3244 }
3245 
3246 template <typename SampleType>
deinit(void)3247 void ModifyAfterBasicCase<SampleType>::deinit (void)
3248 {
3249 	m_zeroData.clear();
3250 
3251 	BasicBufferCase<SampleType>::deinit();
3252 }
3253 
3254 template <typename SampleType>
drawBufferRange(int begin,int end)3255 void ModifyAfterBasicCase<SampleType>::drawBufferRange (int begin, int end)
3256 {
3257 	DE_ASSERT(begin % (int)sizeof(float[4]) == 0);
3258 	DE_ASSERT(end % (int)sizeof(float[4]) == 0);
3259 
3260 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3261 
3262 	// use given range
3263 	gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1);
3264 	gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1);
3265 }
3266 
3267 template <typename SampleType>
runSample(int iteration,UploadSampleResult<SampleType> & sample)3268 bool ModifyAfterBasicCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
3269 {
3270 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
3271 	const int				bufferSize			= sample.bufferSize;
3272 	bool					testOk;
3273 
3274 	testOk = prepareAndRunTest(iteration, sample, bufferSize);
3275 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
3276 
3277 	if (!testOk)
3278 	{
3279 		const int unmapFailureThreshold = 4;
3280 
3281 		// only unmapping error can cause iteration failure
3282 		if (++m_unmappingErrors >= unmapFailureThreshold)
3283 			throw tcu::TestError("Too many unmapping errors, cannot continue.");
3284 
3285 		// just try again
3286 		return false;
3287 	}
3288 
3289 	return true;
3290 }
3291 
3292 template <typename SampleType>
prepareAndRunTest(int iteration,UploadSampleResult<SampleType> & result,int bufferSize)3293 bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest (int iteration, UploadSampleResult<SampleType>& result, int bufferSize)
3294 {
3295 	DE_UNREF(iteration);
3296 
3297 	DE_ASSERT(!m_bufferID);
3298 	DE_ASSERT(deIsAligned32(bufferSize, 4*4)); // aligned to vec4
3299 
3300 	const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
3301 	bool						testRunOk		= true;
3302 	bool						unmappingFailed	= false;
3303 
3304 	// Upload initial buffer to the GPU...
3305 	gl.genBuffers(1, &m_bufferID);
3306 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3307 	gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3308 
3309 	// ...use it...
3310 	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
3311 	drawBufferRange(0, bufferSize);
3312 
3313 	// ..and make sure it is uploaded
3314 	BasicBufferCase<SampleType>::waitGLResults();
3315 
3316 	// warmup CPU before the test to make sure the power management governor
3317 	// keeps us in the "high performance" mode
3318 	{
3319 		deYield();
3320 		tcu::warmupCPU();
3321 		deYield();
3322 	}
3323 
3324 	// test
3325 	try
3326 	{
3327 		// buffer is uploaded to the GPU. Draw from it.
3328 		drawBufferRange(0, bufferSize);
3329 
3330 		// and test upload
3331 		testWithBufferSize(result, bufferSize);
3332 	}
3333 	catch (UnmapFailureError&)
3334 	{
3335 		testRunOk = false;
3336 		unmappingFailed = true;
3337 	}
3338 
3339 	// clean up: make sure buffer is not in upload queue and delete it
3340 
3341 	// sourcing unspecified data causes undefined results, possibly program termination
3342 	if (m_bufferUnspecifiedAfterTest || unmappingFailed)
3343 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3344 
3345 	drawBufferRange(0, bufferSize);
3346 	BasicBufferCase<SampleType>::waitGLResults();
3347 
3348 	gl.deleteBuffers(1, &m_bufferID);
3349 	m_bufferID = 0;
3350 
3351 	return testRunOk;
3352 }
3353 
3354 template <typename SampleType>
logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> & results)3355 void ModifyAfterBasicCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
3356 {
3357 	const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false);
3358 
3359 	// Return median transfer rate of the samples
3360 
3361 	if (analysis.transferRateMedian == std::numeric_limits<float>::infinity())
3362 	{
3363 		// sample times are 1) invalid or 2) timer resolution too low
3364 		// report speed 0 bytes / s since real value cannot be determined
3365 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
3366 	}
3367 	else
3368 	{
3369 		// report transfer rate in MB / s
3370 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str());
3371 	}
3372 }
3373 
// Measures the duration of a BufferData() call issued on a buffer right after
// draw calls that source from it. The variant is selected with CaseFlags; the
// constructor asserts that the two flags are not combined.
class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
{
public:

	enum CaseFlags
	{
		FLAG_RESPECIFY_SIZE		= 0x1,	// upload with a larger size (bufferSize * m_sizeDifferenceFactor, aligned to 16)
		FLAG_UPLOAD_REPEATED	= 0x2,	// issue NUM_REPEATS BufferData()+draw pairs before the timed upload
	};

					ModifyAfterWithBufferDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
					~ModifyAfterWithBufferDataCase	(void);

	// init() logs the test purpose, initializes the parent and, for the
	// respecify variant, grows m_zeroData; deinit() forwards to the parent.
	void			init							(void);
	void			deinit							(void);
private:
	// Runs one sample: optional upload-draw repeats, then the timed BufferData() call.
	void			testWithBufferSize				(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);

	enum
	{
		NUM_REPEATS = 2	// number of untimed upload-draw pairs in the FLAG_UPLOAD_REPEATED variant
	};

	const bool		m_respecifySize;		// set from FLAG_RESPECIFY_SIZE
	const bool		m_repeatedUpload;		// set from FLAG_UPLOAD_REPEATED
	const float		m_sizeDifferenceFactor;	// growth factor for the respecified size (1.3f in the constructor)
};
3401 
ModifyAfterWithBufferDataCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,deUint32 usage,int flags)3402 ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3403 	: ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3404 	, m_respecifySize								((flags & FLAG_RESPECIFY_SIZE) != 0)
3405 	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
3406 	, m_sizeDifferenceFactor						(1.3f)
3407 {
3408 	DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
3409 }
3410 
~ModifyAfterWithBufferDataCase(void)3411 ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase (void)
3412 {
3413 	deinit();
3414 }
3415 
init(void)3416 void ModifyAfterWithBufferDataCase::init (void)
3417 {
3418 	// Log the purpose of the test
3419 
3420 	if (m_repeatedUpload)
3421 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3422 	else
3423 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3424 
3425 	m_testCtx.getLog()
3426 		<< tcu::TestLog::Message
3427 		<< ((m_respecifySize) ?
3428 			("Buffer size is increased and contents are modified with BufferData().\n") :
3429 			("Buffer contents are modified with BufferData().\n"))
3430 		<< tcu::TestLog::EndMessage;
3431 
3432 	// init parent
3433 	ModifyAfterBasicCase<SingleOperationDuration>::init();
3434 
3435 	// make sure our zeroBuffer is large enough
3436 	if (m_respecifySize)
3437 	{
3438 		const int largerBufferSize = deAlign32((int)(m_bufferSizeMax * m_sizeDifferenceFactor), 4*4);
3439 		m_zeroData.resize(largerBufferSize, 0x00);
3440 	}
3441 }
3442 
deinit(void)3443 void ModifyAfterWithBufferDataCase::deinit (void)
3444 {
3445 	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3446 }
3447 
testWithBufferSize(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3448 void ModifyAfterWithBufferDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3449 {
3450 	// always draw the same amount to make compares between cases sensible
3451 	const int					drawStart			= deAlign32(bufferSize / 4, 4*4);
3452 	const int					drawEnd				= deAlign32(bufferSize * 3 / 4, 4*4);
3453 
3454 	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3455 	const int					largerBufferSize	= deAlign32((int)(bufferSize * m_sizeDifferenceFactor), 4*4);
3456 	const int					newBufferSize		= (m_respecifySize) ? (largerBufferSize) : (bufferSize);
3457 	deUint64					startTime;
3458 	deUint64					endTime;
3459 
3460 	// repeat upload-draw
3461 	if (m_repeatedUpload)
3462 	{
3463 		for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3464 		{
3465 			gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3466 			drawBufferRange(drawStart, drawEnd);
3467 		}
3468 	}
3469 
3470 	// test upload
3471 	startTime = deGetMicroseconds();
3472 	gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3473 	endTime = deGetMicroseconds();
3474 
3475 	result.duration.totalDuration = endTime - startTime;
3476 	result.duration.fitResponseDuration = result.duration.totalDuration;
3477 	result.writtenSize = newBufferSize;
3478 }
3479 
// Measures the duration of a BufferSubData() call issued on a buffer right
// after draw calls that source from it. The variant is selected with CaseFlags.
class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
{
public:

	enum CaseFlags
	{
		FLAG_PARTIAL			= 0x1,	// update bufferSize / 2 bytes at offset bufferSize / 4 (both aligned to 16) instead of the whole buffer
		FLAG_UPLOAD_REPEATED	= 0x2,	// issue NUM_REPEATS BufferSubData()+draw pairs before the timed upload
	};

					ModifyAfterWithBufferSubDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
					~ModifyAfterWithBufferSubDataCase	(void);

	// init() logs the test purpose and initializes the parent; deinit() forwards to the parent.
	void			init								(void);
	void			deinit								(void);
private:
	// Runs one sample: optional upload-draw repeats, then the timed BufferSubData() call.
	void			testWithBufferSize					(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);

	enum
	{
		NUM_REPEATS = 2	// number of untimed upload-draw pairs in the FLAG_UPLOAD_REPEATED variant
	};

	const bool		m_partialUpload;	// set from FLAG_PARTIAL
	const bool		m_repeatedUpload;	// set from FLAG_UPLOAD_REPEATED
};
3506 
ModifyAfterWithBufferSubDataCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,deUint32 usage,int flags)3507 ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3508 	: ModifyAfterBasicCase<SingleOperationDuration>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3509 	, m_partialUpload								((flags & FLAG_PARTIAL) != 0)
3510 	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
3511 {
3512 }
3513 
~ModifyAfterWithBufferSubDataCase(void)3514 ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase (void)
3515 {
3516 	deinit();
3517 }
3518 
init(void)3519 void ModifyAfterWithBufferSubDataCase::init (void)
3520 {
3521 	// Log the purpose of the test
3522 
3523 	if (m_repeatedUpload)
3524 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3525 	else
3526 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3527 
3528 	m_testCtx.getLog()
3529 		<< tcu::TestLog::Message
3530 		<< ((m_partialUpload) ?
3531 			("Half of the buffer contents are modified.\n") :
3532 			("Buffer contents are fully respecified.\n"))
3533 		<< tcu::TestLog::EndMessage;
3534 
3535 	ModifyAfterBasicCase<SingleOperationDuration>::init();
3536 }
3537 
deinit(void)3538 void ModifyAfterWithBufferSubDataCase::deinit (void)
3539 {
3540 	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3541 }
3542 
testWithBufferSize(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3543 void ModifyAfterWithBufferSubDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3544 {
3545 	// always draw the same amount to make compares between cases sensible
3546 	const int					drawStart			= deAlign32(bufferSize / 4, 4*4);
3547 	const int					drawEnd				= deAlign32(bufferSize * 3 / 4, 4*4);
3548 
3549 	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3550 	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3551 	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3552 	deUint64					startTime;
3553 	deUint64					endTime;
3554 
3555 	// make upload-draw stream
3556 	if (m_repeatedUpload)
3557 	{
3558 		for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3559 		{
3560 			gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3561 			drawBufferRange(drawStart, drawEnd);
3562 		}
3563 	}
3564 
3565 	// test upload
3566 	startTime = deGetMicroseconds();
3567 	gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3568 	endTime = deGetMicroseconds();
3569 
3570 	result.duration.totalDuration = endTime - startTime;
3571 	result.duration.fitResponseDuration = result.duration.totalDuration;
3572 	result.writtenSize = subdataSize;
3573 }
3574 
// Measures a MapBufferRange() - write - UnmapBuffer() sequence on a buffer that
// has just been drawn from. Map, write and unmap durations are recorded separately.
class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
{
public:

	enum CaseFlags
	{
		FLAG_PARTIAL = 0x1,	// map bufferSize / 2 bytes at offset bufferSize / 4 (both aligned to 16) instead of the whole buffer
	};

					ModifyAfterWithMapBufferRangeCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
					~ModifyAfterWithMapBufferRangeCase	(void);

	// init() logs the test purpose and map bits, then initializes the parent; deinit() forwards to the parent.
	void			init								(void);
	void			deinit								(void);
private:
	// True when a partial map is combined with GL_MAP_INVALIDATE_BUFFER_BIT.
	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
	// Runs one sample; throws tcu::TestError on a null mapping and UnmapFailureError when unmapping fails.
	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize);

	const bool		m_partialUpload;	// set from FLAG_PARTIAL
	const deUint32	m_mapFlags;			// access bits passed to mapBufferRange()
};
3596 
ModifyAfterWithMapBufferRangeCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,deUint32 usage,int flags,deUint32 glMapFlags)3597 ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3598 	: ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3599 	, m_partialUpload										((flags & FLAG_PARTIAL) != 0)
3600 	, m_mapFlags											(glMapFlags)
3601 {
3602 }
3603 
~ModifyAfterWithMapBufferRangeCase(void)3604 ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase (void)
3605 {
3606 	deinit();
3607 }
3608 
init(void)3609 void ModifyAfterWithMapBufferRangeCase::init (void)
3610 {
3611 	// Log the purpose of the test
3612 
3613 	m_testCtx.getLog()
3614 		<< tcu::TestLog::Message
3615 		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3616 		<< ((m_partialUpload) ?
3617 			("Half of the buffer is mapped.\n") :
3618 			("Whole buffer is mapped.\n"))
3619 		<< "Map bits:\n"
3620 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3621 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3622 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3623 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3624 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3625 		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3626 		<< tcu::TestLog::EndMessage;
3627 
3628 	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
3629 }
3630 
deinit(void)3631 void ModifyAfterWithMapBufferRangeCase::deinit (void)
3632 {
3633 	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
3634 }
3635 
isBufferUnspecifiedAfterUpload(int flags,deUint32 mapFlags)3636 bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3637 {
3638 	if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3639 		return true;
3640 
3641 	return false;
3642 }
3643 
testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> & result,int bufferSize)3644 void ModifyAfterWithMapBufferRangeCase::testWithBufferSize (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize)
3645 {
3646 	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3647 	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3648 	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3649 	void*						mapPtr;
3650 
3651 	// map
3652 	{
3653 		deUint64 startTime;
3654 		deUint64 endTime;
3655 
3656 		startTime = deGetMicroseconds();
3657 		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3658 		endTime = deGetMicroseconds();
3659 
3660 		if (!mapPtr)
3661 			throw tcu::TestError("mapBufferRange returned null");
3662 
3663 		result.duration.mapDuration = endTime - startTime;
3664 	}
3665 
3666 	// write
3667 	{
3668 		result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3669 	}
3670 
3671 	// unmap
3672 	{
3673 		deUint64		startTime;
3674 		deUint64		endTime;
3675 		glw::GLboolean	unmapSucceeded;
3676 
3677 		startTime = deGetMicroseconds();
3678 		unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3679 		endTime = deGetMicroseconds();
3680 
3681 		if (unmapSucceeded != GL_TRUE)
3682 			throw UnmapFailureError();
3683 
3684 		result.duration.unmapDuration = endTime - startTime;
3685 	}
3686 
3687 	result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
3688 	result.duration.fitResponseDuration = result.duration.totalDuration;
3689 	result.writtenSize = subdataSize;
3690 }
3691 
// Measures a MapBufferRange() - write - FlushMappedBufferRange() - UnmapBuffer()
// sequence on a buffer that has just been drawn from. Map, write, flush and
// unmap durations are recorded separately.
class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
{
public:

	enum CaseFlags
	{
		FLAG_PARTIAL = 0x1,	// map bufferSize / 2 bytes at offset bufferSize / 4 (both aligned to 16) instead of the whole buffer
	};

					ModifyAfterWithMapBufferFlushCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
					~ModifyAfterWithMapBufferFlushCase	(void);

	// init() logs the test purpose and map bits, then initializes the parent; deinit() forwards to the parent.
	void			init								(void);
	void			deinit								(void);
private:
	// True when a partial map is combined with GL_MAP_INVALIDATE_BUFFER_BIT.
	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
	// Runs one sample; throws tcu::TestError on a null mapping and UnmapFailureError when unmapping fails.
	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize);

	const bool		m_partialUpload;	// set from FLAG_PARTIAL
	const deUint32	m_mapFlags;			// access bits passed to mapBufferRange()
};
3713 
ModifyAfterWithMapBufferFlushCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,deUint32 usage,int flags,deUint32 glMapFlags)3714 ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3715 	: ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3716 	, m_partialUpload											((flags & FLAG_PARTIAL) != 0)
3717 	, m_mapFlags												(glMapFlags)
3718 {
3719 }
3720 
~ModifyAfterWithMapBufferFlushCase(void)3721 ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase (void)
3722 {
3723 	deinit();
3724 }
3725 
init(void)3726 void ModifyAfterWithMapBufferFlushCase::init (void)
3727 {
3728 	// Log the purpose of the test
3729 
3730 	m_testCtx.getLog()
3731 		<< tcu::TestLog::Message
3732 		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3733 		<< ((m_partialUpload) ?
3734 			("Half of the buffer is mapped.\n") :
3735 			("Whole buffer is mapped.\n"))
3736 		<< "Map bits:\n"
3737 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3738 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3739 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3740 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3741 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3742 		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3743 		<< tcu::TestLog::EndMessage;
3744 
3745 	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
3746 }
3747 
deinit(void)3748 void ModifyAfterWithMapBufferFlushCase::deinit (void)
3749 {
3750 	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
3751 }
3752 
isBufferUnspecifiedAfterUpload(int flags,deUint32 mapFlags)3753 bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3754 {
3755 	if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3756 		return true;
3757 
3758 	return false;
3759 }
3760 
testWithBufferSize(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> & result,int bufferSize)3761 void ModifyAfterWithMapBufferFlushCase::testWithBufferSize (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize)
3762 {
3763 	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3764 	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3765 	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3766 	void*						mapPtr;
3767 
3768 	// map
3769 	{
3770 		deUint64 startTime;
3771 		deUint64 endTime;
3772 
3773 		startTime = deGetMicroseconds();
3774 		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3775 		endTime = deGetMicroseconds();
3776 
3777 		if (!mapPtr)
3778 			throw tcu::TestError("mapBufferRange returned null");
3779 
3780 		result.duration.mapDuration = endTime - startTime;
3781 	}
3782 
3783 	// write
3784 	{
3785 		result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3786 	}
3787 
3788 	// flush
3789 	{
3790 		deUint64 startTime;
3791 		deUint64 endTime;
3792 
3793 		startTime = deGetMicroseconds();
3794 		gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
3795 		endTime = deGetMicroseconds();
3796 
3797 		result.duration.flushDuration = endTime - startTime;
3798 	}
3799 
3800 	// unmap
3801 	{
3802 		deUint64		startTime;
3803 		deUint64		endTime;
3804 		glw::GLboolean	unmapSucceeded;
3805 
3806 		startTime = deGetMicroseconds();
3807 		unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3808 		endTime = deGetMicroseconds();
3809 
3810 		if (unmapSucceeded != GL_TRUE)
3811 			throw UnmapFailureError();
3812 
3813 		result.duration.unmapDuration = endTime - startTime;
3814 	}
3815 
3816 	result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.flushDuration;
3817 	result.duration.fitResponseDuration = result.duration.totalDuration;
3818 	result.writtenSize = subdataSize;
3819 }
3820 
// Draw call variant used by a RenderCase. DRAWMETHOD_LAST is not a method but
// the upper bound checked by RenderCase's constructor assert.
enum DrawMethod
{
	DRAWMETHOD_DRAW_ARRAYS = 0,
	DRAWMETHOD_DRAW_ELEMENTS,

	DRAWMETHOD_LAST
};
3828 
// Selects a target buffer (vertex or index, going by the enumerator names).
// NOTE(review): the consumers are outside this section -- confirm exact meaning there.
// TARGETBUFFER_LAST is the enumerator count, not a valid target.
enum TargetBuffer
{
	TARGETBUFFER_VERTEX = 0,
	TARGETBUFFER_INDEX,

	TARGETBUFFER_LAST
};
3836 
// Selects between a new and an already existing buffer (going by the enumerator names).
// NOTE(review): the consumers are outside this section -- confirm exact meaning there.
// BUFFERSTATE_LAST is the enumerator count, not a valid state.
enum BufferState
{
	BUFFERSTATE_NEW = 0,
	BUFFERSTATE_EXISTING,

	BUFFERSTATE_LAST
};
3844 
// Selects the upload command (going by the enumerator names: BufferData,
// BufferSubData or MapBufferRange).
// NOTE(review): the consumers are outside this section -- confirm exact meaning there.
// UPLOADMETHOD_LAST is the enumerator count, not a valid method.
enum UploadMethod
{
	UPLOADMETHOD_BUFFER_DATA = 0,
	UPLOADMETHOD_BUFFER_SUB_DATA,
	UPLOADMETHOD_MAP_BUFFER_RANGE,

	UPLOADMETHOD_LAST
};
3853 
// Selects whether an additional, unrelated buffer is involved and of which kind
// (going by the enumerator names).
// NOTE(review): the consumers are outside this section -- confirm exact meaning there.
// UNRELATEDBUFFERTYPE_LAST is the enumerator count, not a valid type.
enum UnrelatedBufferType
{
	UNRELATEDBUFFERTYPE_NONE = 0,
	UNRELATEDBUFFERTYPE_VERTEX,

	UNRELATEDBUFFERTYPE_LAST
};
3861 
// Selects between a full and a partial upload range (going by the enumerator names).
// NOTE(review): the consumers are outside this section -- confirm exact meaning there.
// UPLOADRANGE_LAST is the enumerator count, not a valid range.
enum UploadRange
{
	UPLOADRANGE_FULL = 0,
	UPLOADRANGE_PARTIAL,

	UPLOADRANGE_LAST
};
3869 
// Dimensions of a layered grid scene, counted in cells. The generators below
// emit 6 vertices for every (x, y, layer) cell.
struct LayeredGridSpec
{
	int gridWidth;	// number of cells along x
	int gridHeight;	// number of cells along y
	int gridLayers;	// number of cells stacked at each (x, y) position
};
3876 
getLayeredGridNumVertices(const LayeredGridSpec & scene)3877 static int getLayeredGridNumVertices (const LayeredGridSpec& scene)
3878 {
3879 	return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6;
3880 }
3881 
generateLayeredGridVertexAttribData4C4V(std::vector<tcu::Vec4> & vertexData,const LayeredGridSpec & scene)3882 static void generateLayeredGridVertexAttribData4C4V (std::vector<tcu::Vec4>& vertexData, const LayeredGridSpec& scene)
3883 {
3884 	// interleave color & vertex data
3885 	const tcu::Vec4 green	(0.0f, 1.0f, 0.0f, 0.7f);
3886 	const tcu::Vec4 yellow	(1.0f, 1.0f, 0.0f, 0.8f);
3887 
3888 	vertexData.resize(getLayeredGridNumVertices(scene) * 2);
3889 
3890 	for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
3891 	for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
3892 	for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
3893 	{
3894 		const tcu::Vec4	color		= (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
3895 		const float		cellLeft	= (float(cellX  ) / scene.gridWidth  - 0.5f) * 2.0f;
3896 		const float		cellRight	= (float(cellX+1) / scene.gridWidth  - 0.5f) * 2.0f;
3897 		const float		cellTop		= (float(cellY+1) / scene.gridHeight - 0.5f) * 2.0f;
3898 		const float		cellBottom	= (float(cellY  ) / scene.gridHeight - 0.5f) * 2.0f;
3899 
3900 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  0] = color;
3901 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  1] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3902 
3903 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  2] = color;
3904 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  3] = tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);
3905 
3906 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  4] = color;
3907 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  5] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3908 
3909 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  6] = color;
3910 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  7] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3911 
3912 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  8] = color;
3913 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  9] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3914 
3915 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] = color;
3916 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] = tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f);
3917 	}
3918 }
3919 
generateLayeredGridIndexData(std::vector<deUint32> & indexData,const LayeredGridSpec & scene)3920 static void generateLayeredGridIndexData (std::vector<deUint32>& indexData, const LayeredGridSpec& scene)
3921 {
3922 	indexData.resize(getLayeredGridNumVertices(scene) * 2);
3923 
3924 	for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx)
3925 		indexData[ndx] = ndx;
3926 }
3927 
// Common base for the render performance cases: owns the color shader program
// and the locations of its a_color / a_position attributes.
class RenderPerformanceTestBase : public TestCase
{
public:
							RenderPerformanceTestBase	(Context& context, const char* name, const char* description);
							~RenderPerformanceTestBase	(void);

protected:
	// init() builds the program and queries attribute locations (throws
	// tcu::TestError on failure); deinit() deletes the program.
	void					init						(void);
	void					deinit						(void);

	// Reads back a RENDER_AREA_SIZE x RENDER_AREA_SIZE area with glu::readPixels.
	void					waitGLResults				(void) const;
	// Enables both attribute arrays, sets their pointers (interleaved, 8 floats
	// per vertex) and binds the program. Buffers must already be bound.
	void					setupVertexAttribs			(void) const;

	enum
	{
		RENDER_AREA_SIZE = 128	// side length of the square render / readback area
	};

private:
	glu::ShaderProgram*		m_renderProgram;	// owned; created in init(), deleted in deinit()
	int						m_colorLoc;			// location of a_color
	int						m_positionLoc;		// location of a_position
};
3951 
RenderPerformanceTestBase(Context & context,const char * name,const char * description)3952 RenderPerformanceTestBase::RenderPerformanceTestBase (Context& context, const char* name, const char* description)
3953 	: TestCase			(context, tcu::NODETYPE_PERFORMANCE, name, description)
3954 	, m_renderProgram	(DE_NULL)
3955 	, m_colorLoc		(0)
3956 	, m_positionLoc		(0)
3957 {
3958 }
3959 
~RenderPerformanceTestBase(void)3960 RenderPerformanceTestBase::~RenderPerformanceTestBase (void)
3961 {
3962 	deinit();
3963 }
3964 
init(void)3965 void RenderPerformanceTestBase::init (void)
3966 {
3967 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3968 
3969 	m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_colorVertexShader) << glu::FragmentSource(s_colorFragmentShader));
3970 	if (!m_renderProgram->isOk())
3971 	{
3972 		m_testCtx.getLog() << *m_renderProgram;
3973 		throw tcu::TestError("could not build program");
3974 	}
3975 
3976 	m_colorLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
3977 	m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");
3978 
3979 	if (m_colorLoc == -1)
3980 		throw tcu::TestError("Location of attribute a_color was -1");
3981 	if (m_positionLoc == -1)
3982 		throw tcu::TestError("Location of attribute a_position was -1");
3983 }
3984 
deinit(void)3985 void RenderPerformanceTestBase::deinit (void)
3986 {
3987 	delete m_renderProgram;
3988 	m_renderProgram = DE_NULL;
3989 }
3990 
setupVertexAttribs(void) const3991 void RenderPerformanceTestBase::setupVertexAttribs (void) const
3992 {
3993 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3994 
3995 	// buffers are bound
3996 
3997 	gl.enableVertexAttribArray(m_colorLoc);
3998 	gl.enableVertexAttribArray(m_positionLoc);
3999 
4000 	gl.vertexAttribPointer(m_colorLoc,    4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 0);
4001 	gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 1);
4002 
4003 	gl.useProgram(m_renderProgram->getProgram());
4004 
4005 	GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
4006 }
4007 
waitGLResults(void) const4008 void RenderPerformanceTestBase::waitGLResults (void) const
4009 {
4010 	tcu::Surface dummySurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4011 	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
4012 }
4013 
// Base for render performance cases that run one sample per layered grid
// configuration. iterate() runs the configurations in a randomized order by
// calling runSample() once per iteration and, after the last one, passes all
// results to logAndSetTestResult().
template <typename SampleType>
class RenderCase : public RenderPerformanceTestBase
{
public:
									RenderCase						(Context& context, const char* name, const char* description, DrawMethod drawMethod);
									~RenderCase						(void);

protected:
	// init() checks the render target size, sets GL state and generates the
	// sample configurations; deinit() releases the program and the buffers.
	void							init							(void);
	void							deinit							(void);

private:
	// Runs one sample per call; an UnmapFailureError from runSample() is
	// counted and the same sample is retried on the next call.
	IterateResult					iterate							(void);

protected:
	// One measurement: the scene that was rendered and the result data for it.
	struct SampleResult
	{
		LayeredGridSpec					scene;
		RenderSampleResult<SampleType>	result;
	};

	// Smallest / largest vertex count over all configured scenes.
	int								getMinWorkloadSize				(void) const;
	int								getMaxWorkloadSize				(void) const;
	// The corresponding workload size multiplied by getVertexDataSize().
	int								getMinWorkloadDataSize			(void) const;
	int								getMaxWorkloadDataSize			(void) const;
	int								getVertexDataSize				(void) const;
	int								getNumSamples					(void) const;
	void							uploadScene						(const LayeredGridSpec& scene);

	// Implemented by concrete cases; called by iterate() with the sample to fill in.
	virtual void					runSample						(SampleResult& sample) = 0;
	virtual void					logAndSetTestResult				(const std::vector<SampleResult>& results);

	void							mapResultsToRenderRateFormat	(std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const;

	const DrawMethod				m_drawMethod;

private:
	glw::GLuint						m_attributeBufferID;	// deleted in deinit() when non-zero
	glw::GLuint						m_indexBufferID;		// deleted in deinit() when non-zero
	int								m_iterationNdx;			// position in m_iterationOrder of the next sample to run
	std::vector<int>				m_iterationOrder;		// randomized order of indices into m_results
	std::vector<SampleResult>		m_results;				// one entry per grid configuration created in init()
	int								m_numUnmapFailures;		// number of UnmapFailureErrors caught by iterate()
};
4058 
4059 template <typename SampleType>
RenderCase(Context & context,const char * name,const char * description,DrawMethod drawMethod)4060 RenderCase<SampleType>::RenderCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4061 	: RenderPerformanceTestBase	(context, name, description)
4062 	, m_drawMethod				(drawMethod)
4063 	, m_attributeBufferID		(0)
4064 	, m_indexBufferID			(0)
4065 	, m_iterationNdx			(0)
4066 	, m_numUnmapFailures		(0)
4067 {
4068 	DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
4069 }
4070 
4071 template <typename SampleType>
~RenderCase(void)4072 RenderCase<SampleType>::~RenderCase (void)
4073 {
4074 	deinit();
4075 }
4076 
4077 template <typename SampleType>
init(void)4078 void RenderCase<SampleType>::init (void)
4079 {
4080 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4081 
4082 	RenderPerformanceTestBase::init();
4083 
4084 	// requirements
4085 
4086 	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
4087 		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
4088 		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
4089 
4090 	// gl state
4091 
4092 	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4093 
4094 	// enable bleding to prevent grid layers from being discarded
4095 	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
4096 	gl.blendEquation(GL_FUNC_ADD);
4097 	gl.enable(GL_BLEND);
4098 
4099 	// generate iterations
4100 
4101 	{
4102 		const int gridSizes[] = { 20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80,  86,  92,  98,  104, 110, 116, 122, 128 };
4103 
4104 		for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
4105 		{
4106 			m_results.push_back(SampleResult());
4107 
4108 			m_results.back().scene.gridHeight = gridSizes[gridNdx];
4109 			m_results.back().scene.gridWidth = gridSizes[gridNdx];
4110 			m_results.back().scene.gridLayers = 5;
4111 
4112 			m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);
4113 
4114 			// test cases set these, initialize to dummy values
4115 			m_results.back().result.renderDataSize = -1;
4116 			m_results.back().result.uploadedDataSize = -1;
4117 			m_results.back().result.unrelatedDataSize = -1;
4118 		}
4119 	}
4120 
4121 	// randomize iteration order
4122 	{
4123 		m_iterationOrder.resize(m_results.size());
4124 		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
4125 	}
4126 }
4127 
4128 template <typename SampleType>
deinit(void)4129 void RenderCase<SampleType>::deinit (void)
4130 {
4131 	RenderPerformanceTestBase::deinit();
4132 
4133 	if (m_attributeBufferID)
4134 	{
4135 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID);
4136 		m_attributeBufferID = 0;
4137 	}
4138 
4139 	if (m_indexBufferID)
4140 	{
4141 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID);
4142 		m_indexBufferID = 0;
4143 	}
4144 }
4145 
4146 template <typename SampleType>
iterate(void)4147 typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate (void)
4148 {
4149 	const int		unmapFailureThreshold	= 3;
4150 	const int		currentIteration		= m_iterationNdx;
4151 	const int		currentConfigNdx		= m_iterationOrder[currentIteration];
4152 	SampleResult&	currentSample			= m_results[currentConfigNdx];
4153 
4154 	try
4155 	{
4156 		runSample(currentSample);
4157 		++m_iterationNdx;
4158 	}
4159 	catch (const UnmapFailureError& ex)
4160 	{
4161 		DE_UNREF(ex);
4162 		++m_numUnmapFailures;
4163 	}
4164 
4165 	if (m_numUnmapFailures > unmapFailureThreshold)
4166 		throw tcu::TestError("Got too many unmap errors");
4167 
4168 	if (m_iterationNdx < (int)m_iterationOrder.size())
4169 		return CONTINUE;
4170 
4171 	logAndSetTestResult(m_results);
4172 	return STOP;
4173 }
4174 
4175 template <typename SampleType>
getMinWorkloadSize(void) const4176 int RenderCase<SampleType>::getMinWorkloadSize (void) const
4177 {
4178 	int result = getLayeredGridNumVertices(m_results[0].scene);
4179 
4180 	for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4181 	{
4182 		const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4183 		result = de::min(result, workloadSize);
4184 	}
4185 
4186 	return result;
4187 }
4188 
4189 template <typename SampleType>
getMaxWorkloadSize(void) const4190 int RenderCase<SampleType>::getMaxWorkloadSize (void) const
4191 {
4192 	int result = getLayeredGridNumVertices(m_results[0].scene);
4193 
4194 	for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4195 	{
4196 		const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4197 		result = de::max(result, workloadSize);
4198 	}
4199 
4200 	return result;
4201 }
4202 
4203 template <typename SampleType>
getMinWorkloadDataSize(void) const4204 int RenderCase<SampleType>::getMinWorkloadDataSize (void) const
4205 {
4206 	return getMinWorkloadSize() * getVertexDataSize();
4207 }
4208 
4209 template <typename SampleType>
getMaxWorkloadDataSize(void) const4210 int RenderCase<SampleType>::getMaxWorkloadDataSize (void) const
4211 {
4212 	return getMaxWorkloadSize() * getVertexDataSize();
4213 }
4214 
4215 template <typename SampleType>
getVertexDataSize(void) const4216 int RenderCase<SampleType>::getVertexDataSize (void) const
4217 {
4218 	const int numVectors	= 2;
4219 	const int vec4Size		= 4 * sizeof(float);
4220 
4221 	return numVectors * vec4Size;
4222 }
4223 
4224 template <typename SampleType>
getNumSamples(void) const4225 int RenderCase<SampleType>::getNumSamples (void) const
4226 {
4227 	return (int)m_results.size();
4228 }
4229 
4230 template <typename SampleType>
uploadScene(const LayeredGridSpec & scene)4231 void RenderCase<SampleType>::uploadScene (const LayeredGridSpec& scene)
4232 {
4233 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4234 
4235 	// vertex buffer
4236 	{
4237 		std::vector<tcu::Vec4> vertexData;
4238 
4239 		generateLayeredGridVertexAttribData4C4V(vertexData, scene);
4240 
4241 		if (m_attributeBufferID == 0)
4242 			gl.genBuffers(1, &m_attributeBufferID);
4243 		gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
4244 		gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4245 	}
4246 
4247 	// index buffer
4248 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4249 	{
4250 		std::vector<deUint32> indexData;
4251 
4252 		generateLayeredGridIndexData(indexData, scene);
4253 
4254 		if (m_indexBufferID == 0)
4255 			gl.genBuffers(1, &m_indexBufferID);
4256 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
4257 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4258 	}
4259 
4260 	GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
4261 }
4262 
4263 template <typename SampleType>
logAndSetTestResult(const std::vector<SampleResult> & results)4264 void RenderCase<SampleType>::logAndSetTestResult (const std::vector<SampleResult>& results)
4265 {
4266 	std::vector<RenderSampleResult<SampleType> > mappedResults;
4267 
4268 	mapResultsToRenderRateFormat(mappedResults, results);
4269 
4270 	{
4271 		const RenderSampleAnalyzeResult	analysis	= analyzeSampleResults(m_testCtx.getLog(), mappedResults);
4272 		const float						rate		= analysis.renderRateAtRange;
4273 
4274 		if (rate == std::numeric_limits<float>::infinity())
4275 		{
4276 			// sample times are 1) invalid or 2) timer resolution too low
4277 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
4278 		}
4279 		else
4280 		{
4281 			// report transfer rate in millions of MiB/s
4282 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
4283 		}
4284 	}
4285 }
4286 
4287 template <typename SampleType>
mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> & dst,const std::vector<SampleResult> & src) const4288 void RenderCase<SampleType>::mapResultsToRenderRateFormat (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const
4289 {
4290 	dst.resize(src.size());
4291 
4292 	for (int ndx = 0; ndx < (int)src.size(); ++ndx)
4293 		dst[ndx] = src[ndx].result;
4294 }
4295 
4296 class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
4297 {
4298 public:
4299 			ReferenceRenderTimeCase		(Context& context, const char* name, const char* description, DrawMethod drawMethod);
4300 
4301 private:
4302 	void	init						(void);
4303 	void	runSample					(SampleResult& sample);
4304 };
4305 
ReferenceRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod drawMethod)4306 ReferenceRenderTimeCase::ReferenceRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4307 	: RenderCase<RenderReadDuration>	(context, name, description, drawMethod)
4308 {
4309 }
4310 
init(void)4311 void ReferenceRenderTimeCase::init (void)
4312 {
4313 	const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4314 
4315 	// init parent
4316 	RenderCase<RenderReadDuration>::init();
4317 
4318 	// log
4319 	m_testCtx.getLog()
4320 		<< tcu::TestLog::Message
4321 		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4322 		<< getNumSamples() << " test samples. Sample order is randomized.\n"
4323 		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4324 		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4325 		<< "Workload sizes are in the range ["
4326 			<< getMinWorkloadSize() << ",  "
4327 			<< getMaxWorkloadSize() << "] vertices (["
4328 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4329 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4330 		<< "Test result is the approximated total processing rate in MiB / s.\n"
4331 		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4332 		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4333 		<< tcu::TestLog::EndMessage;
4334 }
4335 
runSample(SampleResult & sample)4336 void ReferenceRenderTimeCase::runSample (SampleResult& sample)
4337 {
4338 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
4339 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4340 	const int				numVertices		= getLayeredGridNumVertices(sample.scene);
4341 	const glu::Buffer		arrayBuffer		(m_context.getRenderContext());
4342 	const glu::Buffer		indexBuffer		(m_context.getRenderContext());
4343 	std::vector<tcu::Vec4>	vertexData;
4344 	std::vector<deUint32>	indexData;
4345 	deUint64				startTime;
4346 	deUint64				endTime;
4347 
4348 	// generate and upload buffers
4349 
4350 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4351 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4352 	gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4353 
4354 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4355 	{
4356 		generateLayeredGridIndexData(indexData, sample.scene);
4357 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4358 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4359 	}
4360 
4361 	setupVertexAttribs();
4362 
4363 	// make sure data is uploaded
4364 
4365 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4366 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4367 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4368 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4369 	else
4370 		DE_ASSERT(false);
4371 	waitGLResults();
4372 
4373 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4374 	gl.clear(GL_COLOR_BUFFER_BIT);
4375 	waitGLResults();
4376 
4377 	tcu::warmupCPU();
4378 
4379 	// Measure both draw and associated readpixels
4380 	{
4381 		startTime = deGetMicroseconds();
4382 
4383 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4384 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4385 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4386 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4387 		else
4388 			DE_ASSERT(false);
4389 
4390 		endTime = deGetMicroseconds();
4391 
4392 		sample.result.duration.renderDuration = endTime - startTime;
4393 	}
4394 
4395 	{
4396 		startTime = deGetMicroseconds();
4397 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4398 		endTime = deGetMicroseconds();
4399 
4400 		sample.result.duration.readDuration = endTime - startTime;
4401 	}
4402 
4403 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4404 	sample.result.uploadedDataSize = 0;
4405 	sample.result.unrelatedDataSize = 0;
4406 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4407 	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4408 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4409 }
4410 
4411 class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
4412 {
4413 public:
4414 									UnrelatedUploadRenderTimeCase	(Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod);
4415 
4416 private:
4417 	void							init							(void);
4418 	void							runSample						(SampleResult& sample);
4419 
4420 	const UploadMethod				m_unrelatedUploadMethod;
4421 };
4422 
UnrelatedUploadRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod drawMethod,UploadMethod unrelatedUploadMethod)4423 UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod)
4424 	: RenderCase<UnrelatedUploadRenderReadDuration>	(context, name, description, drawMethod)
4425 	, m_unrelatedUploadMethod						(unrelatedUploadMethod)
4426 {
4427 	DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST);
4428 }
4429 
init(void)4430 void UnrelatedUploadRenderTimeCase::init (void)
4431 {
4432 	const char* const	targetFunctionName	= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4433 	tcu::MessageBuilder	message				(&m_testCtx.getLog());
4434 
4435 	// init parent
4436 	RenderCase<UnrelatedUploadRenderReadDuration>::init();
4437 
4438 	// log
4439 
4440 	message
4441 		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4442 		<< "Uploading an unrelated buffer just before issuing the rendering command with "
4443 			<< ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")		:
4444 				(m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")		:
4445 				(m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange")	:
4446 				((const char*)DE_NULL))
4447 			<< ".\n"
4448 		<< getNumSamples() << " test samples. Sample order is randomized.\n"
4449 		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4450 		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4451 		<< "Workload sizes are in the range ["
4452 			<< getMinWorkloadSize() << ",  "
4453 			<< getMaxWorkloadSize() << "] vertices (["
4454 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4455 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4456 		<< "Unrelated upload sizes are in the range ["
4457 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
4458 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
4459 		<< "Test result is the approximated total processing rate in MiB / s.\n"
4460 		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4461 		<< "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
4462 		<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and upload_and_draw.*_and_unrelated_upload group results.\n"
4463 		<< tcu::TestLog::EndMessage;
4464 }
4465 
runSample(SampleResult & sample)4466 void UnrelatedUploadRenderTimeCase::runSample (SampleResult& sample)
4467 {
4468 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
4469 	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4470 	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
4471 	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
4472 	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
4473 	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
4474 	int						unrelatedUploadSize	= -1;
4475 	int						renderUploadSize;
4476 	std::vector<tcu::Vec4>	vertexData;
4477 	std::vector<deUint32>	indexData;
4478 	deUint64				startTime;
4479 	deUint64				endTime;
4480 
4481 	// generate and upload buffers
4482 
4483 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4484 	renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4485 
4486 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4487 	gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);
4488 
4489 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4490 	{
4491 		generateLayeredGridIndexData(indexData, sample.scene);
4492 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4493 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4494 	}
4495 
4496 	setupVertexAttribs();
4497 
4498 	// make sure data is uploaded
4499 
4500 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4501 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4502 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4503 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4504 	else
4505 		DE_ASSERT(false);
4506 	waitGLResults();
4507 
4508 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4509 	gl.clear(GL_COLOR_BUFFER_BIT);
4510 	waitGLResults();
4511 
4512 	tcu::warmupCPU();
4513 
4514 	// Unrelated upload
4515 	if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
4516 	{
4517 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4518 
4519 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4520 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4521 	}
4522 	else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4523 	{
4524 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4525 
4526 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4527 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4528 		gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
4529 	}
4530 	else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4531 	{
4532 		void*			mapPtr;
4533 		glw::GLboolean	unmapSuccessful;
4534 
4535 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4536 
4537 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4538 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4539 
4540 		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4541 		if (!mapPtr)
4542 			throw tcu::Exception("MapBufferRange returned NULL");
4543 
4544 		deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);
4545 
4546 		// if unmapping fails, just try again later
4547 		unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
4548 		if (!unmapSuccessful)
4549 			throw UnmapFailureError();
4550 	}
4551 	else
4552 		DE_ASSERT(false);
4553 
4554 	DE_ASSERT(unrelatedUploadSize != -1);
4555 
4556 	// Measure both draw and associated readpixels
4557 	{
4558 		startTime = deGetMicroseconds();
4559 
4560 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4561 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4562 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4563 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4564 		else
4565 			DE_ASSERT(false);
4566 
4567 		endTime = deGetMicroseconds();
4568 
4569 		sample.result.duration.renderDuration = endTime - startTime;
4570 	}
4571 
4572 	{
4573 		startTime = deGetMicroseconds();
4574 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4575 		endTime = deGetMicroseconds();
4576 
4577 		sample.result.duration.readDuration = endTime - startTime;
4578 	}
4579 
4580 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4581 	sample.result.uploadedDataSize = renderUploadSize;
4582 	sample.result.unrelatedDataSize = unrelatedUploadSize;
4583 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4584 	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4585 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4586 }
4587 
4588 class ReferenceReadPixelsTimeCase : public TestCase
4589 {
4590 public:
4591 					ReferenceReadPixelsTimeCase		(Context& context, const char* name, const char* description);
4592 
4593 private:
4594 	void			init							(void);
4595 	IterateResult	iterate							(void);
4596 	void			logAndSetTestResult				(void);
4597 
4598 	enum
4599 	{
4600 		RENDER_AREA_SIZE = 128
4601 	};
4602 
4603 	const int			m_numSamples;
4604 	int					m_sampleNdx;
4605 	std::vector<int>	m_samples;
4606 };
4607 
ReferenceReadPixelsTimeCase(Context & context,const char * name,const char * description)4608 ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase (Context& context, const char* name, const char* description)
4609 	: TestCase		(context, tcu::NODETYPE_PERFORMANCE, name, description)
4610 	, m_numSamples	(20)
4611 	, m_sampleNdx	(0)
4612 	, m_samples		(m_numSamples)
4613 {
4614 }
4615 
init(void)4616 void ReferenceReadPixelsTimeCase::init (void)
4617 {
4618 	m_testCtx.getLog()
4619 		<< tcu::TestLog::Message
4620 		<< "Measuring the time used in a single readPixels call with " << m_numSamples << " test samples.\n"
4621 		<< "Test result is the median of the samples in microseconds.\n"
4622 		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4623 		<< tcu::TestLog::EndMessage;
4624 }
4625 
iterate(void)4626 ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate (void)
4627 {
4628 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
4629 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4630 	deUint64				startTime;
4631 	deUint64				endTime;
4632 
4633 	deYield();
4634 	tcu::warmupCPU();
4635 	deYield();
4636 
4637 	// "Render" something and wait for it
4638 	gl.clearColor(0.0f, 1.0f, m_sampleNdx / float(m_numSamples), 1.0f);
4639 	gl.clear(GL_COLOR_BUFFER_BIT);
4640 
4641 	// wait for results
4642 	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4643 
4644 	// measure time used in readPixels
4645 	startTime = deGetMicroseconds();
4646 	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4647 	endTime = deGetMicroseconds();
4648 
4649 	m_samples[m_sampleNdx] = (int)(endTime - startTime);
4650 
4651 	if (++m_sampleNdx < m_numSamples)
4652 		return CONTINUE;
4653 
4654 	logAndSetTestResult();
4655 	return STOP;
4656 }
4657 
logAndSetTestResult(void)4658 void ReferenceReadPixelsTimeCase::logAndSetTestResult (void)
4659 {
4660 	// Log sample list
4661 	{
4662 		m_testCtx.getLog()
4663 			<< tcu::TestLog::SampleList("Samples", "Samples")
4664 			<< tcu::TestLog::SampleInfo
4665 			<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
4666 			<< tcu::TestLog::EndSampleInfo;
4667 
4668 		for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
4669 			m_testCtx.getLog()
4670 				<< tcu::TestLog::Sample
4671 				<< m_samples[sampleNdx]
4672 				<< tcu::TestLog::EndSample;
4673 
4674 		m_testCtx.getLog() << tcu::TestLog::EndSampleList;
4675 	}
4676 
4677 	// Log median
4678 	{
4679 		float median;
4680 		float limit60Low;
4681 		float limit60Up;
4682 
4683 		std::sort(m_samples.begin(), m_samples.end());
4684 		median		= linearSample(m_samples, 0.5f);
4685 		limit60Low	= linearSample(m_samples, 0.2f);
4686 		limit60Up	= linearSample(m_samples, 0.8f);
4687 
4688 		m_testCtx.getLog()
4689 			<< tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
4690 			<< tcu::TestLog::Message
4691 			<< "60 % of samples within range:\n"
4692 			<< tcu::TestLog::EndMessage
4693 			<< tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
4694 			<< tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);
4695 
4696 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
4697 	}
4698 }
4699 
4700 template <typename SampleType>
4701 class GenericUploadRenderTimeCase : public RenderCase<SampleType>
4702 {
4703 public:
4704 	typedef typename RenderCase<SampleType>::SampleResult SampleResult;
4705 
4706 							GenericUploadRenderTimeCase	(Context&				context,
4707 														 const char*			name,
4708 														 const char*			description,
4709 														 DrawMethod				method,
4710 														 TargetBuffer			targetBuffer,
4711 														 UploadMethod			uploadMethod,
4712 														 BufferState			bufferState,
4713 														 UploadRange			uploadRange,
4714 														 UnrelatedBufferType	unrelatedBufferType);
4715 
4716 private:
4717 	void						init					(void);
4718 	void						runSample				(SampleResult& sample);
4719 
4720 	using RenderCase<SampleType>::RENDER_AREA_SIZE;
4721 
4722 	const TargetBuffer			m_targetBuffer;
4723 	const BufferState			m_bufferState;
4724 	const UploadMethod			m_uploadMethod;
4725 	const UnrelatedBufferType	m_unrelatedBufferType;
4726 	const UploadRange			m_uploadRange;
4727 
4728 	using RenderCase<SampleType>::m_context;
4729 	using RenderCase<SampleType>::m_testCtx;
4730 	using RenderCase<SampleType>::m_drawMethod;
4731 };
4732 
4733 template <typename SampleType>
GenericUploadRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod method,TargetBuffer targetBuffer,UploadMethod uploadMethod,BufferState bufferState,UploadRange uploadRange,UnrelatedBufferType unrelatedBufferType)4734 GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase (Context&				context,
4735 																	  const char*			name,
4736 																	  const char*			description,
4737 																	  DrawMethod			method,
4738 																	  TargetBuffer			targetBuffer,
4739 																	  UploadMethod			uploadMethod,
4740 																	  BufferState			bufferState,
4741 																	  UploadRange			uploadRange,
4742 																	  UnrelatedBufferType	unrelatedBufferType)
4743 	: RenderCase<SampleType>	(context, name, description, method)
4744 	, m_targetBuffer			(targetBuffer)
4745 	, m_bufferState				(bufferState)
4746 	, m_uploadMethod			(uploadMethod)
4747 	, m_unrelatedBufferType		(unrelatedBufferType)
4748 	, m_uploadRange				(uploadRange)
4749 {
4750 	DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
4751 	DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
4752 	DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
4753 	DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
4754 	DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
4755 }
4756 
4757 template <typename SampleType>
init(void)4758 void GenericUploadRenderTimeCase<SampleType>::init (void)
4759 {
4760 	// init parent
4761 	RenderCase<SampleType>::init();
4762 
4763 	// log
4764 	{
4765 		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4766 		const int			perVertexSize			= (m_targetBuffer == TARGETBUFFER_INDEX) ? (sizeof(deUint32)) : (sizeof(tcu::Vec4[2]));
4767 		const int			fullMinUploadSize		= RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
4768 		const int			fullMaxUploadSize		= RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
4769 		const int			minUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize/2, 4));
4770 		const int			maxUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize/2, 4));
4771 		const int			minUnrelatedUploadSize	= RenderCase<SampleType>::getMinWorkloadSize() * sizeof(tcu::Vec4[2]);
4772 		const int			maxUnrelatedUploadSize	= RenderCase<SampleType>::getMaxWorkloadSize() * sizeof(tcu::Vec4[2]);
4773 
4774 		m_testCtx.getLog()
4775 			<< tcu::TestLog::Message
4776 			<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4777 			<< "The "
4778 				<< ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib"))
4779 				<< " buffer "
4780 				<< ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents "))
4781 				<< "sourced by the rendering command "
4782 				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded ") :
4783 					(m_uploadRange == UPLOADRANGE_FULL)		? ("are specified ") :
4784 					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("are updated (partial upload) ") :
4785 					((const char*)DE_NULL))
4786 				<< "just before issuing the rendering command.\n"
4787 			<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") : ("The buffer is generated just before uploading.\n"))
4788 			<< "Buffer "
4789 				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded") :
4790 					(m_uploadRange == UPLOADRANGE_FULL)		? ("contents are specified") :
4791 					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("contents are partially updated") :
4792 					((const char*)DE_NULL))
4793 				<< " with "
4794 				<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange"))
4795 				<< " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
4796 			<< ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | MAP_UNSYNCHRONIZED_BIT\n") : (""))
4797 			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") : (""))
4798 			<< RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
4799 			<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4800 			<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4801 			<< "Workload sizes are in the range ["
4802 				<< RenderCase<SampleType>::getMinWorkloadSize() << ",  "
4803 				<< RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
4804 				<< "(["
4805 				<< getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
4806 				<< getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
4807 			<< "Upload sizes are in the range ["
4808 				<< getHumanReadableByteSize(minUploadSize) << ","
4809 				<< getHumanReadableByteSize(maxUploadSize) << "].\n"
4810 			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4811 				("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) + ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
4812 				(""))
4813 			<< "Test result is the approximated processing rate in MiB / s.\n"
4814 			<< "Note that while upload time is measured, the time used is not included in the results.\n"
4815 			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Note that the data size and the time used in the unrelated upload is not included in the results.\n") : (""))
4816 			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4817 			<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and other upload_and_draw.* group results.\n"
4818 			<< tcu::TestLog::EndMessage;
4819 	}
4820 }
4821 
4822 template <typename SampleType>
runSample(SampleResult & sample)4823 void GenericUploadRenderTimeCase<SampleType>::runSample (SampleResult& sample)
4824 {
4825 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
4826 	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
4827 	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
4828 	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
4829 	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
4830 	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4831 	deUint64				startTime;
4832 	deUint64				endTime;
4833 	std::vector<tcu::Vec4>	vertexData;
4834 	std::vector<deUint32>	indexData;
4835 
4836 	// create data
4837 
4838 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4839 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4840 		generateLayeredGridIndexData(indexData, sample.scene);
4841 
4842 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4843 	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4844 	RenderCase<SampleType>::setupVertexAttribs();
4845 
4846 	// target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu
4847 
4848 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
4849 	{
4850 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_DYNAMIC_DRAW);
4851 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4852 	}
4853 	else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
4854 	{
4855 		// do not touch the vertex buffer
4856 	}
4857 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
4858 	{
4859 		// hint that the target buffer will be modified soon
4860 		const glw::GLenum vertexDataUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
4861 		const glw::GLenum indexDataUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
4862 
4863 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], vertexDataUsage);
4864 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], indexDataUsage);
4865 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4866 	}
4867 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
4868 	{
4869 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
4870 		{
4871 			// make the index buffer present on the gpu
4872 			// use another vertex buffer to keep original buffer in unused state
4873 			const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
4874 
4875 			gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
4876 			RenderCase<SampleType>::setupVertexAttribs();
4877 
4878 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4879 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4880 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4881 
4882 			// restore original state
4883 			gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4884 			RenderCase<SampleType>::setupVertexAttribs();
4885 		}
4886 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
4887 		{
4888 			// make the vertex buffer present on the gpu
4889 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4890 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4891 		}
4892 		else
4893 			DE_ASSERT(false);
4894 	}
4895 	else
4896 		DE_ASSERT(false);
4897 
4898 	RenderCase<SampleType>::waitGLResults();
4899 	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
4900 
4901 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4902 	gl.clear(GL_COLOR_BUFFER_BIT);
4903 	RenderCase<SampleType>::waitGLResults();
4904 
4905 	tcu::warmupCPU();
4906 
4907 	// upload
4908 
4909 	{
4910 		glw::GLenum		target;
4911 		glw::GLsizeiptr	size;
4912 		glw::GLintptr	offset = 0;
4913 		const void*		source;
4914 
4915 		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
4916 		{
4917 			target	= GL_ARRAY_BUFFER;
4918 			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
4919 			source	= &vertexData[0];
4920 		}
4921 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
4922 		{
4923 			target	= GL_ELEMENT_ARRAY_BUFFER;
4924 			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
4925 			source	= &indexData[0];
4926 		}
4927 		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
4928 		{
4929 			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
4930 
4931 			target	= GL_ARRAY_BUFFER;
4932 			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
4933 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
4934 			source	= (const deUint8*)&vertexData[0] + offset;
4935 		}
4936 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
4937 		{
4938 			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
4939 
4940 			// upload to 25% - 75% range
4941 			target	= GL_ELEMENT_ARRAY_BUFFER;
4942 			size	= (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4);
4943 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
4944 			source	= (const deUint8*)&indexData[0] + offset;
4945 		}
4946 		else
4947 		{
4948 			DE_ASSERT(false);
4949 			return;
4950 		}
4951 
4952 		startTime = deGetMicroseconds();
4953 
4954 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
4955 			gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
4956 		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4957 		{
4958 			// create buffer storage
4959 			if (m_bufferState == BUFFERSTATE_NEW)
4960 				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
4961 			gl.bufferSubData(target, offset, size, source);
4962 		}
4963 		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4964 		{
4965 			void*			mapPtr;
4966 			glw::GLboolean	unmapSuccessful;
4967 
4968 			// create buffer storage
4969 			if (m_bufferState == BUFFERSTATE_NEW)
4970 				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
4971 
4972 			mapPtr = gl.mapBufferRange(target, offset, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4973 			if (!mapPtr)
4974 				throw tcu::Exception("MapBufferRange returned NULL");
4975 
4976 			deMemcpy(mapPtr, source, (int)size);
4977 
4978 			// if unmapping fails, just try again later
4979 			unmapSuccessful = gl.unmapBuffer(target);
4980 			if (!unmapSuccessful)
4981 				throw UnmapFailureError();
4982 		}
4983 		else
4984 			DE_ASSERT(false);
4985 
4986 		endTime = deGetMicroseconds();
4987 
4988 		sample.result.uploadedDataSize = (int)size;
4989 		sample.result.duration.uploadDuration = endTime - startTime;
4990 	}
4991 
4992 	// unrelated
4993 	if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
4994 	{
4995 		const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4996 
4997 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4998 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4999 		// Attibute pointers are not modified, no need restore state
5000 
5001 		sample.result.unrelatedDataSize = unrelatedUploadSize;
5002 	}
5003 
5004 	// draw
5005 	{
5006 		startTime = deGetMicroseconds();
5007 
5008 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5009 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5010 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5011 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5012 		else
5013 			DE_ASSERT(false);
5014 
5015 		endTime = deGetMicroseconds();
5016 
5017 		sample.result.duration.renderDuration = endTime - startTime;
5018 	}
5019 
5020 	// read
5021 	{
5022 		startTime = deGetMicroseconds();
5023 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5024 		endTime = deGetMicroseconds();
5025 
5026 		sample.result.duration.readDuration = endTime - startTime;
5027 	}
5028 
5029 	// set results
5030 
5031 	sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;
5032 
5033 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
5034 	sample.result.duration.totalDuration = sample.result.duration.uploadDuration + sample.result.duration.renderDuration + sample.result.duration.readDuration;
5035 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5036 }
5037 
5038 class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
5039 {
5040 public:
5041 	enum MapFlags
5042 	{
5043 		MAPFLAG_NONE = 0,
5044 		MAPFLAG_INVALIDATE_BUFFER,
5045 		MAPFLAG_INVALIDATE_RANGE,
5046 
5047 		MAPFLAG_LAST
5048 	};
5049 	enum UploadBufferTarget
5050 	{
5051 		UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,
5052 		UPLOADBUFFERTARGET_SAME_BUFFER,
5053 
5054 		UPLOADBUFFERTARGET_LAST
5055 	};
5056 								BufferInUseRenderTimeCase	(Context&			context,
5057 															 const char*		name,
5058 															 const char*		description,
5059 															 DrawMethod			method,
5060 															 MapFlags			mapFlags,
5061 															 TargetBuffer		targetBuffer,
5062 															 UploadMethod		uploadMethod,
5063 															 UploadRange		uploadRange,
5064 															 UploadBufferTarget	uploadTarget);
5065 
5066 private:
5067 	void						init						(void);
5068 	void						runSample					(SampleResult& sample);
5069 
5070 	const TargetBuffer			m_targetBuffer;
5071 	const UploadMethod			m_uploadMethod;
5072 	const UploadRange			m_uploadRange;
5073 	const MapFlags				m_mapFlags;
5074 	const UploadBufferTarget	m_uploadBufferTarget;
5075 };
5076 
BufferInUseRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod method,MapFlags mapFlags,TargetBuffer targetBuffer,UploadMethod uploadMethod,UploadRange uploadRange,UploadBufferTarget uploadTarget)5077 BufferInUseRenderTimeCase::BufferInUseRenderTimeCase (Context&				context,
5078 													  const char*			name,
5079 													  const char*			description,
5080 													  DrawMethod			method,
5081 													  MapFlags				mapFlags,
5082 													  TargetBuffer			targetBuffer,
5083 													  UploadMethod			uploadMethod,
5084 													  UploadRange			uploadRange,
5085 													  UploadBufferTarget	uploadTarget)
5086 	: RenderCase<RenderUploadRenderReadDuration>	(context, name, description, method)
5087 	, m_targetBuffer								(targetBuffer)
5088 	, m_uploadMethod								(uploadMethod)
5089 	, m_uploadRange									(uploadRange)
5090 	, m_mapFlags									(mapFlags)
5091 	, m_uploadBufferTarget							(uploadTarget)
5092 {
5093 	DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
5094 	DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
5095 	DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
5096 	DE_ASSERT(m_mapFlags < MAPFLAG_LAST);
5097 	DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST);
5098 }
5099 
init(void)5100 void BufferInUseRenderTimeCase::init (void)
5101 {
5102 	RenderCase<RenderUploadRenderReadDuration>::init();
5103 
5104 	// log
5105 	{
5106 		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5107 		const char* const	uploadFunctionName		= (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange");
5108 		const bool			isReferenceCase			= (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
5109 		tcu::MessageBuilder	message					(&m_testCtx.getLog());
5110 
5111 		message	<< "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
5112 				<< targetFunctionName << " call using the uploaded buffer and readPixels call with different upload sizes.\n";
5113 
5114 		if (isReferenceCase)
5115 			message << "Rendering:\n"
5116 					<< "    before test: create and use buffers B and C\n"
5117 					<< "    first draw: render using buffer B\n"
5118 					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer C contents\n")	:
5119 						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer C contents\n")	:
5120 						((const char*)DE_NULL))
5121 					<< "    second draw: render using buffer C\n"
5122 					<< "    read: readPixels\n";
5123 		else
5124 			message << "Rendering:\n"
5125 					<< "    before test: create and use buffer B\n"
5126 					<< "    first draw: render using buffer B\n"
5127 					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer B contents\n")	:
5128 						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer B contents\n")	:
5129 						((const char*)DE_NULL))
5130 					<< "    second draw: render using buffer B\n"
5131 					<< "    read: readPixels\n";
5132 
5133 		message	<< "Uploading using " << uploadFunctionName
5134 					<< ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT")	:
5135 						(m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT")	:
5136 						(m_mapFlags == MAPFLAG_NONE)				? ("")														:
5137 						((const char*)DE_NULL))
5138 					<< "\n"
5139 				<< getNumSamples() << " test samples. Sample order is randomized.\n"
5140 				<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5141 				<< "Workload sizes are in the range ["
5142 					<< getMinWorkloadSize() << ",  "
5143 					<< getMaxWorkloadSize() << "] vertices "
5144 					<< "(["
5145 					<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
5146 					<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
5147 				<< "Test result is the approximated processing rate in MiB / s of the second draw call and the readPixels call.\n";
5148 
5149 		if (isReferenceCase)
5150 			message	<< "Note! Test result should only be used as a baseline reference result for buffer.render_after_upload.draw_modify_draw test group results.";
5151 		else
5152 			message	<< "Note! Test result may not be useful as is but instead should be compared against the buffer.render_after_upload.reference.draw_upload_draw group results.\n";
5153 
5154 		message << tcu::TestLog::EndMessage;
5155 	}
5156 }
5157 
runSample(SampleResult & sample)5158 void BufferInUseRenderTimeCase::runSample (SampleResult& sample)
5159 {
5160 	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
5161 	const glu::Buffer		arrayBuffer				(m_context.getRenderContext());
5162 	const glu::Buffer		indexBuffer				(m_context.getRenderContext());
5163 	const glu::Buffer		alternativeUploadBuffer	(m_context.getRenderContext());
5164 	const int				numVertices				= getLayeredGridNumVertices(sample.scene);
5165 	tcu::Surface			resultSurface			(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5166 	deUint64				startTime;
5167 	deUint64				endTime;
5168 	std::vector<tcu::Vec4>	vertexData;
5169 	std::vector<deUint32>	indexData;
5170 
5171 	// create data
5172 
5173 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
5174 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5175 		generateLayeredGridIndexData(indexData, sample.scene);
5176 
5177 	// make buffers used
5178 
5179 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5180 	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5181 	setupVertexAttribs();
5182 
5183 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5184 	{
5185 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5186 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5187 	}
5188 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5189 	{
5190 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5191 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
5192 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5193 	}
5194 	else
5195 		DE_ASSERT(false);
5196 
5197 	// another pair of buffers for reference case
5198 	if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5199 	{
5200 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5201 		{
5202 			gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
5203 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5204 
5205 			setupVertexAttribs();
5206 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5207 		}
5208 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5209 		{
5210 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
5211 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
5212 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5213 		}
5214 		else
5215 			DE_ASSERT(false);
5216 
5217 		// restore state
5218 		gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5219 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5220 		setupVertexAttribs();
5221 	}
5222 
5223 	waitGLResults();
5224 	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
5225 
5226 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
5227 	gl.clear(GL_COLOR_BUFFER_BIT);
5228 	waitGLResults();
5229 
5230 	tcu::warmupCPU();
5231 
5232 	// first draw
5233 	{
5234 		startTime = deGetMicroseconds();
5235 
5236 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5237 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5238 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5239 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5240 		else
5241 			DE_ASSERT(false);
5242 
5243 		endTime = deGetMicroseconds();
5244 
5245 		sample.result.duration.firstRenderDuration = endTime - startTime;
5246 	}
5247 
5248 	// upload
5249 	{
5250 		glw::GLenum		target;
5251 		glw::GLsizeiptr	size;
5252 		glw::GLintptr	offset = 0;
5253 		const void*		source;
5254 
5255 		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
5256 		{
5257 			target	= GL_ARRAY_BUFFER;
5258 			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
5259 			source	= &vertexData[0];
5260 		}
5261 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
5262 		{
5263 			target	= GL_ELEMENT_ARRAY_BUFFER;
5264 			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
5265 			source	= &indexData[0];
5266 		}
5267 		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5268 		{
5269 			target	= GL_ARRAY_BUFFER;
5270 			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
5271 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
5272 			source	= (const deUint8*)&vertexData[0] + offset;
5273 		}
5274 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5275 		{
5276 			// upload to 25% - 75% range
5277 			target	= GL_ELEMENT_ARRAY_BUFFER;
5278 			size	= (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4);
5279 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
5280 			source	= (const deUint8*)&indexData[0] + offset;
5281 		}
5282 		else
5283 		{
5284 			DE_ASSERT(false);
5285 			return;
5286 		}
5287 
5288 		// reference case? don't modify the buffer in use
5289 		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5290 			gl.bindBuffer(target, *alternativeUploadBuffer);
5291 
5292 		startTime = deGetMicroseconds();
5293 
5294 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5295 			gl.bufferData(target, size, source, GL_STREAM_DRAW);
5296 		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5297 			gl.bufferSubData(target, offset, size, source);
5298 		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5299 		{
5300 			const int		mapFlags	= (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)	:
5301 										  (m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)	:
5302 										  (-1);
5303 			void*			mapPtr;
5304 			glw::GLboolean	unmapSuccessful;
5305 
5306 			mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
5307 			if (!mapPtr)
5308 				throw tcu::Exception("MapBufferRange returned NULL");
5309 
5310 			deMemcpy(mapPtr, source, (int)size);
5311 
5312 			// if unmapping fails, just try again later
5313 			unmapSuccessful = gl.unmapBuffer(target);
5314 			if (!unmapSuccessful)
5315 				throw UnmapFailureError();
5316 		}
5317 		else
5318 			DE_ASSERT(false);
5319 
5320 		endTime = deGetMicroseconds();
5321 
5322 		sample.result.uploadedDataSize = (int)size;
5323 		sample.result.duration.uploadDuration = endTime - startTime;
5324 	}
5325 
5326 	// second draw
5327 	{
5328 		// Source vertex data from alternative buffer in refernce case
5329 		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
5330 			setupVertexAttribs();
5331 
5332 		startTime = deGetMicroseconds();
5333 
5334 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5335 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5336 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5337 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5338 		else
5339 			DE_ASSERT(false);
5340 
5341 		endTime = deGetMicroseconds();
5342 
5343 		sample.result.duration.secondRenderDuration = endTime - startTime;
5344 	}
5345 
5346 	// read
5347 	{
5348 		startTime = deGetMicroseconds();
5349 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5350 		endTime = deGetMicroseconds();
5351 
5352 		sample.result.duration.readDuration = endTime - startTime;
5353 	}
5354 
5355 	// set results
5356 
5357 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
5358 
5359 	sample.result.duration.renderReadDuration	= sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
5360 	sample.result.duration.totalDuration		= sample.result.duration.firstRenderDuration +
5361 												  sample.result.duration.uploadDuration +
5362 												  sample.result.duration.secondRenderDuration +
5363 												  sample.result.duration.readDuration;
5364 	sample.result.duration.fitResponseDuration	= sample.result.duration.renderReadDuration;
5365 }
5366 
5367 class UploadWaitDrawCase : public RenderPerformanceTestBase
5368 {
5369 public:
5370 	struct Sample
5371 	{
5372 		int			numFrames;
5373 		deUint64	uploadCallEndTime;
5374 	};
5375 	struct Result
5376 	{
5377 		deUint64	uploadDuration;
5378 		deUint64	renderDuration;
5379 		deUint64	readDuration;
5380 		deUint64	renderReadDuration;
5381 
5382 		deUint64	timeBeforeUse;
5383 	};
5384 
5385 							UploadWaitDrawCase				(Context&		context,
5386 															 const char*	name,
5387 															 const char*	description,
5388 															 DrawMethod		drawMethod,
5389 															 TargetBuffer	targetBuffer,
5390 															 UploadMethod	uploadMethod,
5391 															 BufferState	bufferState);
5392 							~UploadWaitDrawCase				(void);
5393 
5394 private:
5395 	void					init							(void);
5396 	void					deinit							(void);
5397 	IterateResult			iterate							(void);
5398 
5399 	void					uploadBuffer					(Sample& sample, Result& result);
5400 	void					drawFromBuffer					(Sample& sample, Result& result);
5401 	void					reuseAndDeleteBuffer			(void);
5402 	void					logAndSetTestResult				(void);
5403 	void					logSamples						(void);
5404 	void					drawMisc						(void);
5405 	int						findStabilizationSample			(deUint64 (Result::*target), const char* description);
5406 	bool					checkSampleTemporalStability	(deUint64 (Result::*target), const char* description);
5407 
5408 	const DrawMethod		m_drawMethod;
5409 	const TargetBuffer		m_targetBuffer;
5410 	const UploadMethod		m_uploadMethod;
5411 	const BufferState		m_bufferState;
5412 
5413 	const int				m_numSamplesPerSwap;
5414 	const int				m_numMaxSwaps;
5415 
5416 	int						m_frameNdx;
5417 	int						m_sampleNdx;
5418 	int						m_numVertices;
5419 
5420 	std::vector<tcu::Vec4>	m_vertexData;
5421 	std::vector<deUint32>	m_indexData;
5422 	std::vector<Sample>		m_samples;
5423 	std::vector<Result>		m_results;
5424 	std::vector<int>		m_iterationOrder;
5425 
5426 	deUint32				m_vertexBuffer;
5427 	deUint32				m_indexBuffer;
5428 	deUint32				m_miscBuffer;
5429 	int						m_numMiscVertices;
5430 };
5431 
UploadWaitDrawCase(Context & context,const char * name,const char * description,DrawMethod drawMethod,TargetBuffer targetBuffer,UploadMethod uploadMethod,BufferState bufferState)5432 UploadWaitDrawCase::UploadWaitDrawCase (Context&		context,
5433 										const char*		name,
5434 										const char*		description,
5435 										DrawMethod		drawMethod,
5436 										TargetBuffer	targetBuffer,
5437 										UploadMethod	uploadMethod,
5438 										BufferState		bufferState)
5439 	: RenderPerformanceTestBase	(context, name, description)
5440 	, m_drawMethod				(drawMethod)
5441 	, m_targetBuffer			(targetBuffer)
5442 	, m_uploadMethod			(uploadMethod)
5443 	, m_bufferState				(bufferState)
5444 	, m_numSamplesPerSwap		(10)
5445 	, m_numMaxSwaps				(4)
5446 	, m_frameNdx				(0)
5447 	, m_sampleNdx				(0)
5448 	, m_numVertices				(-1)
5449 	, m_vertexBuffer			(0)
5450 	, m_indexBuffer				(0)
5451 	, m_miscBuffer				(0)
5452 	, m_numMiscVertices			(-1)
5453 {
5454 }
5455 
~UploadWaitDrawCase(void)5456 UploadWaitDrawCase::~UploadWaitDrawCase (void)
5457 {
5458 	deinit();
5459 }
5460 
init(void)5461 void UploadWaitDrawCase::init (void)
5462 {
5463 	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
5464 	const int				vertexAttribSize		= (int)sizeof(tcu::Vec4) * 2; // color4, position4
5465 	const int				vertexIndexSize			= (int)sizeof(deUint32);
5466 	const int				vertexUploadDataSize	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);
5467 
5468 	RenderPerformanceTestBase::init();
5469 
5470 	// requirements
5471 
5472 	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
5473 		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
5474 		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
5475 
5476 	// gl state
5477 
5478 	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5479 
5480 	// enable bleding to prevent grid layers from being discarded
5481 
5482 	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
5483 	gl.blendEquation(GL_FUNC_ADD);
5484 	gl.enable(GL_BLEND);
5485 
5486 	// scene
5487 
5488 	{
5489 		LayeredGridSpec scene;
5490 
5491 		// create ~8MB workload with similar characteristics as in the other test
5492 		// => makes comparison to other results more straightforward
5493 		scene.gridWidth = 93;
5494 		scene.gridHeight = 93;
5495 		scene.gridLayers = 5;
5496 
5497 		generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
5498 		generateLayeredGridIndexData(m_indexData, scene);
5499 		m_numVertices = getLayeredGridNumVertices(scene);
5500 	}
5501 
5502 	// buffers
5503 
5504 	if (m_bufferState == BUFFERSTATE_NEW)
5505 	{
5506 		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5507 		{
5508 			// reads from two buffers, prepare the static buffer
5509 
5510 			if (m_targetBuffer == TARGETBUFFER_VERTEX)
5511 			{
5512 				// index buffer is static, use another vertex buffer to keep original buffer in unused state
5513 				const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5514 
5515 				gl.genBuffers(1, &m_indexBuffer);
5516 				gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
5517 				gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5518 				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5519 				gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], GL_STATIC_DRAW);
5520 
5521 				setupVertexAttribs();
5522 				gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5523 			}
5524 			else if (m_targetBuffer == TARGETBUFFER_INDEX)
5525 			{
5526 				// vertex buffer is static
5527 				gl.genBuffers(1, &m_vertexBuffer);
5528 				gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5529 				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5530 
5531 				setupVertexAttribs();
5532 				gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5533 			}
5534 			else
5535 				DE_ASSERT(false);
5536 		}
5537 	}
5538 	else if (m_bufferState == BUFFERSTATE_EXISTING)
5539 	{
5540 		const glw::GLenum vertexUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5541 		const glw::GLenum indexUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5542 
5543 		gl.genBuffers(1, &m_vertexBuffer);
5544 		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5545 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], vertexUsage);
5546 
5547 		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5548 		{
5549 			gl.genBuffers(1, &m_indexBuffer);
5550 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5551 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], indexUsage);
5552 		}
5553 
5554 		setupVertexAttribs();
5555 
5556 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5557 			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5558 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5559 			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5560 		else
5561 			DE_ASSERT(false);
5562 	}
5563 	else
5564 		DE_ASSERT(false);
5565 
5566 	// misc draw buffer
5567 	{
5568 		std::vector<tcu::Vec4>	vertexData;
5569 		LayeredGridSpec			scene;
5570 
5571 		// create ~1.5MB workload with similar characteristics
5572 		scene.gridWidth = 40;
5573 		scene.gridHeight = 40;
5574 		scene.gridLayers = 5;
5575 
5576 		generateLayeredGridVertexAttribData4C4V(vertexData, scene);
5577 
5578 		gl.genBuffers(1, &m_miscBuffer);
5579 		gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
5580 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0], GL_STATIC_DRAW);
5581 
5582 		m_numMiscVertices = getLayeredGridNumVertices(scene);
5583 	}
5584 
5585 	// iterations
5586 	{
5587 		m_samples.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5588 		m_results.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5589 
5590 		for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
5591 		for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
5592 		{
5593 			const int index = numSwaps*m_numSamplesPerSwap + sampleNdx;
5594 
5595 			m_samples[index].numFrames = numSwaps;
5596 		}
5597 
5598 		m_iterationOrder.resize(m_samples.size());
5599 		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
5600 	}
5601 
5602 	// log
5603 	m_testCtx.getLog()
5604 		<< tcu::TestLog::Message
5605 		<< "Measuring time used in " << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
5606 		<< "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, " << m_numMaxSwaps << "].\n"
5607 		<< "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index")) << " buffer.\n"
5608 		<< "Uploading using "
5609 			<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")																							:
5610 				(m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")																							:
5611 				(m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT")	:
5612 				((const char*)DE_NULL))
5613 			<< "\n"
5614 		<< "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
5615 		<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
5616 		<< "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
5617 		<< "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
5618 		<< tcu::TestLog::EndMessage;
5619 }
5620 
deinit(void)5621 void UploadWaitDrawCase::deinit (void)
5622 {
5623 	RenderPerformanceTestBase::deinit();
5624 
5625 	if (m_vertexBuffer)
5626 	{
5627 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
5628 		m_vertexBuffer = 0;
5629 	}
5630 	if (m_indexBuffer)
5631 	{
5632 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
5633 		m_indexBuffer = 0;
5634 	}
5635 	if (m_miscBuffer)
5636 	{
5637 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
5638 		m_miscBuffer = 0;
5639 	}
5640 }
5641 
iterate(void)5642 UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate (void)
5643 {
5644 	const glw::Functions&	gl								= m_context.getRenderContext().getFunctions();
5645 	const int				betweenIterationDummyFrameCount = 5; // draw misc between test samples
5646 	const int				frameNdx						= m_frameNdx++;
5647 	const int				currentSampleNdx				= m_iterationOrder[m_sampleNdx];
5648 
5649 	// Simulate work for about 8ms
5650 	busyWait(8000);
5651 
5652 	// Dummy rendering during dummy frames
5653 	if (frameNdx != m_samples[currentSampleNdx].numFrames)
5654 	{
5655 		// draw similar from another buffer
5656 		drawMisc();
5657 	}
5658 
5659 	if (frameNdx == 0)
5660 	{
5661 		// upload and start the clock
5662 		uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5663 	}
5664 
5665 	if (frameNdx == m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0
5666 	{
5667 		// draw using the uploaded buffer
5668 		drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5669 
5670 		// re-use buffer for something else to make sure test iteration do not affect each other
5671 		if (m_bufferState == BUFFERSTATE_NEW)
5672 			reuseAndDeleteBuffer();
5673 	}
5674 	else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationDummyFrameCount)
5675 	{
5676 		// next sample
5677 		++m_sampleNdx;
5678 		m_frameNdx = 0;
5679 	}
5680 
5681 	GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");
5682 
5683 	if (m_sampleNdx < (int)m_samples.size())
5684 		return CONTINUE;
5685 
5686 	logAndSetTestResult();
5687 	return STOP;
5688 }
5689 
uploadBuffer(Sample & sample,Result & result)5690 void UploadWaitDrawCase::uploadBuffer (Sample& sample, Result& result)
5691 {
5692 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
5693 	deUint64				startTime;
5694 	deUint64				endTime;
5695 	glw::GLenum				target;
5696 	glw::GLsizeiptr			size;
5697 	const void*				source;
5698 
5699 	// data source
5700 
5701 	if (m_targetBuffer == TARGETBUFFER_VERTEX)
5702 	{
5703 		DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5704 
5705 		target	= GL_ARRAY_BUFFER;
5706 		size	= (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
5707 		source	= &m_vertexData[0];
5708 	}
5709 	else if (m_targetBuffer == TARGETBUFFER_INDEX)
5710 	{
5711 		DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5712 
5713 		target	= GL_ELEMENT_ARRAY_BUFFER;
5714 		size	= (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32));
5715 		source	= &m_indexData[0];
5716 	}
5717 	else
5718 	{
5719 		DE_ASSERT(false);
5720 		return;
5721 	}
5722 
5723 	// gen buffer
5724 
5725 	if (m_bufferState == BUFFERSTATE_NEW)
5726 	{
5727 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5728 		{
5729 			gl.genBuffers(1, &m_vertexBuffer);
5730 			gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5731 		}
5732 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5733 		{
5734 			gl.genBuffers(1, &m_indexBuffer);
5735 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5736 		}
5737 		else
5738 			DE_ASSERT(false);
5739 
5740 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA ||
5741 			m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5742 		{
5743 			gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW);
5744 		}
5745 	}
5746 	else if (m_bufferState == BUFFERSTATE_EXISTING)
5747 	{
5748 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5749 			gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5750 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5751 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5752 		else
5753 			DE_ASSERT(false);
5754 	}
5755 	else
5756 		DE_ASSERT(false);
5757 
5758 	// upload
5759 
5760 	startTime = deGetMicroseconds();
5761 
5762 	if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5763 		gl.bufferData(target, size, source, GL_STATIC_DRAW);
5764 	else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5765 		gl.bufferSubData(target, 0, size, source);
5766 	else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5767 	{
5768 		void*			mapPtr;
5769 		glw::GLboolean	unmapSuccessful;
5770 
5771 		mapPtr = gl.mapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
5772 		if (!mapPtr)
5773 			throw tcu::Exception("MapBufferRange returned NULL");
5774 
5775 		deMemcpy(mapPtr, source, (int)size);
5776 
5777 		// if unmapping fails, just try again later
5778 		unmapSuccessful = gl.unmapBuffer(target);
5779 		if (!unmapSuccessful)
5780 			throw UnmapFailureError();
5781 	}
5782 	else
5783 		DE_ASSERT(false);
5784 
5785 	endTime = deGetMicroseconds();
5786 
5787 	sample.uploadCallEndTime = endTime;
5788 	result.uploadDuration = endTime - startTime;
5789 }
5790 
drawFromBuffer(Sample & sample,Result & result)5791 void UploadWaitDrawCase::drawFromBuffer (Sample& sample, Result& result)
5792 {
5793 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
5794 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5795 	deUint64				startTime;
5796 	deUint64				endTime;
5797 
5798 	DE_ASSERT(m_vertexBuffer != 0);
5799 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5800 		DE_ASSERT(m_indexBuffer == 0);
5801 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5802 		DE_ASSERT(m_indexBuffer != 0);
5803 	else
5804 		DE_ASSERT(false);
5805 
5806 	// draw
5807 	{
5808 		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5809 		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5810 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5811 
5812 		setupVertexAttribs();
5813 
5814 		// microseconds passed since return from upload call
5815 		result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;
5816 
5817 		startTime = deGetMicroseconds();
5818 
5819 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5820 			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5821 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5822 			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5823 		else
5824 			DE_ASSERT(false);
5825 
5826 		endTime = deGetMicroseconds();
5827 
5828 		result.renderDuration = endTime - startTime;
5829 	}
5830 
5831 	// read
5832 	{
5833 		startTime = deGetMicroseconds();
5834 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5835 		endTime = deGetMicroseconds();
5836 
5837 		result.readDuration = endTime - startTime;
5838 	}
5839 
5840 	result.renderReadDuration = result.renderDuration + result.readDuration;
5841 }
5842 
reuseAndDeleteBuffer(void)5843 void UploadWaitDrawCase::reuseAndDeleteBuffer (void)
5844 {
5845 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
5846 
5847 	if (m_targetBuffer == TARGETBUFFER_INDEX)
5848 	{
5849 		// respecify and delete index buffer
5850 		static const deUint32 indices[3] = {1, 3, 8};
5851 
5852 		DE_ASSERT(m_indexBuffer != 0);
5853 
5854 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
5855 		gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);
5856 		gl.deleteBuffers(1, &m_indexBuffer);
5857 		m_indexBuffer = 0;
5858 	}
5859 	else if (m_targetBuffer == TARGETBUFFER_VERTEX)
5860 	{
5861 		// respecify and delete vertex buffer
5862 		static const tcu::Vec4 coloredTriangle[6] =
5863 		{
5864 			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f),
5865 			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.2f,  0.4f, 0.0f, 1.0f),
5866 			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4( 0.8f, -0.1f, 0.0f, 1.0f),
5867 		};
5868 
5869 		DE_ASSERT(m_vertexBuffer != 0);
5870 
5871 		gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
5872 		gl.drawArrays(GL_TRIANGLES, 0, 3);
5873 		gl.deleteBuffers(1, &m_vertexBuffer);
5874 		m_vertexBuffer = 0;
5875 	}
5876 
5877 	waitGLResults();
5878 }
5879 
logAndSetTestResult(void)5880 void UploadWaitDrawCase::logAndSetTestResult (void)
5881 {
5882 	int		uploadStabilization;
5883 	int		renderReadStabilization;
5884 	int		renderStabilization;
5885 	int		readStabilization;
5886 	bool	temporallyStable;
5887 
5888 	{
5889 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
5890 		logSamples();
5891 	}
5892 
5893 	{
5894 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");
5895 
5896 		// log stabilization points
5897 		renderReadStabilization	= findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
5898 		uploadStabilization		= findStabilizationSample(&Result::uploadDuration, "Upload time");
5899 		renderStabilization		= findStabilizationSample(&Result::renderDuration, "Draw call time");
5900 		readStabilization		= findStabilizationSample(&Result::readDuration, "ReadPixels time");
5901 
5902 		temporallyStable		= true;
5903 		temporallyStable		&= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
5904 		temporallyStable		&= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
5905 		temporallyStable		&= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
5906 		temporallyStable		&= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
5907 	}
5908 
5909 	{
5910 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");
5911 
5912 		// Check result sanily
5913 		if (uploadStabilization != 0)
5914 			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Upload times are not stable, test result may not be accurate." << tcu::TestLog::EndMessage;
5915 		if (!temporallyStable)
5916 			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Time samples do not seem to be temporally stable, sample times seem to drift to one direction during test execution." << tcu::TestLog::EndMessage;
5917 
5918 		// render & read
5919 		if (renderReadStabilization == -1)
5920 			m_testCtx.getLog() << tcu::TestLog::Message << "Combined time used in draw call and ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5921 		else
5922 			m_testCtx.getLog() << tcu::TestLog::Integer("RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, renderReadStabilization);
5923 
5924 		// draw call
5925 		if (renderStabilization == -1)
5926 			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize." << tcu::TestLog::EndMessage;
5927 		else
5928 			m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint", "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME, renderStabilization);
5929 
5930 		// readpixels
5931 		if (readStabilization == -1)
5932 			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5933 		else
5934 			m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint", "ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, readStabilization);
5935 
5936 		// Report renderReadStabilization
5937 		if (renderReadStabilization != -1)
5938 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
5939 		else
5940 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
5941 	}
5942 }
5943 
logSamples(void)5944 void UploadWaitDrawCase::logSamples (void)
5945 {
5946 	// Inverse m_iterationOrder
5947 
5948 	std::vector<int> runOrder(m_iterationOrder.size());
5949 	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
5950 		runOrder[m_iterationOrder[ndx]] = ndx;
5951 
5952 	// Log samples
5953 
5954 	m_testCtx.getLog()
5955 		<< tcu::TestLog::SampleList("Samples", "Samples")
5956 		<< tcu::TestLog::SampleInfo
5957 		<< tcu::TestLog::ValueInfo("NumSwaps",		"SwapBuffers before use",			"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
5958 		<< tcu::TestLog::ValueInfo("Delay",			"Time before use",					"us",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
5959 		<< tcu::TestLog::ValueInfo("RunOrder",		"Sample run order",					"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
5960 		<< tcu::TestLog::ValueInfo("DrawReadTime",	"Draw call and ReadPixels time",	"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5961 		<< tcu::TestLog::ValueInfo("TotalTime",		"Total time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5962 		<< tcu::TestLog::ValueInfo("Upload time",	"Upload time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5963 		<< tcu::TestLog::ValueInfo("DrawCallTime",	"Draw call time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5964 		<< tcu::TestLog::ValueInfo("ReadTime",		"ReadPixels time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5965 		<< tcu::TestLog::EndSampleInfo;
5966 
5967 	for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
5968 		m_testCtx.getLog()
5969 			<< tcu::TestLog::Sample
5970 			<< m_samples[sampleNdx].numFrames
5971 			<< (int)m_results[sampleNdx].timeBeforeUse
5972 			<< runOrder[sampleNdx]
5973 			<< (int)m_results[sampleNdx].renderReadDuration
5974 			<< (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
5975 			<< (int)m_results[sampleNdx].uploadDuration
5976 			<< (int)m_results[sampleNdx].renderDuration
5977 			<< (int)m_results[sampleNdx].readDuration
5978 			<< tcu::TestLog::EndSample;
5979 
5980 	m_testCtx.getLog() << tcu::TestLog::EndSampleList;
5981 }
5982 
drawMisc(void)5983 void UploadWaitDrawCase::drawMisc (void)
5984 {
5985 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
5986 
5987 	gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
5988 	setupVertexAttribs();
5989 	gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
5990 }
5991 
// Outcome of comparing two sets of observations with distributionCompare().
struct DistributionCompareResult
{
	// True when the z value stays within +-1.96, i.e. the sets are accepted as having the same distribution
	bool	equal;

	// z value of the comparison
	float	standardDeviations;
};
5997 
5998 template <typename Comparer>
sumOfRanks(const std::vector<deUint64> & testSamples,const std::vector<deUint64> & allSamples,const Comparer & comparer)5999 static float sumOfRanks (const std::vector<deUint64>& testSamples, const std::vector<deUint64>& allSamples, const Comparer& comparer)
6000 {
6001 	float sum = 0;
6002 
6003 	for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx)
6004 	{
6005 		const deUint64	testSample		= testSamples[sampleNdx];
6006 		const int		lowerIndex		= (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6007 		const int		upperIndex		= (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6008 		const int		lowerRank		= lowerIndex + 1;	// convert zero-indexed to rank
6009 		const int		upperRank		= upperIndex;		// convert zero-indexed to rank, upperIndex is last equal + 1
6010 		const float		rankMidpoint	= (lowerRank + upperRank) / 2.0f;
6011 
6012 		sum += rankMidpoint;
6013 	}
6014 
6015 	return sum;
6016 }
6017 
6018 template <typename Comparer>
distributionCompare(const std::vector<deUint64> & orderedObservationsA,const std::vector<deUint64> & orderedObservationsB,const Comparer & comparer)6019 static DistributionCompareResult distributionCompare (const std::vector<deUint64>& orderedObservationsA, const std::vector<deUint64>& orderedObservationsB, const Comparer& comparer)
6020 {
6021 	// Mann�Whitney U test
6022 
6023 	const int				n1			= (int)orderedObservationsA.size();
6024 	const int				n2			= (int)orderedObservationsB.size();
6025 	std::vector<deUint64>	allSamples	(n1 + n2);
6026 
6027 	std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin());
6028 	std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1);
6029 	std::sort(allSamples.begin(), allSamples.end());
6030 
6031 	{
6032 		const float					R1		= sumOfRanks(orderedObservationsA, allSamples, comparer);
6033 
6034 		const float					U1		= n1*n2 + n1*(n1 + 1)/2 - R1;
6035 		const float					U2		= (n1 * n2) - U1;
6036 		const float					U		= de::min(U1, U2);
6037 
6038 		// \note: sample sizes might not be large enough to expect normal distribution but we do it anyway
6039 
6040 		const float					mU		= n1*n2 / 2.0f;
6041 		const float					sigmaU	= deFloatSqrt((n1*n2*(n1+n2+1)) / 12.0f);
6042 		const float					z		= (U - mU) / sigmaU;
6043 
6044 		DistributionCompareResult	result;
6045 
6046 		result.equal				= (de::abs(z) <= 1.96f); // accept within 95% confidence interval
6047 		result.standardDeviations	= z;
6048 
6049 		return result;
6050 	}
6051 }
6052 
// "Less than" functor with tolerances: a is ordered before b only when a < b
// and the two values differ by more than both the absolute threshold and the
// relative threshold (taken relative to either value). Values within a
// tolerance are treated as equivalent.
template <typename T>
struct ThresholdComparer
{
	float	relativeThreshold;	// allowed difference as a fraction of either value
	T		absoluteThreshold;	// allowed difference in the unit of T

	bool operator() (const T& a, const T& b) const
	{
		// absolute difference, computed in float
		const float	lhs				= (float)a;
		const float	rhs				= (float)b;
		const float	distance		= (lhs < rhs) ? (rhs - lhs) : (lhs - rhs);

		const bool	withinAbsolute	= (distance <= (float)absoluteThreshold);
		const bool	withinRelative	= (distance <= a*relativeThreshold) || (distance <= b*relativeThreshold);

		// inside a tolerance: neither value is ordered before the other
		if (withinAbsolute || withinRelative)
			return false;

		// outside all tolerances: plain comparison
		return a < b;
	}
};
6074 
findStabilizationSample(deUint64 (UploadWaitDrawCase::Result::* target),const char * description)6075 int UploadWaitDrawCase::findStabilizationSample (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6076 {
6077 	std::vector<std::vector<deUint64> >	sampleObservations(m_numMaxSwaps+1);
6078 	ThresholdComparer<deUint64>			comparer;
6079 
6080 	comparer.relativeThreshold = 0.15f;	// 15%
6081 	comparer.absoluteThreshold = 100;	// (us), assumed sampling precision
6082 
6083 	// get observations and order them
6084 
6085 	for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx)
6086 	{
6087 		int insertNdx = 0;
6088 
6089 		sampleObservations[swapNdx].resize(m_numSamplesPerSwap);
6090 
6091 		for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx)
6092 			if (m_samples[ndx].numFrames == swapNdx)
6093 				sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target;
6094 
6095 		DE_ASSERT(insertNdx == m_numSamplesPerSwap);
6096 
6097 		std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end());
6098 	}
6099 
6100 	// find stabilization point
6101 
6102 	for (int sampleNdx = m_numMaxSwaps-1; sampleNdx != -1; --sampleNdx )
6103 	{
6104 		// Distribution is equal to all following distributions
6105 		for (int cmpTargetDistribution = sampleNdx+1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution)
6106 		{
6107 			// Stable section ends here?
6108 			const DistributionCompareResult result = distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer);
6109 			if (!result.equal)
6110 			{
6111 				// Last two samples are not equal? Samples never stabilized
6112 				if (sampleNdx == m_numMaxSwaps-1)
6113 				{
6114 					m_testCtx.getLog()
6115 						<< tcu::TestLog::Message
6116 						<< description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6117 						<< "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6118 						<< "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6119 						<< "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6120 						<< tcu::TestLog::EndMessage;
6121 					return -1;
6122 				}
6123 				else
6124 				{
6125 					m_testCtx.getLog()
6126 						<< tcu::TestLog::Message
6127 						<< description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6128 						<< "\tSamples with swap count " << sampleNdx << " are not part of the tail of stable results.\n"
6129 						<< "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6130 						<< "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6131 						<< "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6132 						<< tcu::TestLog::EndMessage;
6133 
6134 					return sampleNdx+1;
6135 				}
6136 			}
6137 		}
6138 	}
6139 
6140 	m_testCtx.getLog()
6141 		<< tcu::TestLog::Message
6142 		<< description << ": All samples seem to have the same distribution"
6143 		<< tcu::TestLog::EndMessage;
6144 
6145 	// all distributions equal
6146 	return 0;
6147 }
6148 
checkSampleTemporalStability(deUint64 (UploadWaitDrawCase::Result::* target),const char * description)6149 bool UploadWaitDrawCase::checkSampleTemporalStability (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6150 {
6151 	// Try to find correlation with sample order and sample times
6152 
6153 	const int						numDataPoints	= (int)m_iterationOrder.size();
6154 	std::vector<tcu::Vec2>			dataPoints		(m_iterationOrder.size());
6155 	LineParametersWithConfidence	lineFit;
6156 
6157 	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6158 	{
6159 		dataPoints[m_iterationOrder[ndx]].x() = (float)ndx;
6160 		dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target);
6161 	}
6162 
6163 	lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);
6164 
6165 	// Difference of more than 25% of the offset along the whole sample range
6166 	if (de::abs(lineFit.coefficient) * numDataPoints > de::abs(lineFit.offset) * 0.25f)
6167 	{
6168 		m_testCtx.getLog()
6169 			<< tcu::TestLog::Message
6170 			<< description << ": Correlation with data point observation order and result time. Results are not temporally stable, observations are not independent.\n"
6171 			<< "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
6172 			<< tcu::TestLog::EndMessage;
6173 
6174 		return false;
6175 	}
6176 	else
6177 		return true;
6178 }
6179 
6180 } // anonymous
6181 
BufferDataUploadTests(Context & context)6182 BufferDataUploadTests::BufferDataUploadTests (Context& context)
6183 	: TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
6184 {
6185 }
6186 
~BufferDataUploadTests(void)6187 BufferDataUploadTests::~BufferDataUploadTests (void)
6188 {
6189 }
6190 
init(void)6191 void BufferDataUploadTests::init (void)
6192 {
6193 	static const struct BufferUsage
6194 	{
6195 		const char* name;
6196 		deUint32	usage;
6197 		bool		primaryUsage;
6198 	} bufferUsages[] =
6199 	{
6200 		{ "stream_draw",	GL_STREAM_DRAW,		true	},
6201 		{ "stream_read",	GL_STREAM_READ,		false	},
6202 		{ "stream_copy",	GL_STREAM_COPY,		false	},
6203 		{ "static_draw",	GL_STATIC_DRAW,		true	},
6204 		{ "static_read",	GL_STATIC_READ,		false	},
6205 		{ "static_copy",	GL_STATIC_COPY,		false	},
6206 		{ "dynamic_draw",	GL_DYNAMIC_DRAW,	true	},
6207 		{ "dynamic_read",	GL_DYNAMIC_READ,	false	},
6208 		{ "dynamic_copy",	GL_DYNAMIC_COPY,	false	},
6209 	};
6210 
6211 	tcu::TestCaseGroup* const referenceGroup			= new tcu::TestCaseGroup(m_testCtx, "reference",			"Reference functions");
6212 	tcu::TestCaseGroup* const functionCallGroup			= new tcu::TestCaseGroup(m_testCtx, "function_call",		"Function call timing");
6213 	tcu::TestCaseGroup* const modifyAfterUseGroup		= new tcu::TestCaseGroup(m_testCtx, "modify_after_use",		"Function call time after buffer has been used");
6214 	tcu::TestCaseGroup* const renderAfterUploadGroup	= new tcu::TestCaseGroup(m_testCtx, "render_after_upload",	"Function call time of draw commands after buffer has been modified");
6215 
6216 	addChild(referenceGroup);
6217 	addChild(functionCallGroup);
6218 	addChild(modifyAfterUseGroup);
6219 	addChild(renderAfterUploadGroup);
6220 
6221 	// .reference
6222 	{
6223 		static const struct BufferSizeRange
6224 		{
6225 			const char* name;
6226 			int			minBufferSize;
6227 			int			maxBufferSize;
6228 			int			numSamples;
6229 			bool		largeBuffersCase;
6230 		} sizeRanges[] =
6231 		{
6232 			{ "small_buffers", 0,		1 << 18,	64,		false	}, // !< 0kB - 256kB
6233 			{ "large_buffers", 1 << 18,	1 << 24,	32,		true	}, // !< 256kB - 16MB
6234 		};
6235 
6236 		for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
6237 		{
6238 			referenceGroup->addChild(new ReferenceMemcpyCase(m_context,
6239 															 std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
6240 															 "Test memcpy performance",
6241 															 sizeRanges[bufferSizeRangeNdx].minBufferSize,
6242 															 sizeRanges[bufferSizeRangeNdx].maxBufferSize,
6243 															 sizeRanges[bufferSizeRangeNdx].numSamples,
6244 															 sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
6245 		}
6246 	}
6247 
6248 	// .function_call
6249 	{
6250 		const int minBufferSize		= 0;		// !< 0kiB
6251 		const int maxBufferSize		= 1 << 24;	// !< 16MiB
6252 		const int numDataSamples	= 25;
6253 		const int numMapSamples		= 25;
6254 
6255 		tcu::TestCaseGroup* const bufferDataMethodGroup		= new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
6256 		tcu::TestCaseGroup* const bufferSubDataMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
6257 		tcu::TestCaseGroup* const mapBufferRangeMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");
6258 
6259 		functionCallGroup->addChild(bufferDataMethodGroup);
6260 		functionCallGroup->addChild(bufferSubDataMethodGroup);
6261 		functionCallGroup->addChild(mapBufferRangeMethodGroup);
6262 
6263 		// .buffer_data
6264 		{
6265 			static const struct TargetCase
6266 			{
6267 				tcu::TestCaseGroup*				group;
6268 				BufferDataUploadCase::CaseType	caseType;
6269 				bool							allUsages;
6270 			} targetCases[] =
6271 			{
6272 				{ new tcu::TestCaseGroup(m_testCtx, "new_buffer",				"Target new buffer"),							BufferDataUploadCase::CASE_NEW_BUFFER,			true	},
6273 				{ new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer",		"Target new unspecified buffer"),				BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER,	true	},
6274 				{ new tcu::TestCaseGroup(m_testCtx, "specified_buffer",			"Target new specified buffer"),					BufferDataUploadCase::CASE_SPECIFIED_BUFFER,	true	},
6275 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer",				"Target buffer that was used in draw"),			BufferDataUploadCase::CASE_USED_BUFFER,			true	},
6276 				{ new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer",		"Target larger buffer that was used in draw"),	BufferDataUploadCase::CASE_USED_LARGER_BUFFER,	false	},
6277 			};
6278 
6279 			for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
6280 			{
6281 				bufferDataMethodGroup->addChild(targetCases[targetNdx].group);
6282 
6283 				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6284 					if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
6285 						targetCases[targetNdx].group->addChild(new BufferDataUploadCase(m_context,
6286 																						std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6287 																						std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6288 																						minBufferSize,
6289 																						maxBufferSize,
6290 																						numDataSamples,
6291 																						bufferUsages[usageNdx].usage,
6292 																						targetCases[targetNdx].caseType));
6293 			}
6294 		}
6295 
6296 		// .buffer_sub_data
6297 		{
6298 			static const struct FlagCase
6299 			{
6300 				tcu::TestCaseGroup*					group;
6301 				BufferSubDataUploadCase::CaseType	parentCase;
6302 				bool								allUsages;
6303 				int									flags;
6304 			} flagCases[] =
6305 			{
6306 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload",					    ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD															},
6307 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",    "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD    | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
6308 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload",                   ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD														},
6309 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload", "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
6310 			};
6311 
6312 			for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
6313 			{
6314 				bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);
6315 
6316 				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6317 					if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
6318 							flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(m_context,
6319 																						   std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6320 																						   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6321 																						   minBufferSize,
6322 																						   maxBufferSize,
6323 																						   numDataSamples,
6324 																						   bufferUsages[usageNdx].usage,
6325 																						   flagCases[flagNdx].parentCase,
6326 																						   flagCases[flagNdx].flags));
6327 			}
6328 		}
6329 
6330 		// .map_buffer_range
6331 		{
6332 			static const struct FlagCase
6333 			{
6334 				const char*	name;
6335 				bool		usefulForUnusedBuffers;
6336 				bool		allUsages;
6337 				int			glFlags;
6338 				int			caseFlags;
6339 			} flagCases[] =
6340 			{
6341 				{ "flag_write_full",										true,	true,	GL_MAP_WRITE_BIT,																0																				},
6342 				{ "flag_write_partial",										true,	true,	GL_MAP_WRITE_BIT,																MapBufferRangeCase::FLAG_PARTIAL												},
6343 				{ "flag_read_write_full",									true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												0																				},
6344 				{ "flag_read_write_partial",								true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												MapBufferRangeCase::FLAG_PARTIAL												},
6345 				{ "flag_invalidate_range_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									0																				},
6346 				{ "flag_invalidate_range_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
6347 				{ "flag_invalidate_buffer_full",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								0																				},
6348 				{ "flag_invalidate_buffer_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								MapBufferRangeCase::FLAG_PARTIAL												},
6349 				{ "flag_write_full_manual_invalidate_buffer",				false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_MANUAL_INVALIDATION									},
6350 				{ "flag_write_partial_manual_invalidate_buffer",			false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION	},
6351 				{ "flag_unsynchronized_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									0																				},
6352 				{ "flag_unsynchronized_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
6353 				{ "flag_unsynchronized_and_invalidate_buffer_full",			true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	0																				},
6354 				{ "flag_unsynchronized_and_invalidate_buffer_partial",		true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	MapBufferRangeCase::FLAG_PARTIAL												},
6355 			};
6356 			static const struct FlushCases
6357 			{
6358 				const char*	name;
6359 				int			glFlags;
6360 				int			caseFlags;
6361 			} flushCases[] =
6362 			{
6363 				{ "flag_flush_explicit_map_full",					GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	0												},
6364 				{ "flag_flush_explicit_map_partial",				GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_PARTIAL			},
6365 				{ "flag_flush_explicit_map_full_flush_in_parts",	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS	},
6366 				{ "flag_flush_explicit_map_full_flush_partial",		GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL		},
6367 			};
6368 			static const struct MapTestGroup
6369 			{
6370 				int					flags;
6371 				bool				unusedBufferCase;
6372 				tcu::TestCaseGroup* group;
6373 			} groups[] =
6374 			{
6375 				{ MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,	true,	new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer", "Test with unused, unspecified buffers"),				},
6376 				{ MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,		true,	new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),					},
6377 				{ 0,														false,	new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Test with used (data has been sourced from a buffer) buffers")	},
6378 			};
6379 
6380 			// we OR same flags to both range and flushRange cases, make sure it is legal
6381 			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
6382 			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);
6383 
6384 			for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
6385 			{
6386 				tcu::TestCaseGroup* const bufferTypeGroup = groups[groupNdx].group;
6387 
6388 				mapBufferRangeMethodGroup->addChild(bufferTypeGroup);
6389 
6390 				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
6391 				{
6392 					if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
6393 						continue;
6394 
6395 					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
6396 					bufferTypeGroup->addChild(bufferUsageGroup);
6397 
6398 					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6399 						if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
6400 							bufferUsageGroup->addChild(new MapBufferRangeCase(m_context,
6401 																			  bufferUsages[usageNdx].name,
6402 																			  std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6403 																			  minBufferSize,
6404 																			  maxBufferSize,
6405 																			  numMapSamples,
6406 																			  bufferUsages[usageNdx].usage,
6407 																			  flagCases[caseNdx].glFlags,
6408 																			  flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
6409 				}
6410 
6411 				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
6412 				{
6413 					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
6414 					bufferTypeGroup->addChild(bufferUsageGroup);
6415 
6416 					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6417 						if (bufferUsages[usageNdx].primaryUsage)
6418 							bufferUsageGroup->addChild(new MapBufferRangeFlushCase(m_context,
6419 																				   bufferUsages[usageNdx].name,
6420 																				   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6421 																				   minBufferSize,
6422 																				   maxBufferSize,
6423 																				   numMapSamples,
6424 																				   bufferUsages[usageNdx].usage,
6425 																				   flushCases[caseNdx].glFlags,
6426 																				   flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
6427 				}
6428 			}
6429 		}
6430 	}
6431 
6432 	// .modify_after_use
6433 	{
6434 		const int minBufferSize	= 0;		// !< 0kiB
6435 		const int maxBufferSize	= 1 << 24;	// !< 16MiB
6436 
6437 		static const struct Usage
6438 		{
6439 			const char* name;
6440 			const char* description;
6441 			deUint32	usage;
6442 		} usages[] =
6443 		{
6444 			{ "static_draw",	"Test with GL_STATIC_DRAW",		GL_STATIC_DRAW	},
6445 			{ "dynamic_draw",	"Test with GL_DYNAMIC_DRAW",	GL_DYNAMIC_DRAW	},
6446 			{ "stream_draw",	"Test with GL_STREAM_DRAW",		GL_STREAM_DRAW },
6447 
6448 		};
6449 
6450 		for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
6451 		{
6452 			tcu::TestCaseGroup* const usageGroup = new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
6453 			modifyAfterUseGroup->addChild(usageGroup);
6454 
6455 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data",							"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6456 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_different_size",			"Respecify buffer contents and size after use",			minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
6457 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_repeated",					"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));
6458 
6459 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6460 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial",				"Respecify buffer contents partially use",				minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6461 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full_repeated",		"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
6462 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial_repeated",		"Respecify buffer contents partially upload and use",	minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED | ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6463 
6464 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT));
6465 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_partial",				"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT));
6466 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_full",				"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6467 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_partial",			"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6468 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6469 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6470 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6471 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6472 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6473 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6474 
6475 			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6476 			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6477 		}
6478 	}
6479 
6480 	// .render_after_upload
6481 	{
6482 		// .reference
6483 		{
6484 			tcu::TestCaseGroup* const renderReferenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
6485 			renderAfterUploadGroup->addChild(renderReferenceGroup);
6486 
6487 			// .draw
6488 			{
6489 				tcu::TestCaseGroup* const drawGroup = new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
6490 				renderReferenceGroup->addChild(drawGroup);
6491 
6492 				// Time consumed by readPixels
6493 				drawGroup->addChild(new ReferenceReadPixelsTimeCase	(m_context, "read_pixels",		"Measure time consumed by readPixels() function call"));
6494 
6495 				// Time consumed by rendering
6496 				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_arrays",		"Measure time consumed by drawArrays() function call",		DRAWMETHOD_DRAW_ARRAYS));
6497 				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_elements",	"Measure time consumed by drawElements() function call",	DRAWMETHOD_DRAW_ELEMENTS));
6498 			}
6499 
6500 			// .draw_upload_draw
6501 			{
6502 				static const struct
6503 				{
6504 					const char*		name;
6505 					const char*		description;
6506 					DrawMethod		drawMethod;
6507 					TargetBuffer	targetBuffer;
6508 					bool			partial;
6509 				} uploadTargets[] =
6510 				{
6511 					{
6512 						"draw_arrays_upload_vertices",
6513 						"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6514 						DRAWMETHOD_DRAW_ARRAYS,
6515 						TARGETBUFFER_VERTEX,
6516 						false
6517 					},
6518 					{
6519 						"draw_arrays_upload_vertices_partial",
6520 						"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6521 						DRAWMETHOD_DRAW_ARRAYS,
6522 						TARGETBUFFER_VERTEX,
6523 						true
6524 					},
6525 					{
6526 						"draw_elements_upload_vertices",
6527 						"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6528 						DRAWMETHOD_DRAW_ELEMENTS,
6529 						TARGETBUFFER_VERTEX,
6530 						false
6531 					},
6532 					{
6533 						"draw_elements_upload_indices",
6534 						"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6535 						DRAWMETHOD_DRAW_ELEMENTS,
6536 						TARGETBUFFER_INDEX,
6537 						false
6538 					},
6539 					{
6540 						"draw_elements_upload_indices_partial",
6541 						"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6542 						DRAWMETHOD_DRAW_ELEMENTS,
6543 						TARGETBUFFER_INDEX,
6544 						true
6545 					},
6546 				};
6547 				static const struct
6548 				{
6549 					const char*							name;
6550 					const char*							description;
6551 					UploadMethod						uploadMethod;
6552 					BufferInUseRenderTimeCase::MapFlags	mapFlags;
6553 					bool								supportsPartialUpload;
6554 				} uploadMethods[] =
6555 				{
6556 					{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
6557 					{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
6558 					{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
6559 					{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
6560 				};
6561 
6562 				tcu::TestCaseGroup* const drawUploadDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
6563 				renderReferenceGroup->addChild(drawUploadDrawGroup);
6564 
6565 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6566 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6567 				{
6568 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6569 
6570 					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6571 						continue;
6572 
6573 					drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6574 																				name.c_str(),
6575 																				uploadTargets[uploadTargetNdx].description,
6576 																				uploadTargets[uploadTargetNdx].drawMethod,
6577 																				uploadMethods[uploadMethodNdx].mapFlags,
6578 																				uploadTargets[uploadTargetNdx].targetBuffer,
6579 																				uploadMethods[uploadMethodNdx].uploadMethod,
6580 																				(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6581 																				BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
6582 				}
6583 			}
6584 		}
6585 
6586 		// .upload_unrelated_and_draw
6587 		{
6588 			static const struct
6589 			{
6590 				const char*		name;
6591 				const char*		description;
6592 				DrawMethod		drawMethod;
6593 			} drawMethods[] =
6594 			{
6595 				{ "draw_arrays",	"drawArrays",	DRAWMETHOD_DRAW_ARRAYS		},
6596 				{ "draw_elements",	"drawElements",	DRAWMETHOD_DRAW_ELEMENTS	},
6597 			};
6598 
6599 			static const struct
6600 			{
6601 				const char*		name;
6602 				UploadMethod	uploadMethod;
6603 			} uploadMethods[] =
6604 			{
6605 				{ "buffer_data",		UPLOADMETHOD_BUFFER_DATA		},
6606 				{ "buffer_sub_data",	UPLOADMETHOD_BUFFER_SUB_DATA	},
6607 				{ "map_buffer_range",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
6608 			};
6609 
6610 			tcu::TestCaseGroup* const uploadUnrelatedGroup = new tcu::TestCaseGroup(m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
6611 			renderAfterUploadGroup->addChild(uploadUnrelatedGroup);
6612 
6613 			for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
6614 			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6615 			{
6616 				const std::string name = std::string() + drawMethods[drawMethodNdx].name + "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
6617 				const std::string desc = std::string() + "Measure time consumed by " + drawMethods[drawMethodNdx].description + " function call after an unrelated upload";
6618 
6619 				// Time consumed by rendering command after an unrelated upload
6620 
6621 				uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod, uploadMethods[uploadMethodNdx].uploadMethod));
6622 			}
6623 		}
6624 
6625 		// .upload_and_draw
6626 		{
6627 			static const struct
6628 			{
6629 				const char*			name;
6630 				const char*			description;
6631 				BufferState			bufferState;
6632 				UnrelatedBufferType	unrelatedBuffer;
6633 				bool				supportsPartialUpload;
6634 			} bufferConfigs[] =
6635 			{
6636 				{ "used_buffer",						"Upload to an used buffer",											BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_NONE,	true	},
6637 				{ "new_buffer",							"Upload to a new buffer",											BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_NONE,	false	},
6638 				{ "used_buffer_and_unrelated_upload",	"Upload to an used buffer and an unrelated buffer and then draw",	BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_VERTEX,	true	},
6639 				{ "new_buffer_and_unrelated_upload",	"Upload to a new buffer and an unrelated buffer and then draw",		BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_VERTEX,	false	},
6640 			};
6641 
6642 			tcu::TestCaseGroup* const uploadAndDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
6643 			renderAfterUploadGroup->addChild(uploadAndDrawGroup);
6644 
6645 			// .used_buffer
6646 			// .new_buffer
6647 			// .used_buffer_and_unrelated_upload
6648 			// .new_buffer_and_unrelated_upload
6649 			for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
6650 			{
6651 				static const struct
6652 				{
6653 					const char*		name;
6654 					const char*		description;
6655 					DrawMethod		drawMethod;
6656 					TargetBuffer	targetBuffer;
6657 					bool			partial;
6658 				} uploadTargets[] =
6659 				{
6660 					{
6661 						"draw_arrays_upload_vertices",
6662 						"Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
6663 						DRAWMETHOD_DRAW_ARRAYS,
6664 						TARGETBUFFER_VERTEX,
6665 						false
6666 					},
6667 					{
6668 						"draw_arrays_upload_vertices_partial",
6669 						"Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function calls",
6670 						DRAWMETHOD_DRAW_ARRAYS,
6671 						TARGETBUFFER_VERTEX,
6672 						true
6673 					},
6674 					{
6675 						"draw_elements_upload_vertices",
6676 						"Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
6677 						DRAWMETHOD_DRAW_ELEMENTS,
6678 						TARGETBUFFER_VERTEX,
6679 						false
6680 					},
6681 					{
6682 						"draw_elements_upload_indices",
6683 						"Measure time consumed by index upload, drawElements, and readPixels function calls",
6684 						DRAWMETHOD_DRAW_ELEMENTS,
6685 						TARGETBUFFER_INDEX,
6686 						false
6687 					},
6688 					{
6689 						"draw_elements_upload_indices_partial",
6690 						"Measure time consumed by partial index upload, drawElements, and readPixels function calls",
6691 						DRAWMETHOD_DRAW_ELEMENTS,
6692 						TARGETBUFFER_INDEX,
6693 						true
6694 					},
6695 				};
6696 				static const struct
6697 				{
6698 					const char*		name;
6699 					const char*		description;
6700 					UploadMethod	uploadMethod;
6701 					bool			supportsPartialUpload;
6702 				} uploadMethods[] =
6703 				{
6704 					{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA,		false	},
6705 					{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	true	},
6706 					{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	true	},
6707 				};
6708 
6709 				tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name, bufferConfigs[stateNdx].description);
6710 				uploadAndDrawGroup->addChild(group);
6711 
6712 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6713 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6714 				{
6715 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6716 
6717 					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6718 						continue;
6719 					if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
6720 						continue;
6721 
6722 					// Don't log unrelated buffer information to samples if there is no such buffer
6723 
6724 					if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
6725 					{
6726 						typedef UploadRenderReadDuration				SampleType;
6727 						typedef GenericUploadRenderTimeCase<SampleType>	TestType;
6728 
6729 						group->addChild(new TestType(m_context,
6730 													 name.c_str(),
6731 													 uploadTargets[uploadTargetNdx].description,
6732 													 uploadTargets[uploadTargetNdx].drawMethod,
6733 													 uploadTargets[uploadTargetNdx].targetBuffer,
6734 													 uploadMethods[uploadMethodNdx].uploadMethod,
6735 													 bufferConfigs[stateNdx].bufferState,
6736 													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6737 													 bufferConfigs[stateNdx].unrelatedBuffer));
6738 					}
6739 					else
6740 					{
6741 						typedef UploadRenderReadDurationWithUnrelatedUploadSize	SampleType;
6742 						typedef GenericUploadRenderTimeCase<SampleType>			TestType;
6743 
6744 						group->addChild(new TestType(m_context,
6745 													 name.c_str(),
6746 													 uploadTargets[uploadTargetNdx].description,
6747 													 uploadTargets[uploadTargetNdx].drawMethod,
6748 													 uploadTargets[uploadTargetNdx].targetBuffer,
6749 													 uploadMethods[uploadMethodNdx].uploadMethod,
6750 													 bufferConfigs[stateNdx].bufferState,
6751 													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6752 													 bufferConfigs[stateNdx].unrelatedBuffer));
6753 					}
6754 				}
6755 			}
6756 		}
6757 
6758 		// .draw_modify_draw
6759 		{
6760 			static const struct
6761 			{
6762 				const char*		name;
6763 				const char*		description;
6764 				DrawMethod		drawMethod;
6765 				TargetBuffer	targetBuffer;
6766 				bool			partial;
6767 			} uploadTargets[] =
6768 			{
6769 				{
6770 					"draw_arrays_upload_vertices",
6771 					"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6772 					DRAWMETHOD_DRAW_ARRAYS,
6773 					TARGETBUFFER_VERTEX,
6774 					false
6775 				},
6776 				{
6777 					"draw_arrays_upload_vertices_partial",
6778 					"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6779 					DRAWMETHOD_DRAW_ARRAYS,
6780 					TARGETBUFFER_VERTEX,
6781 					true
6782 				},
6783 				{
6784 					"draw_elements_upload_vertices",
6785 					"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6786 					DRAWMETHOD_DRAW_ELEMENTS,
6787 					TARGETBUFFER_VERTEX,
6788 					false
6789 				},
6790 				{
6791 					"draw_elements_upload_indices",
6792 					"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6793 					DRAWMETHOD_DRAW_ELEMENTS,
6794 					TARGETBUFFER_INDEX,
6795 					false
6796 				},
6797 				{
6798 					"draw_elements_upload_indices_partial",
6799 					"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6800 					DRAWMETHOD_DRAW_ELEMENTS,
6801 					TARGETBUFFER_INDEX,
6802 					true
6803 				},
6804 			};
6805 			static const struct
6806 			{
6807 				const char*							name;
6808 				const char*							description;
6809 				UploadMethod						uploadMethod;
6810 				BufferInUseRenderTimeCase::MapFlags	mapFlags;
6811 				bool								supportsPartialUpload;
6812 			} uploadMethods[] =
6813 			{
6814 				{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
6815 				{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
6816 				{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
6817 				{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
6818 			};
6819 
6820 			tcu::TestCaseGroup* const drawModifyDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_modify_draw", "Time used in rendering functions with modified buffers while original buffer is still in use");
6821 			renderAfterUploadGroup->addChild(drawModifyDrawGroup);
6822 
6823 			for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6824 			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6825 			{
6826 				const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6827 
6828 				if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6829 					continue;
6830 
6831 				drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6832 																			name.c_str(),
6833 																			uploadTargets[uploadTargetNdx].description,
6834 																			uploadTargets[uploadTargetNdx].drawMethod,
6835 																			uploadMethods[uploadMethodNdx].mapFlags,
6836 																			uploadTargets[uploadTargetNdx].targetBuffer,
6837 																			uploadMethods[uploadMethodNdx].uploadMethod,
6838 																			(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6839 																			BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
6840 			}
6841 		}
6842 
6843 		// .upload_wait_draw
6844 		{
6845 			static const struct
6846 			{
6847 				const char*	name;
6848 				const char*	description;
6849 				BufferState	bufferState;
6850 			} bufferStates[] =
6851 			{
6852 				{ "new_buffer",		"Uploading to just generated name",	BUFFERSTATE_NEW			},
6853 				{ "used_buffer",	"Uploading to a used buffer",		BUFFERSTATE_EXISTING	},
6854 			};
6855 			static const struct
6856 			{
6857 				const char*		name;
6858 				const char*		description;
6859 				DrawMethod		drawMethod;
6860 				TargetBuffer	targetBuffer;
6861 			} uploadTargets[] =
6862 			{
6863 				{ "draw_arrays_vertices",	"Upload vertex data, draw with drawArrays",		DRAWMETHOD_DRAW_ARRAYS,		TARGETBUFFER_VERTEX	},
6864 				{ "draw_elements_vertices",	"Upload vertex data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_VERTEX	},
6865 				{ "draw_elements_indices",	"Upload index data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_INDEX	},
6866 			};
6867 			static const struct
6868 			{
6869 				const char*		name;
6870 				const char*		description;
6871 				UploadMethod	uploadMethod;
6872 			} uploadMethods[] =
6873 			{
6874 				{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA		},
6875 				{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA	},
6876 				{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
6877 			};
6878 
6879 			tcu::TestCaseGroup* const uploadSwapDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
6880 			renderAfterUploadGroup->addChild(uploadSwapDrawGroup);
6881 
6882 			for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
6883 			{
6884 				tcu::TestCaseGroup* const bufferGroup = new tcu::TestCaseGroup(m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
6885 				uploadSwapDrawGroup->addChild(bufferGroup);
6886 
6887 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6888 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6889 				{
6890 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6891 
6892 					bufferGroup->addChild(new UploadWaitDrawCase(m_context,
6893 																 name.c_str(),
6894 																 uploadTargets[uploadTargetNdx].description,
6895 																 uploadTargets[uploadTargetNdx].drawMethod,
6896 																 uploadTargets[uploadTargetNdx].targetBuffer,
6897 																 uploadMethods[uploadMethodNdx].uploadMethod,
6898 																 bufferStates[bufferStateNdx].bufferState));
6899 				}
6900 			}
6901 		}
6902 	}
6903 }
6904 
6905 } // Performance
6906 } // gles3
6907 } // deqp
6908