1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef __DATAGEN_H
17 #define __DATAGEN_H
18
19 #include "harness/compat.h"
20
21 #include <assert.h>
22
23 #include <string>
24 #include <memory>
25 #include <vector>
26 #include <map>
27 #include <fstream>
28 #include <algorithm>
29
30 #include "harness/mt19937.h"
31
32 #include "exceptions.h"
33 #include "kernelargs.h"
34
// ESINNS is a short name for EXPLICIT_SPECIALIZATION_IN_NON_NAMESPACE_SCOPE.
//
// These macros decide where the explicit specializations of
// RandomGenerator::getNext are emitted:
//  - When __GNUC__ is defined, ESINNS is defined. The RandomGenerator class
//    body is then closed before the specializations, and every specialization
//    is spelled "template<> inline T RandomGenerator::getNext(...)".
//  - Otherwise ESINNS stays undefined, both helper macros expand to nothing,
//    and the specializations are written inside the class body.

#undef ESINNS

#ifdef __GNUC__

#define ESINNS
#define ESINNS_PREF() inline
#define ESINNS_POST() RandomGenerator::

#else

#define ESINNS_PREF()
#define ESINNS_POST()

#endif
51
// Number of entries in the per-dimension arrays of WorkSizeInfo.
#define MAX_WORK_DIM 3
// max buffer size = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / sizeof(double16),
// rounded down to an even number.
// NOTE(review): CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE is used here as a plain
// compile-time constant, presumably the cl_device_info query token from the
// OpenCL headers rather than a limit queried from a device - confirm that
// this is intended.
#define GLOBAL_WORK_SIZE (((CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / sizeof(cl_double) / 16) / 2) * 2)
54
// SPIR definitions for image channel data types (Section 2.1.3.2).
// The values form one contiguous range, 0x10D0 .. 0x10DF.
#define SPIR_CLK_SNORM_INT8 0x10D0
#define SPIR_CLK_SNORM_INT16 0x10D1
#define SPIR_CLK_UNORM_INT8 0x10D2
#define SPIR_CLK_UNORM_INT16 0x10D3
#define SPIR_CLK_UNORM_SHORT_565 0x10D4
#define SPIR_CLK_UNORM_SHORT_555 0x10D5
#define SPIR_CLK_UNORM_SHORT_101010 0x10D6
#define SPIR_CLK_SIGNED_INT8 0x10D7
#define SPIR_CLK_SIGNED_INT16 0x10D8
#define SPIR_CLK_SIGNED_INT32 0x10D9
#define SPIR_CLK_UNSIGNED_INT8 0x10DA
#define SPIR_CLK_UNSIGNED_INT16 0x10DB
#define SPIR_CLK_UNSIGNED_INT32 0x10DC
#define SPIR_CLK_HALF_FLOAT 0x10DD
#define SPIR_CLK_FLOAT 0x10DE
#define SPIR_CLK_UNORM_INT24 0x10DF

// NOTE(review): not referenced anywhere in this header; presumably an upper
// bound on the number of image formats handled elsewhere - confirm.
#define NUM_IMG_FORMATS 64
74
// Random-number helpers, declared here and defined elsewhere. Each one takes
// a low/high bound pair and an MTdata generator state and returns a value of
// the named type.
// NOTE(review): whether 'high' is inclusive, and what happens when
// low > high, is not visible from this header - confirm against the
// definitions.
double get_random_double(double low, double high, MTdata d);
float get_random_float(float low, float high, MTdata d);
size_t get_random_size_t(size_t low, size_t high, MTdata d);
78
79 /**
80 Simple container for the work size information
81 */
82 class WorkSizeInfo
83 {
84 public:
85 /**
86 Returns the flat global size
87 */
88 size_t getGlobalWorkSize() const;
89 public:
90 cl_uint work_dim;
91 size_t global_work_offset[MAX_WORK_DIM];
92 size_t global_work_size[MAX_WORK_DIM];
93 size_t local_work_size[MAX_WORK_DIM];
94 };
95
96 /**
97 Generates various types of random numbers
98 */
99 class RandomGenerator
100 {
101 public:
RandomGenerator()102 RandomGenerator():m_d(NULL)
103 {
104 init(0);
105 }
106
~RandomGenerator()107 ~RandomGenerator()
108 {
109 if( NULL != m_d )
110 free_mtdata(m_d);
111 }
112
init(cl_uint seed)113 void init(cl_uint seed)
114 {
115 m_d = init_genrand( seed );
116 }
117
getNext(T low,T high)118 template<class T> T getNext(T low, T high)
119 {
120 assert(false && "Not implemented");
121 return T();
122 }
123
124 #ifdef ESINNS
125
126 private:
127 MTdata m_d;
128 };
129
130 #endif
131
ESINNS_PREF()132 template<> ESINNS_PREF() bool ESINNS_POST()getNext(bool low, bool high)
133 {
134 return (bool)get_random_size_t((size_t)low, (size_t)high, m_d);
135 }
136
ESINNS_PREF()137 template<> ESINNS_PREF() cl_char ESINNS_POST()getNext(cl_char low, cl_char high)
138 {
139 return (cl_char)get_random_size_t((size_t)low, (size_t)high, m_d);
140 }
141
ESINNS_PREF()142 template<> ESINNS_PREF() cl_double ESINNS_POST()getNext(cl_double low, cl_double high)
143 {
144 return get_random_double(low, high, m_d);
145 }
146
ESINNS_PREF()147 template<> ESINNS_PREF() cl_float ESINNS_POST()getNext(cl_float low, cl_float high)
148 {
149 return get_random_float(low, high, m_d);
150 }
151
ESINNS_PREF()152 template<> ESINNS_PREF() cl_int ESINNS_POST()getNext(cl_int low, cl_int high)
153 {
154 return (cl_int)get_random_size_t((size_t)low, (size_t)high, m_d);
155 }
156
ESINNS_PREF()157 template<> ESINNS_PREF() cl_long ESINNS_POST()getNext(cl_long low, cl_long high)
158 {
159 return (cl_long)get_random_size_t((size_t)low, (size_t)high, m_d);
160 }
161
ESINNS_PREF()162 template<> ESINNS_PREF() cl_short ESINNS_POST()getNext(cl_short low, cl_short high)
163 {
164 return (cl_short)get_random_size_t((size_t)low, (size_t)high, m_d);
165 }
166
ESINNS_PREF()167 template<> ESINNS_PREF() cl_uchar ESINNS_POST()getNext(cl_uchar low, cl_uchar high)
168 {
169 return (cl_uchar)get_random_size_t((size_t)low, (size_t)high, m_d);
170 }
171
ESINNS_PREF()172 template<> ESINNS_PREF() cl_uint ESINNS_POST()getNext(cl_uint low, cl_uint high)
173 {
174 return (cl_uint)get_random_size_t((size_t)low, (size_t)high, m_d);
175 }
176
ESINNS_PREF()177 template<> ESINNS_PREF() cl_ulong ESINNS_POST()getNext(cl_ulong low, cl_ulong high)
178 {
179 return (cl_ulong)get_random_size_t((size_t)low, (size_t)high, m_d);
180 }
181
ESINNS_PREF()182 template<> ESINNS_PREF() cl_ushort ESINNS_POST()getNext(cl_ushort low, cl_ushort high)
183 {
184 return (cl_ushort)get_random_size_t((size_t)low, (size_t)high, m_d);
185 }
186
187 #ifndef ESINNS
188
189 private:
190 MTdata m_d;
191 };
192
193 #endif
194
// RandomGenerator instance used by the fillBuffer() helpers of the argument
// generators in this header; only declared here.
extern RandomGenerator gRG;
196
197 /**
198 Base class for kernel argument generator
199 */
200 class KernelArgGenerator
201 {
202 protected:
203 KernelArgGenerator()
204 {}
205
206 public:
207 virtual KernelArg* generate( cl_context context,
208 const WorkSizeInfo& ws,
209 const KernelArgInfo& argInfo,
210 const KernelArg* refArg,
211 const cl_kernel kernel,
212 const cl_device_id device ) = 0;
213 virtual ~KernelArgGenerator() {}
214 };
215
216 /**
217 Mock: 'Not implemented' version of the kernel argument generator - used for the still unsupported types
218 */
219 class KernelArgGeneratorNI: public KernelArgGenerator
220 {
221 public:
222 KernelArgGeneratorNI( bool isBuffer, size_t vectorSize, int minValue, int maxValue )
223 {}
224
225 KernelArg* generate( cl_context context,
226 const WorkSizeInfo& ws,
227 const KernelArgInfo& argInfo,
228 const KernelArg* refArg,
229 const cl_kernel kernel,
230 const cl_device_id device )
231 {
232 //assert(false && "Not implemented");
233 throw Exceptions::TestError("KernelArgGenerator is not implemented\n");
234 }
235 };
236
237 /**
238 Kernel argument generator for images
239 */
240 class KernelArgGeneratorImage: public KernelArgGenerator
241 {
242 public:
243 KernelArgGeneratorImage(bool isBuffer, size_t vectorSize, char minValue, char maxValue) :
244 m_isBuffer(isBuffer),
245 m_vectorSize(vectorSize),
246 m_minValue(minValue),
247 m_maxValue(maxValue)
248 {
249 m_format.image_channel_order = CL_RGBA;
250
251 m_desc.image_width = 32;
252 m_desc.image_height = 1;
253 m_desc.image_depth = 1;
254 m_desc.image_array_size = 1;
255 m_desc.num_mip_levels = 0;
256 m_desc.num_samples = 0;
257 m_desc.buffer = NULL;
258 }
259
260 bool isValidChannelOrder(cl_context context, cl_channel_order order) const
261 {
262 cl_mem_flags flags = CL_MEM_COPY_HOST_PTR;
263 cl_uint actualNumFormats = 0;
264 cl_image_format imgFormat = m_format;
265 imgFormat.image_channel_order = order;
266
267 cl_int error = clGetSupportedImageFormats(
268 context,
269 flags,
270 m_desc.image_type,
271 0,
272 NULL,
273 &actualNumFormats);
274 if (CL_SUCCESS != error)
275 throw Exceptions::TestError("clGetSupportedImageFormats failed\n", error);
276
277 std::vector<cl_image_format> supportedFormats(actualNumFormats);
278 error = clGetSupportedImageFormats(context, flags, m_desc.image_type,
279 actualNumFormats,
280 supportedFormats.data(), NULL);
281 if (CL_SUCCESS != error)
282 throw Exceptions::TestError("clGetSupportedImageFormats failed\n", error);
283
284 for (size_t i=0; i<actualNumFormats; ++i)
285 {
286 cl_image_format curFormat = supportedFormats[i];
287
288 if(imgFormat.image_channel_order == curFormat.image_channel_order &&
289 imgFormat.image_channel_data_type == curFormat.image_channel_data_type)
290 return true;
291 }
292
293 return false;
294 }
295
296 void setChannelOrder(cl_channel_order order)
297 {
298 m_format.image_channel_order = order;
299 }
300
301 KernelArg* generate(cl_context context,
302 const WorkSizeInfo& ws,
303 const KernelArgInfo& argInfo,
304 const KernelArg* refArg,
305 const cl_kernel kernel,
306 const cl_device_id device)
307 {
308 void * pBuffer = NULL;
309 size_t numPixels = m_desc.image_width * m_desc.image_height * m_desc.image_depth * m_desc.image_array_size;
310 const int alignment = sizeof(cl_int) * 4 ; //RGBA channel size * sizeof (cl_int)
311 size_t allocSize = numPixels * alignment ;
312
313 cl_kernel_arg_access_qualifier accessQ = argInfo.getAccessQualifier();
314
315 cl_mem_flags mem_flags = 0;
316
317 if (accessQ == CL_KERNEL_ARG_ACCESS_READ_ONLY)
318 {
319 mem_flags |= CL_MEM_READ_ONLY;
320 }
321
322 if (accessQ == CL_KERNEL_ARG_ACCESS_WRITE_ONLY)
323 {
324 mem_flags |= CL_MEM_WRITE_ONLY;
325 }
326
327 if (accessQ == CL_KERNEL_ARG_ACCESS_READ_WRITE)
328 {
329 mem_flags |= CL_MEM_READ_WRITE;
330 }
331
332 pBuffer = align_malloc(allocSize, alignment);
333 if (NULL == pBuffer)
334 {
335 throw Exceptions::TestError("align_malloc failed for image\n", 1);
336 }
337 assert( (size_t)pBuffer % alignment == 0 );
338 if (NULL == refArg)
339 {
340 fillBuffer((cl_char *)pBuffer, allocSize );
341 }
342 else {
343 memcpy(pBuffer, refArg->getBuffer(), allocSize );
344 }
345
346 return new KernelArgImage(context, argInfo, pBuffer, allocSize, mem_flags, m_format, m_desc);
347 }
348
349 protected:
350 KernelArgGeneratorImage()
351 {}
352
353 void fillBuffer( cl_char * ptr, size_t nelem)
354 {
355 for( size_t i = 0; i < nelem; ++i )
356 {
357 ptr[i] = gRG.getNext<cl_char>(m_minValue, m_maxValue);
358 }
359 }
360
361 protected:
362 bool m_isBuffer;
363 size_t m_vectorSize;
364 cl_char m_minValue;
365 cl_char m_maxValue;
366 cl_image_format m_format;
367 cl_image_desc m_desc;
368 };
369
370 /**
371 Kernel argument generator for image1d_array
372 */
373 template<cl_channel_type channel_type> class KernelArgGeneratorImage1dArray: public KernelArgGeneratorImage
374 {
375 public:
376 KernelArgGeneratorImage1dArray( bool isBuffer, size_t vectorSize, char minValue, char maxValue ):
377 KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
378 {
379 m_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
380 m_format.image_channel_data_type = channel_type;
381
382 m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int)
383 m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch;
384
385 }
386 };
387
388 /**
389 Kernel argument generator for image1d_buffer
390 */
391 template<cl_channel_type channel_type> class KernelArgGeneratorImage1dBuffer: public KernelArgGeneratorImage
392 {
393 public:
394 KernelArgGeneratorImage1dBuffer( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
395 KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
396 {
397 m_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
398 m_format.image_channel_data_type = channel_type;
399
400 m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int)
401 // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf 5.2.2;
402 // Slice pitch of 1d images should be zero.
403 m_desc.image_slice_pitch = 0;
404 }
405 };
406
407 /**
408 Kernel argument generator for image1d
409 */
410 template<cl_channel_type channel_type> class KernelArgGeneratorImage1d: public KernelArgGeneratorImage
411 {
412 public:
413 KernelArgGeneratorImage1d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
414 KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
415 {
416 m_desc.image_type = CL_MEM_OBJECT_IMAGE1D;
417 m_format.image_channel_data_type = channel_type;
418
419 m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int)
420 // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf
421 // '5.3.1.2 image descriptor': Slice pitch is not applicable for one-
422 // dimensional images.
423 m_desc.image_slice_pitch = 0;
424 }
425 };
426
427 /**
428 Kernel argument generator for image2d_array
429 */
430 template<cl_channel_type channel_type> class KernelArgGeneratorImage2dArray: public KernelArgGeneratorImage
431 {
432 public:
433 KernelArgGeneratorImage2dArray( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
434 KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
435 {
436 m_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
437 m_format.image_channel_data_type = channel_type;
438
439 m_desc.image_height = 32;
440 m_desc.image_array_size = 8;
441 m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int)
442 m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch;
443 }
444 };
445
446 /**
447 Kernel argument generator for image2d
448 */
449 template<cl_channel_type channel_type> class KernelArgGeneratorImage2d: public KernelArgGeneratorImage
450 {
451 public:
452 KernelArgGeneratorImage2d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
453 KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
454 {
455 m_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
456 m_format.image_channel_data_type = channel_type;
457
458 m_desc.image_height = 32;
459 m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int)
460 // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf
461 // '5.3.1.2 image descriptor': Slice pitch is not applicable for two-
462 // dimensional images.
463 m_desc.image_slice_pitch = 0;
464 }
465 };
466
467 /**
468 Kernel argument generator for image3d
469 */
470 template<cl_channel_type channel_type> class KernelArgGeneratorImage3d: public KernelArgGeneratorImage
471 {
472 public:
473 KernelArgGeneratorImage3d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
474 KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
475 {
476 m_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
477 m_format.image_channel_data_type = channel_type;
478
479 m_desc.image_height = 32;
480 m_desc.image_depth = 8;
481 m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int)
482 m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch;
483 }
484 };
485
486 /**
487 Kernel argument generator for samplers
488 */
489 class KernelArgGeneratorSampler: public KernelArgGenerator
490 {
491 public:
492 KernelArgGeneratorSampler(bool isBuffer, size_t vectorSize, int minValue, int maxValue);
493
494 KernelArgGeneratorSampler();
495
496 /*
497 * Sampler property setters.
498 */
499 void setNormalized(cl_bool);
500 void setAddressingMode(cl_addressing_mode);
501 void setFiterMode(cl_filter_mode);
502
503 KernelArg* generate(cl_context context,
504 const WorkSizeInfo& ws,
505 const KernelArgInfo& argInfo,
506 const KernelArg* refArg,
507 const cl_kernel kernel,
508 const cl_device_id device)
509 {
510 return new KernelArgSampler(context, m_normalized, m_addressingMode, m_filterMode);
511 }
512 private:
513 void initToDefaults();
514
515 cl_bool m_normalized;
516 cl_addressing_mode m_addressingMode;
517 cl_filter_mode m_filterMode;
518 };
519
520 /*
521 * Generates all the possible values for image samplers.
522 */
523 class SamplerValuesGenerator
524 {
525 public:
526 class iterator {
527 friend class SamplerValuesGenerator;
528
529 size_t m_normIndex, m_filterIndex, m_addressingModeIndex;
530
531 iterator(size_t norm, size_t filter, size_t addressing);
532
533 bool incrementIndex(size_t &i, const size_t limit);
534 public:
535 iterator();
536
537 /*
538 * Moves the iterator to the next sampler value.
539 */
540 iterator& operator ++();
541
542 bool operator == (const iterator& other) const;
543
544 bool operator != (const iterator& other) const;
545
546 cl_bool getNormalized() const;
547
548 cl_filter_mode getFilterMode() const;
549
550 cl_addressing_mode getAddressingMode() const;
551
552 /*
553 * Converts the value of the sampler to a bitmask representation.
554 */
555 unsigned toBitmap() const;
556
557 /*
558 * Retruns a string representation of the sampler.
559 */
560 std::string toString() const;
561 };
562
563 iterator begin() { return iterator(); }
564
565 iterator end();
566
567 static cl_bool coordNormalizations[];
568 static cl_filter_mode filterModes[];
569 static cl_addressing_mode addressingModes[];
570 };
571
// Host-side declaration of "struct_type": four floats followed by one int.
// Note that KernelStructTypeArgGenerator sizes each element of this type as
// 2 * (sizeof(cl_float) * 4) bytes rather than sizeof(typedef_struct_type).
typedef struct struct_type {
    cl_float float4d[4];
    cl_int intd;
} typedef_struct_type;
576
// Five cl_int fields describing an image (width, channel type/order and the
// expected channel type/order). KernelStructTypeArgGenerator fills every
// field with a random cl_int requested from the range 0 .. 0x7fffffff.
typedef struct {
    cl_int width;
    cl_int channelType;
    cl_int channelOrder;
    cl_int expectedChannelType;
    cl_int expectedChannelOrder;
} image_kernel_data;
584
// A block of 16 doubles. KernelStructTypeArgGenerator uses
// sizeof(cl_double) * 16 as both the size and the alignment of this type.
typedef struct testStruct {
    cl_double vec[16];
} testStruct;
588
// Work-item information: the work dimension plus three-entry arrays for
// global size/id, local size/id, number of groups and group id.
// KernelStructTypeArgGenerator only zero-fills instances of this type.
typedef struct {
    cl_uint workDim;
    cl_uint globalSize[3];
    cl_uint globalID[3];
    cl_uint localSize[3];
    cl_uint localID[3];
    cl_uint numGroups[3];
    cl_uint groupID[3];
} work_item_data;
598
599 /**
600 Kernel argument generator for structure "struct_type"
601
602 Kernel argument generator for structure "image_kernel_data"
603
604 Kernel argument generator for structure "testStruct"
605 Since there are many "testStruct", we define it to have maximum space
606 Also the alignment is done following the "worst" case
607
608 Kernel argument generator for structure "work_item_data"
609 */
610
611 template<typename T> class KernelStructTypeArgGenerator: public KernelArgGenerator
612 {
613
614 public:
615 KernelStructTypeArgGenerator( bool isBuffer, size_t vectorSize, cl_int minValue, cl_int maxValue ):
616 m_isBuffer(isBuffer),
617 m_vectorSize(vectorSize),
618 m_alignment(0),
619 m_size(0)
620 {}
621
622 KernelArg* generate( cl_context context,
623 const WorkSizeInfo& ws,
624 const KernelArgInfo& argInfo,
625 const KernelArg* refArg,
626 const cl_kernel kernel,
627 const cl_device_id device )
628 {
629 T *pStruct = NULL;
630
631 calcSizeAndAlignment(pStruct);
632 size_t size = m_size;
633
634 if( m_isBuffer )
635 {
636 cl_kernel_arg_address_qualifier addrQ = argInfo.getAddressQualifier();
637
638 if( CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
639 {
640 if ( (CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE/m_size)*m_size < m_size )
641 size=(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE/m_size)*m_size;
642 }
643
644 if( CL_KERNEL_ARG_ADDRESS_GLOBAL == addrQ ||
645 CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
646 {
647 size_t no_e = ws.getGlobalWorkSize();
648 size = no_e * m_size;
649 pStruct = (T *)align_malloc(size, m_alignment);
650 if (NULL == pStruct)
651 {
652 throwExceptions(pStruct);
653 }
654 assert( (size_t)pStruct % m_alignment == 0 );
655 if (NULL == refArg)
656 {
657 fillBuffer(pStruct, no_e);
658 }
659 else {
660 memcpy(pStruct, refArg->getBuffer(), size);
661 }
662 }
663 return new KernelArgBuffer( context, argInfo, (void*)pStruct, size);
664 }
665 else {
666 pStruct = (T *)align_malloc(m_size, m_alignment);
667 if (NULL == pStruct)
668 {
669 throwExceptions(pStruct);
670 }
671 assert( (size_t)pStruct % m_alignment == 0 );
672 if (NULL == refArg)
673 {
674 fillBuffer(pStruct, 1);
675 }
676 else {
677 memcpy(pStruct, refArg->getBuffer(), m_size);
678 }
679
680 return new KernelArg( argInfo, (void*)pStruct, m_size);
681 }
682 }
683 private:
684
685 std::string getTypeString(typedef_struct_type *pStruct)
686 {
687 return "typedef_struct_type";
688 }
689
690 std::string getTypeString(image_kernel_data *pStruct)
691 {
692 return "image_kernel_data";
693 }
694
695 std::string getTypeString(testStruct *pStruct)
696 {
697 return "testStruct";
698 }
699
700 std::string getTypeString(work_item_data *pStruct)
701 {
702 return "work_item_data";
703 }
704
705 void throwExceptions(T * pStruct)
706 {
707 std::string str = "align_malloc failed for " ;
708 if (m_isBuffer)
709 str += "array of " ;
710 str += getTypeString(pStruct) ;
711 throw Exceptions::TestError(str, 1);
712 }
713
714 void fillBuffer( typedef_struct_type *pStruct, size_t no_e )
715 {
716 for (size_t e = 0; e < no_e; ++e)
717 {
718 for( size_t i = 0; i < 4; ++i )
719 {
720 pStruct[e].float4d[i] = gRG.getNext<cl_float>(-0x01000000, 0x01000000);
721 }
722 pStruct[e].intd = gRG.getNext<cl_int>(0, 0x7fffffff);
723 }
724 }
725
726 void fillBuffer( image_kernel_data *pStruct, size_t no_e )
727 {
728 for (size_t e = 0; e < no_e; ++e)
729 {
730 pStruct[e].width = gRG.getNext<cl_int>(0, 0x7fffffff);
731 pStruct[e].channelType = gRG.getNext<cl_int>(0, 0x7fffffff);
732 pStruct[e].channelOrder = gRG.getNext<cl_int>(0, 0x7fffffff);
733 pStruct[e].expectedChannelType = gRG.getNext<cl_int>(0, 0x7fffffff);
734 pStruct[e].expectedChannelOrder = gRG.getNext<cl_int>(0, 0x7fffffff);
735 }
736 }
737
738 void fillBuffer( testStruct *pStruct, size_t no_e )
739 {
740 for (size_t e = 0; e < no_e; ++e)
741 {
742 for( size_t i = 0; i < 16; ++i )
743 {
744 pStruct[e].vec[i] = gRG.getNext<cl_float>(-0x01000000, 0x01000000);
745 }
746 }
747 }
748
749 void fillBuffer( work_item_data *pStruct, size_t no_e )
750 {
751 for (size_t e = 0; e < no_e; ++e)
752 {
753 memset(&pStruct[e], 0, sizeof(work_item_data));
754 }
755 }
756
757 // structure alignment is derived from the size of the larger field in it
758 // size of the structure is the size of the largest field multiple by the number of fields
759
760 void calcSizeAndAlignment(typedef_struct_type *pStruct)
761 {
762 m_alignment = sizeof(cl_float) * 4;
763 m_size = m_alignment * 2 ;
764 }
765
766 void calcSizeAndAlignment(image_kernel_data *pStruct)
767 {
768 m_alignment = sizeof(cl_int);
769 m_size = sizeof(image_kernel_data) ;
770 }
771
772 void calcSizeAndAlignment(testStruct *pStruct)
773 {
774 m_alignment = sizeof(cl_double) * 16;
775 m_size = m_alignment;
776 }
777
778 void calcSizeAndAlignment(work_item_data *pStruct)
779 {
780 m_alignment = sizeof(cl_uint);
781 m_size = sizeof(work_item_data);
782 }
783
784 private:
785 bool m_isBuffer;
786 size_t m_vectorSize;
787 int m_alignment;
788 size_t m_size;
789 };
790
791 /**
792 Kernel argument generator for the simple scalar and vector types
793 */
794 template <class T> class KernelArgGeneratorT: public KernelArgGenerator
795 {
796 public:
797 KernelArgGeneratorT( bool isBuffer, size_t vectorSize, T minValue, T maxValue ):
798 m_isBuffer(isBuffer),
799 m_vectorSize(vectorSize),
800 m_minValue(minValue),
801 m_maxValue(maxValue)
802 {}
803
804 KernelArg* generate( cl_context context,
805 const WorkSizeInfo& ws,
806 const KernelArgInfo& argInfo,
807 const KernelArg* refArg,
808 const cl_kernel kernel,
809 const cl_device_id device )
810 {
811 T* pBuffer = NULL;
812 size_t size = 0;
813 int alignment, error;
814 cl_ulong totalDeviceLocalMem;
815 cl_ulong localMemUsedByKernel;
816 cl_uint numArgs, numLocalArgs = 0;
817 KernelArgInfo kernel_arg_info;
818
819 error = CL_SUCCESS;
820
821 // take care of 3-elements vector's alignment issue:
822 // if 3-elements vector - the alignment is 4-elements
823 if (m_vectorSize == 3)
824 alignment = sizeof(T) * 4;
825 else
826 alignment = sizeof(T) * m_vectorSize;
827
828 // gather information about the kernel and device
829 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(totalDeviceLocalMem), &totalDeviceLocalMem, NULL);
830 clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(localMemUsedByKernel), &localMemUsedByKernel, NULL);
831 clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
832
833 // Calculate the number of local memory arguments
834 for (cl_uint i = 0; i < numArgs; i ++)
835 {
836 error = clGetKernelArgInfo( kernel, i, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(cl_kernel_arg_address_qualifier), kernel_arg_info.getAddressQualifierRef(), &size);
837 if( error != CL_SUCCESS )
838 {
839 throw Exceptions::TestError("Unable to get argument address qualifier\n", error);
840 }
841
842 if(kernel_arg_info.getAddressQualifier() == CL_KERNEL_ARG_ADDRESS_LOCAL)
843 {
844 numLocalArgs ++;
845 }
846 }
847
848 // reduce the amount of local memory by the amount the kernel + implementation uses
849 totalDeviceLocalMem -= localMemUsedByKernel;
850
851 if( m_isBuffer )
852 {
853 cl_kernel_arg_address_qualifier addrQ = argInfo.getAddressQualifier();
854
855 // decide about the buffer size - take into account the alignment and padding
856 size = ws.getGlobalWorkSize() * alignment;
857
858 // reduce the size of the buffer for local memory
859 if (numLocalArgs &&
860 size > floor(static_cast<double>(totalDeviceLocalMem / numLocalArgs)) &&
861 addrQ == CL_KERNEL_ARG_ADDRESS_LOCAL)
862 {
863 size = floor(static_cast<double>(totalDeviceLocalMem / numLocalArgs));
864 }
865
866 if( CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
867 {
868 if ( CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE < size )
869 size = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE;
870 }
871
872 if( CL_KERNEL_ARG_ADDRESS_GLOBAL == addrQ ||
873 CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
874 {
875 pBuffer = (T *)align_malloc(size, alignment);
876 if (NULL == pBuffer)
877 {
878 throw Exceptions::TestError("align_malloc failed for array buffer\n", 1);
879 }
880 assert( (size_t)pBuffer % alignment == 0 );
881 if (NULL == refArg)
882 {
883 fillBuffer(pBuffer, size / sizeof(T));
884 }
885 else {
886 memcpy(pBuffer, refArg->getBuffer(), size);
887 }
888 }
889 return new KernelArgBuffer( context, argInfo, (void*)pBuffer, size);
890 }
891 else
892 {
893 if (m_vectorSize == 3)
894 size = sizeof(T) * 4;
895 else
896 size = sizeof(T) * m_vectorSize;
897
898 pBuffer = (T *)align_malloc(size, alignment);
899 if (NULL == pBuffer)
900 {
901 throw Exceptions::TestError("align_malloc failed for pBuffer\n", 1);
902 }
903 assert( (size_t)pBuffer % alignment == 0 );
904 if (NULL == refArg)
905 {
906 fillBuffer(pBuffer, m_vectorSize);
907 }
908 else {
909 memcpy(pBuffer, refArg->getBuffer(), size);
910 }
911 return new KernelArg( argInfo, (void*)pBuffer, size);
912 }
913 }
914 private:
915 void fillBuffer( T* buffer, size_t nelem)
916 {
917 for( size_t i = 0; i < nelem; ++i )
918 {
919 buffer[i] = gRG.getNext<T>(m_minValue, m_maxValue);
920 }
921 }
922
923 private:
924 bool m_isBuffer;
925 size_t m_vectorSize;
926 T m_minValue;
927 T m_maxValue;
928 };
929
930 /**
931 General facade for the kernel arguments generation functionality.
932 */
933 class DataGenerator
934 {
935 public:
936 static DataGenerator* getInstance();
937
938 ~DataGenerator();
939
940 KernelArg* generateKernelArg(cl_context context,
941 const KernelArgInfo& argInfo,
942 const WorkSizeInfo& ws,
943 const KernelArg* refArg,
944 const cl_kernel kernel,
945 const cl_device_id device)
946 {
947 KernelArgGenerator* pArgGenerator = getArgGenerator(argInfo);
948 return pArgGenerator->generate(context, ws, argInfo, refArg, kernel, device);
949 }
950
951 /*
952 * Gets the generator associated to the given key.
953 */
954 KernelArgGenerator* getArgGenerator(const KernelArgInfo& argInfo);
955
956 /*
957 * Sets the entry associated to the given key, with the given prototype
958 * generator.
959 */
960 void setArgGenerator(const KernelArgInfo& key, KernelArgGenerator* gen);
961
962 private:
963 DataGenerator();
964
965 static DataGenerator *Instance;
966
967 typedef std::map<std::string, KernelArgGenerator*> ArgGeneratorsMap;
968 ArgGeneratorsMap m_argGenerators;
969 };
970
971 class ImageValuesGenerator
972 {
973 public:
974 class iterator
975 {
976 friend class ImageValuesGenerator;
977 public:
978 /*
979 * Iterator operators.
980 */
981 iterator& operator ++();
982 bool operator == (const iterator&) const;
983 bool operator != (const iterator&) const;
984 /*
985 * Returns the name of the basic image type (e.g., image2d_t).
986 */
987 std::string getImageTypeName() const;
988
989 /*
990 * Returns the name of the genrator that generates images of this type
991 * (e.g., imaget2d_float).
992 */
993 std::string getImageGeneratorName() const;
994
995 /*
996 * Returns the name of the genrator that generates images of the 'base'
997 * type (e.g., imaget2d_t).
998 */
999 std::string getBaseImageGeneratorName() const;
1000
1001 /*
1002 * Returns the OpenCL enumeration for the channel order of the image
1003 * object this iterator creates.
1004 */
1005 int getOpenCLChannelOrder() const;
1006
1007 /*
1008 * Returns the SPIR enumeration for the channel order of the image
1009 * object this iterator creates.
1010 */
1011 int getSPIRChannelOrder() const;
1012
1013 /*
1014 * Returns the data type of the image object this iterator creates. (e.g.,
1015 * cl_float, cl_int).
1016 */
1017 int getDataType() const;
1018
1019 /*
1020 * Returns the data type of the image object this iterator creates. (e.g.,
1021 * float, int), in string format.
1022 */
1023 std::string getDataTypeName() const;
1024
1025 std::string toString() const;
1026 private:
1027 /*
1028 * Constructor for creating a 'begin' iterator.
1029 */
1030 iterator(ImageValuesGenerator*);
1031 /*
1032 * Constructor for creating an 'end' iterator.
1033 */
1034 iterator(int);
1035 /*
1036 * Increments the given argument up to the given limit.
1037 * In case the new value reaches the limit, the index is reset to hold zero.
1038 * Returns: true if the value of the index was incremented, false if it was reset
1039 * to zero.
1040 */
1041 bool incrementIndex(size_t& index, size_t limit);
1042
1043 /*
1044 * Returns true is the index combination of this iterator is legal,
1045 * or false otherwise.
1046 */
1047 bool isLegalCombination() const;
1048
1049 ImageValuesGenerator* m_parent;
1050 size_t m_channelIndex, m_imgTyIndex;
1051 }; //End iterator.
1052
1053 iterator begin();
1054 iterator end();
1055
1056 static cl_channel_order channelOrders[];
1057 static const char* imageTypes[];
1058 private:
1059 WorkSizeInfo m_wsInfo;
1060 };
1061
1062 #endif
1063