1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef __DATAGEN_H
17 #define __DATAGEN_H
18 
19 #include "harness/compat.h"
20 
21 #include <assert.h>
22 
23 #include <string>
24 #include <memory>
25 #include <vector>
26 #include <map>
27 #include <fstream>
28 #include <algorithm>
29 
30 #include "harness/mt19937.h"
31 
32 #include "exceptions.h"
33 #include "kernelargs.h"
34 
// ESINNS is a short name for EXPLICIT_SPECIALIZATION_IN_NON_NAMESPACE_SCOPE
//
// The RandomGenerator::getNext<> explicit specializations further down are
// written once and placed either inside or outside the class body:
//  - with __GNUC__, ESINNS is defined, the class body is closed before the
//    specializations, ESINNS_PREF() expands to 'inline' and ESINNS_POST()
//    expands to the 'RandomGenerator::' qualifier;
//  - otherwise both helper macros expand to nothing and the specializations
//    stay inside the class body, which is closed after them.

#undef ESINNS

#ifdef __GNUC__

#define ESINNS
#define ESINNS_PREF() inline
#define ESINNS_POST() RandomGenerator::

#else

#define ESINNS_PREF()
#define ESINNS_POST()

#endif
51 
// Number of entries in the per-dimension arrays of WorkSizeInfo.
#define MAX_WORK_DIM        3
// NOTE(review): in the standard OpenCL headers CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
// is a clGetDeviceInfo parameter name, so this expression is computed from the
// numeric value of that token rather than from a device-reported size - confirm
// that this is intended. The "/ 2) * 2" step rounds the result down to an even
// number.
#define GLOBAL_WORK_SIZE    (((CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / sizeof(cl_double) / 16) / 2) * 2)            // max buffer size = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / sizeof(double16)

// SPIR definitions for image channel data types (Section 2.1.3.2).
#define SPIR_CLK_SNORM_INT8         0x10D0
#define SPIR_CLK_SNORM_INT16        0x10D1
#define SPIR_CLK_UNORM_INT8         0x10D2
#define SPIR_CLK_UNORM_INT16        0x10D3
#define SPIR_CLK_UNORM_SHORT_565    0x10D4
#define SPIR_CLK_UNORM_SHORT_555    0x10D5
#define SPIR_CLK_UNORM_SHORT_101010 0x10D6
#define SPIR_CLK_SIGNED_INT8        0x10D7
#define SPIR_CLK_SIGNED_INT16       0x10D8
#define SPIR_CLK_SIGNED_INT32       0x10D9
#define SPIR_CLK_UNSIGNED_INT8      0x10DA
#define SPIR_CLK_UNSIGNED_INT16     0x10DB
#define SPIR_CLK_UNSIGNED_INT32     0x10DC
#define SPIR_CLK_HALF_FLOAT         0x10DD
#define SPIR_CLK_FLOAT              0x10DE
#define SPIR_CLK_UNORM_INT24        0x10DF

// NOTE(review): not referenced in the visible part of this header; presumably
// an upper bound on the number of image formats handled - confirm at the use site.
#define NUM_IMG_FORMATS 64
74 
// Random value helpers, declared here and defined elsewhere. Each takes a
// [low, high] pair and the MTdata generator state to draw from.
// NOTE(review): whether 'high' is inclusive is not visible from this header -
// check the definitions before relying on it.
double get_random_double(double low, double high, MTdata d);
float get_random_float(float low, float high, MTdata d);
size_t get_random_size_t(size_t low, size_t high, MTdata d);
78 
/**
 Simple container for the work size information.

 All data members are public and are not initialized by this class; the
 per-dimension arrays hold MAX_WORK_DIM entries.
 NOTE(review): the field names match the clEnqueueNDRangeKernel parameters of
 the same names; presumably they are passed straight through - confirm at the
 call site.
 */
class WorkSizeInfo
{
public:
    /**
      Returns the flat global size
      (defined elsewhere; presumably the product of the used global_work_size
      entries - confirm against the definition)
      */
    size_t getGlobalWorkSize() const;
public:
    cl_uint work_dim;                           // number of dimensions in use
    size_t  global_work_offset[MAX_WORK_DIM];   // per-dimension global offset
    size_t  global_work_size[MAX_WORK_DIM];     // per-dimension global size
    size_t  local_work_size[MAX_WORK_DIM];      // per-dimension work-group size
};
95 
96 /**
97  Generates various types of random numbers
98  */
99 class RandomGenerator
100 {
101 public:
RandomGenerator()102     RandomGenerator():m_d(NULL)
103     {
104        init(0);
105     }
106 
~RandomGenerator()107     ~RandomGenerator()
108     {
109         if( NULL != m_d )
110             free_mtdata(m_d);
111     }
112 
init(cl_uint seed)113     void init(cl_uint seed)
114     {
115         m_d = init_genrand( seed );
116     }
117 
getNext(T low,T high)118     template<class T> T getNext(T low, T high)
119     {
120         assert(false && "Not implemented");
121         return T();
122     }
123 
124 #ifdef ESINNS
125 
126 private:
127     MTdata m_d;
128 };
129 
130 #endif
131 
ESINNS_PREF()132 template<> ESINNS_PREF() bool ESINNS_POST()getNext(bool low, bool high)
133 {
134     return (bool)get_random_size_t((size_t)low, (size_t)high, m_d);
135 }
136 
ESINNS_PREF()137 template<> ESINNS_PREF() cl_char ESINNS_POST()getNext(cl_char low, cl_char high)
138 {
139     return (cl_char)get_random_size_t((size_t)low, (size_t)high, m_d);
140 }
141 
ESINNS_PREF()142 template<> ESINNS_PREF() cl_double ESINNS_POST()getNext(cl_double low, cl_double high)
143 {
144     return get_random_double(low, high, m_d);
145 }
146 
ESINNS_PREF()147 template<> ESINNS_PREF() cl_float ESINNS_POST()getNext(cl_float low, cl_float high)
148 {
149     return get_random_float(low, high, m_d);
150 }
151 
ESINNS_PREF()152 template<> ESINNS_PREF() cl_int ESINNS_POST()getNext(cl_int low, cl_int high)
153 {
154     return (cl_int)get_random_size_t((size_t)low, (size_t)high, m_d);
155 }
156 
ESINNS_PREF()157 template<> ESINNS_PREF() cl_long ESINNS_POST()getNext(cl_long low, cl_long high)
158 {
159     return (cl_long)get_random_size_t((size_t)low, (size_t)high, m_d);
160 }
161 
ESINNS_PREF()162 template<> ESINNS_PREF() cl_short ESINNS_POST()getNext(cl_short low, cl_short high)
163 {
164     return (cl_short)get_random_size_t((size_t)low, (size_t)high, m_d);
165 }
166 
ESINNS_PREF()167 template<> ESINNS_PREF() cl_uchar ESINNS_POST()getNext(cl_uchar low, cl_uchar high)
168 {
169     return (cl_uchar)get_random_size_t((size_t)low, (size_t)high, m_d);
170 }
171 
ESINNS_PREF()172 template<> ESINNS_PREF() cl_uint ESINNS_POST()getNext(cl_uint low, cl_uint high)
173 {
174     return (cl_uint)get_random_size_t((size_t)low, (size_t)high, m_d);
175 }
176 
ESINNS_PREF()177 template<> ESINNS_PREF() cl_ulong ESINNS_POST()getNext(cl_ulong low, cl_ulong high)
178 {
179     return (cl_ulong)get_random_size_t((size_t)low, (size_t)high, m_d);
180 }
181 
ESINNS_PREF()182 template<> ESINNS_PREF() cl_ushort ESINNS_POST()getNext(cl_ushort low, cl_ushort high)
183 {
184     return (cl_ushort)get_random_size_t((size_t)low, (size_t)high, m_d);
185 }
186 
187 #ifndef ESINNS
188 
189 private:
190     MTdata m_d;
191 };
192 
193 #endif
194 
195 extern RandomGenerator gRG;
196 
197 /**
198  Base class for kernel argument generator
199  */
200 class KernelArgGenerator
201 {
202 protected:
203     KernelArgGenerator()
204     {}
205 
206 public:
207     virtual KernelArg* generate( cl_context context,
208                                  const WorkSizeInfo& ws,
209                                  const KernelArgInfo& argInfo,
210                                  const KernelArg* refArg,
211                                  const cl_kernel kernel,
212                                  const cl_device_id device ) = 0;
213     virtual ~KernelArgGenerator() {}
214 };
215 
216 /**
217  Mock: 'Not implemented' version of the kernel argument generator - used for the still unsupported types
218  */
219 class KernelArgGeneratorNI: public KernelArgGenerator
220 {
221 public:
222     KernelArgGeneratorNI( bool isBuffer, size_t vectorSize, int minValue, int maxValue )
223     {}
224 
225     KernelArg* generate( cl_context context,
226                          const WorkSizeInfo& ws,
227                          const KernelArgInfo& argInfo,
228                          const KernelArg* refArg,
229                          const cl_kernel kernel,
230                          const cl_device_id device )
231     {
232         //assert(false && "Not implemented");
233         throw Exceptions::TestError("KernelArgGenerator is not implemented\n");
234     }
235 };
236 
237 /**
238  Kernel argument generator for images
239  */
240 class KernelArgGeneratorImage: public KernelArgGenerator
241 {
242 public:
243     KernelArgGeneratorImage(bool isBuffer, size_t vectorSize, char minValue, char maxValue) :
244         m_isBuffer(isBuffer),
245         m_vectorSize(vectorSize),
246         m_minValue(minValue),
247         m_maxValue(maxValue)
248     {
249         m_format.image_channel_order = CL_RGBA;
250 
251         m_desc.image_width = 32;
252         m_desc.image_height = 1;
253         m_desc.image_depth = 1;
254         m_desc.image_array_size = 1;
255         m_desc.num_mip_levels = 0;
256         m_desc.num_samples = 0;
257         m_desc.buffer = NULL;
258     }
259 
260     bool isValidChannelOrder(cl_context context, cl_channel_order order) const
261     {
262         cl_mem_flags flags = CL_MEM_COPY_HOST_PTR;
263         cl_uint actualNumFormats = 0;
264         cl_image_format imgFormat = m_format;
265         imgFormat.image_channel_order = order;
266 
267         cl_int error = clGetSupportedImageFormats(
268             context,
269             flags,
270             m_desc.image_type,
271             0,
272             NULL,
273             &actualNumFormats);
274         if (CL_SUCCESS != error)
275             throw Exceptions::TestError("clGetSupportedImageFormats failed\n", error);
276 
277         std::vector<cl_image_format> supportedFormats(actualNumFormats);
278         error = clGetSupportedImageFormats(context, flags, m_desc.image_type,
279                                            actualNumFormats,
280                                            supportedFormats.data(), NULL);
281         if (CL_SUCCESS != error)
282             throw Exceptions::TestError("clGetSupportedImageFormats failed\n", error);
283 
284         for (size_t i=0; i<actualNumFormats; ++i)
285         {
286             cl_image_format curFormat = supportedFormats[i];
287 
288             if(imgFormat.image_channel_order == curFormat.image_channel_order &&
289                imgFormat.image_channel_data_type == curFormat.image_channel_data_type)
290                return true;
291         }
292 
293         return false;
294     }
295 
296     void setChannelOrder(cl_channel_order order)
297     {
298         m_format.image_channel_order = order;
299     }
300 
301     KernelArg* generate(cl_context context,
302                         const WorkSizeInfo& ws,
303                         const KernelArgInfo& argInfo,
304                         const KernelArg* refArg,
305                         const cl_kernel kernel,
306                         const cl_device_id device)
307     {
308         void * pBuffer = NULL;
309         size_t numPixels = m_desc.image_width * m_desc.image_height * m_desc.image_depth * m_desc.image_array_size;
310         const int alignment = sizeof(cl_int) * 4 ; //RGBA channel size * sizeof (cl_int)
311         size_t allocSize = numPixels * alignment ;
312 
313         cl_kernel_arg_access_qualifier accessQ = argInfo.getAccessQualifier();
314 
315         cl_mem_flags mem_flags = 0;
316 
317         if (accessQ == CL_KERNEL_ARG_ACCESS_READ_ONLY)
318         {
319             mem_flags |=  CL_MEM_READ_ONLY;
320         }
321 
322         if (accessQ == CL_KERNEL_ARG_ACCESS_WRITE_ONLY)
323         {
324             mem_flags |=  CL_MEM_WRITE_ONLY;
325         }
326 
327         if (accessQ == CL_KERNEL_ARG_ACCESS_READ_WRITE)
328         {
329             mem_flags |=  CL_MEM_READ_WRITE;
330         }
331 
332         pBuffer = align_malloc(allocSize, alignment);
333         if (NULL == pBuffer)
334         {
335             throw Exceptions::TestError("align_malloc failed for image\n", 1);
336         }
337         assert( (size_t)pBuffer % alignment == 0 );
338         if (NULL == refArg)
339         {
340             fillBuffer((cl_char *)pBuffer, allocSize );
341         }
342         else {
343             memcpy(pBuffer, refArg->getBuffer(), allocSize );
344         }
345 
346         return new KernelArgImage(context, argInfo, pBuffer, allocSize, mem_flags, m_format, m_desc);
347     }
348 
349 protected:
350     KernelArgGeneratorImage()
351     {}
352 
353     void fillBuffer( cl_char * ptr, size_t nelem)
354     {
355         for( size_t i = 0; i < nelem; ++i )
356         {
357             ptr[i]  = gRG.getNext<cl_char>(m_minValue, m_maxValue);
358         }
359     }
360 
361 protected:
362     bool m_isBuffer;
363     size_t m_vectorSize;
364     cl_char m_minValue;
365     cl_char m_maxValue;
366     cl_image_format m_format;
367     cl_image_desc m_desc;
368 };
369 
370 /**
371  Kernel argument generator for image1d_array
372  */
373 template<cl_channel_type channel_type> class KernelArgGeneratorImage1dArray: public KernelArgGeneratorImage
374 {
375 public:
376     KernelArgGeneratorImage1dArray( bool isBuffer, size_t vectorSize, char minValue, char maxValue ):
377         KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
378     {
379         m_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
380         m_format.image_channel_data_type = channel_type;
381 
382         m_desc.image_row_pitch = m_desc.image_width*4*4;                        //RGBA channel size * sizeof (cl_int)
383         m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch;
384 
385     }
386 };
387 
388 /**
389  Kernel argument generator for image1d_buffer
390  */
391 template<cl_channel_type channel_type> class KernelArgGeneratorImage1dBuffer: public KernelArgGeneratorImage
392 {
393 public:
394     KernelArgGeneratorImage1dBuffer( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
395         KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
396     {
397         m_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
398         m_format.image_channel_data_type = channel_type;
399 
400         m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int)
401         // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf 5.2.2;
402         // Slice pitch of 1d images should be zero.
403         m_desc.image_slice_pitch = 0;
404     }
405 };
406 
407 /**
408  Kernel argument generator for image1d
409  */
410 template<cl_channel_type channel_type> class KernelArgGeneratorImage1d: public KernelArgGeneratorImage
411 {
412 public:
413     KernelArgGeneratorImage1d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
414         KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
415     {
416         m_desc.image_type = CL_MEM_OBJECT_IMAGE1D;
417         m_format.image_channel_data_type = channel_type;
418 
419         m_desc.image_row_pitch = m_desc.image_width*4*4;                        //RGBA channel size * sizeof (cl_int)
420         // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf
421         // '5.3.1.2 image descriptor': Slice pitch is not applicable for one-
422         // dimensional images.
423         m_desc.image_slice_pitch = 0;
424     }
425 };
426 
427 /**
428  Kernel argument generator for image2d_array
429  */
430 template<cl_channel_type channel_type> class KernelArgGeneratorImage2dArray: public KernelArgGeneratorImage
431 {
432 public:
433     KernelArgGeneratorImage2dArray( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
434         KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
435     {
436         m_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
437         m_format.image_channel_data_type = channel_type;
438 
439         m_desc.image_height = 32;
440         m_desc.image_array_size = 8;
441         m_desc.image_row_pitch = m_desc.image_width*4*4;                        //RGBA channel size * sizeof (cl_int)
442         m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch;
443     }
444 };
445 
446 /**
447  Kernel argument generator for image2d
448  */
449 template<cl_channel_type channel_type> class KernelArgGeneratorImage2d: public KernelArgGeneratorImage
450 {
451 public:
452     KernelArgGeneratorImage2d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
453         KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
454     {
455         m_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
456         m_format.image_channel_data_type = channel_type;
457 
458         m_desc.image_height = 32;
459         m_desc.image_row_pitch = m_desc.image_width*4*4;                        //RGBA channel size * sizeof (cl_int)
460         // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf
461         // '5.3.1.2 image descriptor': Slice pitch is not applicable for two-
462         // dimensional images.
463         m_desc.image_slice_pitch = 0;
464     }
465 };
466 
467 /**
468  Kernel argument generator for image3d
469  */
470 template<cl_channel_type channel_type> class KernelArgGeneratorImage3d: public KernelArgGeneratorImage
471 {
472 public:
473     KernelArgGeneratorImage3d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
474         KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
475     {
476         m_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
477         m_format.image_channel_data_type = channel_type;
478 
479         m_desc.image_height = 32;
480         m_desc.image_depth = 8;
481         m_desc.image_row_pitch = m_desc.image_width*4*4;                        //RGBA channel size * sizeof (cl_int)
482         m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch;
483     }
484 };
485 
486 /**
487  Kernel argument generator for samplers
488  */
489 class KernelArgGeneratorSampler: public KernelArgGenerator
490 {
491 public:
492     KernelArgGeneratorSampler(bool isBuffer, size_t vectorSize, int minValue, int maxValue);
493 
494     KernelArgGeneratorSampler();
495 
496     /*
497      * Sampler property setters.
498      */
499     void setNormalized(cl_bool);
500     void setAddressingMode(cl_addressing_mode);
501     void setFiterMode(cl_filter_mode);
502 
503     KernelArg* generate(cl_context context,
504                         const WorkSizeInfo& ws,
505                         const KernelArgInfo& argInfo,
506                         const KernelArg* refArg,
507                         const cl_kernel kernel,
508                         const cl_device_id device)
509     {
510         return new KernelArgSampler(context, m_normalized, m_addressingMode, m_filterMode);
511     }
512 private:
513     void initToDefaults();
514 
515     cl_bool m_normalized;
516     cl_addressing_mode m_addressingMode;
517     cl_filter_mode m_filterMode;
518 };
519 
/*
 * Generates all the possible values for image samplers.
 *
 * Iteration goes from begin() to end(). An iterator keeps one index for each
 * of the three sampler properties.
 * NOTE(review): the indices presumably address the three static tables at the
 * bottom of this class - confirm against the definitions.
 */
class SamplerValuesGenerator
{
public:
    class iterator {
        friend class SamplerValuesGenerator;

        // Current position, one index per sampler property.
        size_t m_normIndex, m_filterIndex, m_addressingModeIndex;

        // Private: only SamplerValuesGenerator can create an iterator at an
        // explicit position.
        iterator(size_t norm, size_t filter, size_t addressing);

        // Helper for operator ++ (defined elsewhere).
        // NOTE(review): presumably advances 'i' and reports through the
        // return value whether it passed 'limit' - confirm.
        bool incrementIndex(size_t &i, const size_t limit);
    public:
        iterator();

        /*
         * Moves the iterator to the next sampler value.
         */
        iterator& operator ++();

        bool operator == (const iterator& other) const;

        bool operator != (const iterator& other) const;

        // Accessors for the sampler properties at the current position.
        cl_bool getNormalized() const;

        cl_filter_mode getFilterMode() const;

        cl_addressing_mode getAddressingMode() const;

        /*
         * Converts the value of the sampler to a bitmask representation.
         */
        unsigned toBitmap() const;

        /*
         * Returns a string representation of the sampler.
         */
        std::string toString() const;
    };

    // The first sampler value is the default-constructed iterator.
    iterator begin() { return iterator(); }

    iterator end();

    // Candidate values for each sampler property (defined elsewhere).
    static cl_bool coordNormalizations[];
    static cl_filter_mode filterModes[];
    static cl_addressing_mode addressingModes[];
};
571 
// Host-side structure used as a kernel argument payload.
// KernelStructTypeArgGenerator does not use sizeof() for this type: it aligns
// each element to sizeof(cl_float) * 4 and gives it twice that size.
typedef struct struct_type {
    cl_float float4d[4];
    cl_int intd;
} typedef_struct_type;
576 
// Host-side structure used as a kernel argument payload.
// KernelStructTypeArgGenerator fills every field with a random non-negative
// cl_int and uses sizeof(image_kernel_data) with cl_int alignment.
typedef struct {
    cl_int width;
    cl_int channelType;
    cl_int channelOrder;
    cl_int expectedChannelType;
    cl_int expectedChannelOrder;
 } image_kernel_data;
584 
// Host-side structure used as a kernel argument payload: 16 doubles.
// KernelStructTypeArgGenerator aligns it to, and sizes it as,
// sizeof(cl_double) * 16.
typedef struct testStruct {
     cl_double vec[16];
 } testStruct;
588 
// Host-side structure used as a kernel argument payload.
// KernelStructTypeArgGenerator zero-fills it instead of generating random data.
// NOTE(review): the field names suggest one value per work-item built-in
// function, with three entries for the per-dimension ones - confirm against
// the kernels that use it.
typedef struct {
     cl_uint workDim;
     cl_uint globalSize[3];
     cl_uint globalID[3];
     cl_uint localSize[3];
     cl_uint localID[3];
     cl_uint numGroups[3];
     cl_uint groupID[3];
  } work_item_data;
598 
599 /**
600  Kernel argument generator for structure "struct_type"
601 
602  Kernel argument generator for structure "image_kernel_data"
603 
604  Kernel argument generator for structure "testStruct"
605  Since there are many "testStruct", we define it to have maximum space
606  Also the alignment is done following the "worst" case
607 
608  Kernel argument generator for structure "work_item_data"
609  */
610 
611   template<typename T> class KernelStructTypeArgGenerator: public KernelArgGenerator
612   {
613 
614   public:
615       KernelStructTypeArgGenerator( bool isBuffer, size_t vectorSize, cl_int minValue, cl_int maxValue ):
616           m_isBuffer(isBuffer),
617           m_vectorSize(vectorSize),
618           m_alignment(0),
619           m_size(0)
620       {}
621 
622       KernelArg* generate( cl_context context,
623                                  const WorkSizeInfo& ws,
624                                  const KernelArgInfo& argInfo,
625                                  const KernelArg* refArg,
626                                  const cl_kernel kernel,
627                                  const cl_device_id device )
628       {
629           T *pStruct = NULL;
630 
631           calcSizeAndAlignment(pStruct);
632           size_t size = m_size;
633 
634           if( m_isBuffer )
635           {
636               cl_kernel_arg_address_qualifier addrQ = argInfo.getAddressQualifier();
637 
638               if( CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
639               {
640                   if ( (CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE/m_size)*m_size < m_size )
641                       size=(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE/m_size)*m_size;
642               }
643 
644               if( CL_KERNEL_ARG_ADDRESS_GLOBAL   == addrQ ||
645                   CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
646               {
647                   size_t no_e = ws.getGlobalWorkSize();
648                   size = no_e * m_size;
649                   pStruct = (T *)align_malloc(size, m_alignment);
650                   if (NULL == pStruct)
651                   {
652                        throwExceptions(pStruct);
653                   }
654                   assert( (size_t)pStruct % m_alignment == 0 );
655                    if (NULL == refArg)
656                   {
657                       fillBuffer(pStruct, no_e);
658                   }
659                   else {
660                       memcpy(pStruct, refArg->getBuffer(), size);
661                   }
662               }
663               return new KernelArgBuffer( context, argInfo, (void*)pStruct, size);
664           }
665           else {
666               pStruct = (T *)align_malloc(m_size, m_alignment);
667               if (NULL == pStruct)
668               {
669                    throwExceptions(pStruct);
670               }
671               assert( (size_t)pStruct % m_alignment == 0 );
672               if (NULL == refArg)
673               {
674                   fillBuffer(pStruct, 1);
675               }
676               else {
677                   memcpy(pStruct, refArg->getBuffer(), m_size);
678               }
679 
680               return new KernelArg( argInfo, (void*)pStruct, m_size);
681           }
682       }
683   private:
684 
685       std::string getTypeString(typedef_struct_type *pStruct)
686       {
687           return "typedef_struct_type";
688       }
689 
690       std::string getTypeString(image_kernel_data *pStruct)
691       {
692           return "image_kernel_data";
693       }
694 
695       std::string getTypeString(testStruct *pStruct)
696       {
697           return "testStruct";
698       }
699 
700       std::string getTypeString(work_item_data *pStruct)
701       {
702           return "work_item_data";
703       }
704 
705       void throwExceptions(T * pStruct)
706       {
707           std::string str = "align_malloc failed for " ;
708           if (m_isBuffer)
709               str += "array of " ;
710           str += getTypeString(pStruct) ;
711           throw Exceptions::TestError(str, 1);
712       }
713 
714       void fillBuffer( typedef_struct_type *pStruct, size_t no_e )
715       {
716           for (size_t e = 0; e < no_e; ++e)
717           {
718               for( size_t i = 0; i < 4; ++i )
719               {
720                   pStruct[e].float4d[i] = gRG.getNext<cl_float>(-0x01000000, 0x01000000);
721               }
722               pStruct[e].intd = gRG.getNext<cl_int>(0, 0x7fffffff);
723           }
724       }
725 
726       void fillBuffer( image_kernel_data *pStruct, size_t no_e )
727       {
728           for (size_t e = 0; e < no_e; ++e)
729           {
730               pStruct[e].width = gRG.getNext<cl_int>(0, 0x7fffffff);
731               pStruct[e].channelType = gRG.getNext<cl_int>(0, 0x7fffffff);
732               pStruct[e].channelOrder = gRG.getNext<cl_int>(0, 0x7fffffff);
733               pStruct[e].expectedChannelType = gRG.getNext<cl_int>(0, 0x7fffffff);
734               pStruct[e].expectedChannelOrder = gRG.getNext<cl_int>(0, 0x7fffffff);
735           }
736       }
737 
738       void fillBuffer( testStruct *pStruct, size_t no_e )
739       {
740           for (size_t e = 0; e < no_e; ++e)
741           {
742               for( size_t i = 0; i < 16; ++i )
743               {
744                   pStruct[e].vec[i] = gRG.getNext<cl_float>(-0x01000000, 0x01000000);
745                }
746           }
747       }
748 
749       void fillBuffer( work_item_data *pStruct, size_t no_e )
750       {
751           for (size_t e = 0; e < no_e; ++e)
752           {
753               memset(&pStruct[e], 0, sizeof(work_item_data));
754           }
755       }
756 
757       // structure alignment is derived from the size of the larger field in it
758       // size of the structure is the size of the largest field multiple by the number of fields
759 
760       void calcSizeAndAlignment(typedef_struct_type *pStruct)
761       {
762           m_alignment = sizeof(cl_float) * 4;
763           m_size = m_alignment * 2 ;
764       }
765 
766       void calcSizeAndAlignment(image_kernel_data *pStruct)
767       {
768           m_alignment = sizeof(cl_int);
769           m_size = sizeof(image_kernel_data) ;
770       }
771 
772       void calcSizeAndAlignment(testStruct *pStruct)
773       {
774           m_alignment = sizeof(cl_double) * 16;
775           m_size = m_alignment;
776       }
777 
778       void calcSizeAndAlignment(work_item_data *pStruct)
779       {
780           m_alignment = sizeof(cl_uint);
781           m_size = sizeof(work_item_data);
782       }
783 
784   private:
785       bool m_isBuffer;
786       size_t m_vectorSize;
787       int m_alignment;
788       size_t m_size;
789 };
790 
791 /**
792  Kernel argument generator for the simple scalar and vector types
793  */
794 template <class T> class KernelArgGeneratorT: public KernelArgGenerator
795 {
796 public:
797     KernelArgGeneratorT( bool isBuffer, size_t vectorSize, T minValue, T maxValue ):
798         m_isBuffer(isBuffer),
799         m_vectorSize(vectorSize),
800         m_minValue(minValue),
801         m_maxValue(maxValue)
802     {}
803 
804     KernelArg* generate( cl_context context,
805                          const WorkSizeInfo& ws,
806                          const KernelArgInfo& argInfo,
807                          const KernelArg* refArg,
808                          const cl_kernel kernel,
809                          const cl_device_id device  )
810     {
811         T* pBuffer = NULL;
812         size_t size = 0;
813         int alignment, error;
814         cl_ulong totalDeviceLocalMem;
815         cl_ulong localMemUsedByKernel;
816         cl_uint numArgs, numLocalArgs = 0;
817         KernelArgInfo kernel_arg_info;
818 
819         error = CL_SUCCESS;
820 
821         // take care of 3-elements vector's alignment issue:
822         // if 3-elements vector - the alignment is 4-elements
823         if (m_vectorSize == 3)
824             alignment = sizeof(T) * 4;
825         else
826             alignment = sizeof(T) * m_vectorSize;
827 
828         // gather information about the kernel and device
829         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(totalDeviceLocalMem), &totalDeviceLocalMem, NULL);
830         clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(localMemUsedByKernel), &localMemUsedByKernel, NULL);
831         clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
832 
833         // Calculate the number of local memory arguments
834         for (cl_uint i = 0; i < numArgs; i ++)
835         {
836             error = clGetKernelArgInfo( kernel, i, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(cl_kernel_arg_address_qualifier), kernel_arg_info.getAddressQualifierRef(), &size);
837             if( error != CL_SUCCESS )
838             {
839                 throw Exceptions::TestError("Unable to get argument address qualifier\n", error);
840             }
841 
842             if(kernel_arg_info.getAddressQualifier() == CL_KERNEL_ARG_ADDRESS_LOCAL)
843             {
844                 numLocalArgs ++;
845             }
846         }
847 
848         // reduce the amount of local memory by the amount the kernel + implementation uses
849         totalDeviceLocalMem -= localMemUsedByKernel;
850 
851         if( m_isBuffer )
852         {
853             cl_kernel_arg_address_qualifier addrQ = argInfo.getAddressQualifier();
854 
855             // decide about the buffer size - take into account the alignment and padding
856             size = ws.getGlobalWorkSize() * alignment;
857 
858             // reduce the size of the buffer for local memory
859             if (numLocalArgs &&
860                 size > floor(static_cast<double>(totalDeviceLocalMem / numLocalArgs)) &&
861                 addrQ == CL_KERNEL_ARG_ADDRESS_LOCAL)
862             {
863                 size = floor(static_cast<double>(totalDeviceLocalMem / numLocalArgs));
864             }
865 
866             if( CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
867             {
868                 if ( CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE < size )
869                     size = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE;
870             }
871 
872             if( CL_KERNEL_ARG_ADDRESS_GLOBAL   == addrQ ||
873                 CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
874             {
875                 pBuffer = (T *)align_malloc(size, alignment);
876                 if (NULL == pBuffer)
877                 {
878                      throw Exceptions::TestError("align_malloc failed for array buffer\n", 1);
879                 }
880                 assert( (size_t)pBuffer % alignment == 0 );
881                 if (NULL == refArg)
882                 {
883                     fillBuffer(pBuffer, size / sizeof(T));
884                 }
885                 else {
886                     memcpy(pBuffer, refArg->getBuffer(), size);
887                 }
888             }
889             return new KernelArgBuffer( context, argInfo, (void*)pBuffer, size);
890         }
891         else
892         {
893             if (m_vectorSize == 3)
894                 size = sizeof(T) * 4;
895             else
896                 size = sizeof(T) * m_vectorSize;
897 
898             pBuffer = (T *)align_malloc(size, alignment);
899             if (NULL == pBuffer)
900             {
901                 throw Exceptions::TestError("align_malloc failed for pBuffer\n", 1);
902             }
903             assert( (size_t)pBuffer % alignment == 0 );
904             if (NULL == refArg)
905             {
906                 fillBuffer(pBuffer, m_vectorSize);
907             }
908             else {
909                 memcpy(pBuffer, refArg->getBuffer(), size);
910             }
911             return new KernelArg( argInfo, (void*)pBuffer, size);
912         }
913     }
914 private:
915     void fillBuffer( T* buffer, size_t nelem)
916     {
917         for( size_t i = 0; i < nelem; ++i )
918         {
919             buffer[i]  = gRG.getNext<T>(m_minValue, m_maxValue);
920         }
921     }
922 
923 private:
924     bool m_isBuffer;
925     size_t m_vectorSize;
926     T    m_minValue;
927     T    m_maxValue;
928 };
929 
930 /**
931  General facade for the kernel arguments generation functionality.
932  */
933 class DataGenerator
934 {
935 public:
936      static DataGenerator* getInstance();
937 
938     ~DataGenerator();
939 
940     KernelArg* generateKernelArg(cl_context context,
941                                  const KernelArgInfo& argInfo,
942                                  const WorkSizeInfo& ws,
943                                  const KernelArg* refArg,
944                                  const cl_kernel kernel,
945                                  const cl_device_id device)
946     {
947         KernelArgGenerator* pArgGenerator = getArgGenerator(argInfo);
948         return pArgGenerator->generate(context, ws, argInfo, refArg, kernel, device);
949     }
950 
951     /*
952      * Gets the generator associated to the given key.
953      */
954     KernelArgGenerator* getArgGenerator(const KernelArgInfo& argInfo);
955 
956     /*
957      * Sets the entry associated to the given key, with the given prototype
958      * generator.
959      */
960     void setArgGenerator(const KernelArgInfo& key, KernelArgGenerator* gen);
961 
962 private:
963     DataGenerator();
964 
965     static DataGenerator *Instance;
966 
967     typedef std::map<std::string, KernelArgGenerator*> ArgGeneratorsMap;
968     ArgGeneratorsMap m_argGenerators;
969 };
970 
/*
 * Exposes an iterator (begin()/end()) whose positions describe image
 * objects: each position reports an image type name, generator names,
 * channel order and data type.
 * NOTE(review): positions presumably enumerate combinations of the
 * imageTypes[] and channelOrders[] tables - confirm against the
 * implementation file.
 */
class ImageValuesGenerator
{
public:
    class iterator
    {
        friend class ImageValuesGenerator;
    public:
        /*
         * Iterator operators.
         */
        iterator& operator ++();
        bool operator == (const iterator&) const;
        bool operator != (const iterator&) const;
        /*
         * Returns the name of the basic image type (e.g., image2d_t).
         */
        std::string getImageTypeName() const;

        /*
         * Returns the name of the generator that generates images of this type
         * (e.g., imaget2d_float).
         */
        std::string getImageGeneratorName() const;

        /*
         * Returns the name of the generator that generates images of the 'base'
         * type (e.g., imaget2d_t).
         */
        std::string getBaseImageGeneratorName() const;

        /*
         * Returns the OpenCL enumeration for the channel order of the image
         * object this iterator creates.
         */
        int getOpenCLChannelOrder() const;

        /*
         * Returns the SPIR enumeration for the channel order of the image
         * object this iterator creates.
         */
        int getSPIRChannelOrder() const;

        /*
         * Returns the data type of the image object this iterator creates. (e.g.,
         * cl_float, cl_int).
         */
        int getDataType() const;

        /*
         * Returns the data type of the image object this iterator creates. (e.g.,
         * float, int), in string format.
         */
        std::string getDataTypeName() const;

        /*
         * Returns a string representation of this iterator.
         * NOTE(review): the exact format is defined in the implementation
         * file and is not visible here.
         */
        std::string toString() const;
    private:
        /*
         * Constructor for creating a 'begin' iterator.
         */
        iterator(ImageValuesGenerator*);
        /*
         * Constructor for creating an 'end' iterator.
         */
        iterator(int);
        /*
         * Increments the given argument up to the given limit.
         * In case the new value reaches the limit, the index is reset to hold zero.
         * Returns: true if the value of the index was incremented, false if it was reset
         * to zero.
         */
        bool incrementIndex(size_t& index, size_t limit);

        /*
         * Returns true if the index combination of this iterator is legal,
         * or false otherwise.
         */
        bool isLegalCombination() const;

        // Generator this iterator was created from (see the 'begin' constructor).
        ImageValuesGenerator* m_parent;
        // Current position. NOTE(review): presumably indices into
        // channelOrders[] and imageTypes[] respectively - confirm.
        size_t m_channelIndex, m_imgTyIndex;
    }; //End iterator.

    iterator begin();
    iterator end();

    // Tables of channel orders and image type names; their contents are
    // defined outside this header.
    static cl_channel_order channelOrders[];
    static const char* imageTypes[];
private:
    // NOTE(review): work-size information; how it is used is not visible
    // in this header.
    WorkSizeInfo  m_wsInfo;
};
1061 
1062 #endif
1063