• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017, 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 // Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits.
19 // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE
20 
21 #define FLUSH fflush(stdout)
22 
23 #define MAX_STR 16*1024
24 
25 #define ALIGNMENT 128
26 
27 #define OPTIONS "-cl-std=CL2.0"
28 
29 // NUM_ROUNDS must be at least 1.
30 // It determines how many sets of random data we push through the global
31 // variables.
32 #define NUM_ROUNDS 1
33 
34 // This is a shared property of the writer and reader kernels.
35 #define NUM_TESTED_VALUES 5
36 
37 // TODO: pointer-to-half (and its vectors)
38 // TODO: union of...
39 
40 #include <algorithm>
41 #include <cstdio>
42 #include <cstdlib>
43 #include <cstring>
44 #include <string>
45 #include <vector>
46 #include <cassert>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include "harness/typeWrappers.h"
50 #include "harness/errorHelpers.h"
51 #include "harness/mt19937.h"
52 #include "procs.h"
53 
54 
55 ////////////////////
56 // Device capabilities
57 static int l_has_double = 0;
58 static int l_has_half = 0;
59 static int l_64bit_device = 0;
60 static int l_has_int64_atomics = 0;
61 static int l_has_intptr_atomics = 0;
62 static int l_has_cles_int64 = 0;
63 
64 static int l_host_is_big_endian = 1;
65 
66 static size_t l_max_global_id0 = 0;
67 static cl_bool l_linker_available = false;
68 
69 #define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0)
70 
71 ////////////////////
72 // Info about types we can use for program scope variables.
73 
74 
75 class TypeInfo {
76 
77 public:
TypeInfo()78     TypeInfo() :
79         name(""),
80         m_buf_elem_type(""),
81         m_is_vecbase(false),
82         m_is_atomic(false),
83         m_is_like_size_t(false),
84         m_is_bool(false),
85         m_elem_type(0), m_num_elem(0),
86         m_size(0),
87         m_value_size(0)
88         {}
TypeInfo(const char * name_arg)89     TypeInfo(const char* name_arg) :
90         name(name_arg),
91         m_buf_elem_type(name_arg),
92         m_is_vecbase(false),
93         m_is_atomic(false),
94         m_is_like_size_t(false),
95         m_is_bool(false),
96         m_elem_type(0), m_num_elem(0),
97         m_size(0),
98         m_value_size(0)
99         { }
100 
101     // Vectors
TypeInfo(TypeInfo * elem_type,int num_elem)102     TypeInfo( TypeInfo* elem_type, int num_elem ) :
103         m_is_vecbase(false),
104         m_is_atomic(false),
105         m_is_like_size_t(false),
106         m_is_bool(false),
107         m_elem_type(elem_type),
108         m_num_elem(num_elem)
109     {
110         char the_name[10]; // long enough for longest vector type name "double16"
111         snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem);
112         this->name = std::string(the_name);
113         this->m_buf_elem_type = std::string(the_name);
114         this->m_value_size = num_elem * elem_type->get_size();
115         if ( m_num_elem == 3 ) {
116             this->m_size = 4 * elem_type->get_size();
117         } else {
118             this->m_size = num_elem * elem_type->get_size();
119         }
120     }
get_name(void) const121     const std::string& get_name(void) const { return name; }
get_name_c_str(void) const122     const char* get_name_c_str(void) const { return name.c_str(); }
set_vecbase(void)123     TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; }
set_atomic(void)124     TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; }
set_like_size_t(void)125     TypeInfo& set_like_size_t(void) {
126         this->m_is_like_size_t = true;
127         this->set_size( l_64bit_device ? 8 : 4 );
128         this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint";
129         return *this;
130     }
set_bool(void)131     TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; }
set_size(size_t n)132     TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; }
set_buf_elem_type(const char * name)133     TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; }
134 
elem_type(void) const135     const TypeInfo* elem_type(void) const { return m_elem_type; }
num_elem(void) const136     int num_elem(void) const { return m_num_elem; }
137 
is_vecbase(void) const138     bool is_vecbase(void) const {return m_is_vecbase;}
is_atomic(void) const139     bool is_atomic(void) const {return m_is_atomic;}
is_atomic_64bit(void) const140     bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;}
is_like_size_t(void) const141     bool is_like_size_t(void) const {return m_is_like_size_t;}
is_bool(void) const142     bool is_bool(void) const {return m_is_bool;}
get_size(void) const143     size_t get_size(void) const {return m_size;}
get_value_size(void) const144     size_t get_value_size(void) const {return m_value_size;}
145 
146     // When passing values of this type to a kernel, what buffer type
147     // should be used?
get_buf_elem_type(void) const148     const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); }
149 
as_string(const cl_uchar * value_ptr) const150     std::string as_string(const cl_uchar* value_ptr) const {
151         // This method would be shorter if I had a real handle to element
152         // vector type.
153         if ( this->is_bool() ) {
154             std::string result( name );
155             result += "<";
156             result += (*value_ptr ? "true" : "false");
157             result += ", ";
158             char buf[10];
159             sprintf(buf,"%02x",*value_ptr);
160             result += buf;
161             result += ">";
162             return result;
163         } else if ( this->num_elem() ) {
164             std::string result( name );
165             result += "<";
166             for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
167                 char buf[MAX_STR];
168                 if ( ielem ) result += ", ";
169                 for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) {
170                     sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] );
171                 }
172                 result += buf;
173             }
174             result += ">";
175             return result;
176         } else {
177             std::string result( name );
178             result += "<";
179             char buf[MAX_STR];
180             for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) {
181                 sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] );
182             }
183             result += buf;
184             result += ">";
185             return result;
186         }
187     }
188 
189     // Initialize the given buffer to a constant value initialized as if it
190     // were from the INIT_VAR macro below.
191     // Only needs to support values 0 and 1.
init(cl_uchar * buf,cl_uchar val) const192     void init( cl_uchar* buf, cl_uchar val) const {
193         if ( this->num_elem() ) {
194             for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
195                 // Delegate!
196                 this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val );
197             }
198         } else {
199             init_elem( buf, val );
200         }
201     }
202 
203 private:
init_elem(cl_uchar * buf,cl_uchar val) const204     void init_elem( cl_uchar* buf, cl_uchar val ) const {
205         size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size();
206         memset(buf,0,elem_size);
207         if ( val ) {
208             if ( strstr( name.c_str(), "float" ) ) {
209                 *(float*)buf = (float)val;
210                 return;
211             }
212             if ( strstr( name.c_str(), "double" ) ) {
213                 *(double*)buf = (double)val;
214                 return;
215             }
216             if ( this->is_bool() ) { *buf = (bool)val; return; }
217 
218             // Write a single character value to the correct spot,
219             // depending on host endianness.
220             if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val;
221             else *buf = (cl_uchar)val;
222         }
223     }
224 public:
225 
dump(FILE * fp) const226     void dump(FILE* fp) const {
227         fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(),
228                 (int)m_size,
229                 (int)m_value_size,
230                 m_buf_elem_type.c_str() );
231         if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() );
232         if ( this->m_is_vecbase ) fprintf(fp, " vecbase");
233         if ( this->m_is_bool ) fprintf(fp, " bool");
234         if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t");
235         if ( this->m_is_atomic ) fprintf(fp, " atomic");
236         fprintf(fp,"\n");
237         fflush(fp);
238     }
239 
240 private:
241     std::string name;
242     TypeInfo* m_elem_type;
243     int m_num_elem;
244     bool m_is_vecbase;
245     bool m_is_atomic;
246     bool m_is_like_size_t;
247     bool m_is_bool;
248     size_t m_size; // Number of bytes of storage occupied by this type.
249     size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3.
250 
251     // When passing values of this type to a kernel, what buffer type
252     // should be used?
253     // For most types, it's just itself.
254     // Use a std::string so I don't have to make a copy constructor.
255     std::string m_buf_elem_type;
256 };
257 
258 
259 #define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double
260 #define NUM_VECTOR_SIZES (5)   // 2,3,4,8,16
261 #define NUM_PLAIN_TYPES \
262       5 /*boolean and size_t family */  \
263     + NUM_SCALAR_TYPES \
264     + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \
265     + 10 /* atomic types */
266 
267 // Need room for plain, array, pointer, struct
268 #define MAX_TYPES (4*NUM_PLAIN_TYPES)
269 
270 static TypeInfo type_info[MAX_TYPES];
271 static int num_type_info = 0; // Number of valid entries in type_info[]
272 
273 
274 
275 
276 // A helper class to form kernel source arguments for clCreateProgramWithSource.
277 class StringTable {
278 public:
StringTable()279     StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {}
~StringTable()280     ~StringTable() { release_frozen(); }
281 
add(std::string s)282     void add(std::string s) { release_frozen(); m_strings.push_back(s); }
283 
num_str()284     const size_t num_str() { freeze(); return m_strings.size(); }
strs()285     const char** strs() { freeze(); return m_c_strs; }
lengths()286     const size_t* lengths() { freeze(); return m_lengths; }
287 
288 private:
freeze(void)289     void freeze(void) {
290         if ( !m_frozen ) {
291             release_frozen();
292 
293             m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size());
294             m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size());
295             assert( m_c_strs );
296             assert( m_lengths );
297 
298             for ( size_t i = 0; i < m_strings.size() ; i++ ) {
299                 m_c_strs[i] = m_strings[i].c_str();
300                 m_lengths[i] = strlen(m_c_strs[i]);
301             }
302 
303             m_frozen = true;
304         }
305     }
release_frozen(void)306     void release_frozen(void) {
307         if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; }
308         if ( m_lengths ) { free(m_lengths); m_lengths = 0; }
309         m_frozen = false;
310     }
311 
312     typedef std::vector<std::string> strlist_t;
313     strlist_t m_strings;
314     const char** m_c_strs;
315     size_t* m_lengths;
316     bool m_frozen;
317 };
318 
319 
320 ////////////////////
321 // File scope function declarations
322 
323 static void l_load_abilities(cl_device_id device);
324 static const char* l_get_fp64_pragma(void);
325 static const char* l_get_cles_int64_pragma(void);
326 static int l_build_type_table(cl_device_id device);
327 
328 static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret);
329 
330 static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state );
331 static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti );
332 static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti );
333 
334 static std::string conversion_functions(const TypeInfo& ti);
335 static std::string global_decls(const TypeInfo& ti, bool with_init);
336 static std::string global_check_function(const TypeInfo& ti);
337 static std::string writer_function(const TypeInfo& ti);
338 static std::string reader_function(const TypeInfo& ti);
339 
340 static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
341 static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
342 
343 static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
344 static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
345 
346 static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size );
347 static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation );
348 
349 
350 
351 ////////////////////
352 // File scope function definitions
353 
print_build_log(cl_program program,cl_uint num_devices,cl_device_id * device_list,cl_uint count,const char ** strings,const size_t * lengths,const char * options)354 static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options)
355 {
356     cl_uint i;
357     cl_int error;
358     BufferOwningPtr<cl_device_id> devices;
359 
360     if(num_devices == 0 || device_list == NULL)
361     {
362         error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL);
363         test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
364 
365         device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices);
366         devices.reset(device_list);
367 
368         memset(device_list, 0, sizeof(cl_device_id) * num_devices);
369 
370         error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL);
371         test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
372     }
373 
374     cl_uint z;
375     bool sourcePrinted = false;
376 
377     for(z = 0; z < num_devices; z++)
378     {
379         char deviceName[4096] = "";
380         error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
381         check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z);
382 
383         cl_build_status buildStatus;
384         error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
385         check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
386 
387         if(buildStatus != CL_BUILD_SUCCESS)
388         {
389             if(!sourcePrinted)
390             {
391                 log_error("Build options: %s\n", options);
392                 if(count && strings)
393                 {
394                     log_error("Original source is: ------------\n");
395                     for(i = 0; i < count; i++) log_error("%s", strings[i]);
396                 }
397                 sourcePrinted = true;
398             }
399 
400             char statusString[64] = "";
401             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
402               sprintf(statusString, "CL_BUILD_SUCCESS");
403             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
404               sprintf(statusString, "CL_BUILD_NONE");
405             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
406               sprintf(statusString, "CL_BUILD_ERROR");
407             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
408               sprintf(statusString, "CL_BUILD_IN_PROGRESS");
409             else
410               sprintf(statusString, "UNKNOWN (%d)", buildStatus);
411 
412             log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
413 
414             size_t paramSize = 0;
415             error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize);
416             if(check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) break;
417 
418             std::string log;
419             log.resize(paramSize/sizeof(char));
420 
421             error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
422             if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break;
423             if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n");
424             else
425             {
426                 log_error("Build log:\n", deviceName);
427                 log_error("%s\n", log.c_str());
428             }
429         }
430     }
431     return error;
432 }
433 
l_load_abilities(cl_device_id device)434 static void l_load_abilities(cl_device_id device)
435 {
436     l_has_half       = is_extension_available(device,"cl_khr_fp16");
437     l_has_double     = is_extension_available(device,"cl_khr_fp64");
438     l_has_cles_int64 = is_extension_available(device,"cles_khr_int64");
439 
440     l_has_int64_atomics
441     =  is_extension_available(device,"cl_khr_int64_base_atomics")
442     && is_extension_available(device,"cl_khr_int64_extended_atomics");
443 
444     {
445         int status = CL_SUCCESS;
446         cl_uint addr_bits = 32;
447         status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0);
448         l_64bit_device = ( status == CL_SUCCESS && addr_bits == 64 );
449     }
450 
451     // 32-bit devices always have intptr atomics.
452     l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics;
453 
454     union { char c[4]; int i; } probe;
455     probe.i = 1;
456     l_host_is_big_endian = !probe.c[0];
457 
458     // Determine max global id.
459     {
460         int status = CL_SUCCESS;
461         cl_uint max_dim = 0;
462         status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0);
463         assert( status == CL_SUCCESS );
464         assert( max_dim > 0 );
465         size_t max_id[3];
466         max_id[0] = 0;
467     status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0);
468         assert( status == CL_SUCCESS );
469         l_max_global_id0 = max_id[0];
470     }
471 
472     { // Is separate compilation supported?
473         int status = CL_SUCCESS;
474         l_linker_available = false;
475         status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0);
476         assert( status == CL_SUCCESS );
477     }
478 }
479 
480 
l_get_fp64_pragma(void)481 static const char* l_get_fp64_pragma(void)
482 {
483     return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : "";
484 }
485 
l_get_cles_int64_pragma(void)486 static const char* l_get_cles_int64_pragma(void)
487 {
488     return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : "";
489 }
490 
l_get_int64_atomic_pragma(void)491 static const char* l_get_int64_atomic_pragma(void)
492 {
493     return "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
494            "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
495 }
496 
l_build_type_table(cl_device_id device)497 static int l_build_type_table(cl_device_id device)
498 {
499     int status = CL_SUCCESS;
500     size_t iscalar = 0;
501     size_t ivecsize = 0;
502     int vecsizes[] = { 2, 3, 4, 8, 16 };
503     const char* vecbase[] = {
504         "uchar", "char",
505         "ushort", "short",
506         "uint", "int",
507         "ulong", "long",
508         "float",
509         "double"
510     };
511     int vecbase_size[] = {
512         1, 1,
513         2, 2,
514         4, 4,
515         8, 8,
516         4,
517         8
518     };
519     const char* like_size_t[] = {
520         "intptr_t",
521         "uintptr_t",
522         "size_t",
523         "ptrdiff_t"
524     };
525     const char* atomics[] = {
526         "atomic_int", "atomic_uint",
527         "atomic_long", "atomic_ulong",
528         "atomic_float",
529         "atomic_double",
530     };
531     int atomics_size[] = {
532         4, 4,
533         8, 8,
534         4,
535         8
536     };
537     const char* intptr_atomics[] = {
538         "atomic_intptr_t",
539         "atomic_uintptr_t",
540         "atomic_size_t",
541         "atomic_ptrdiff_t"
542     };
543 
544     l_load_abilities(device);
545     num_type_info = 0;
546 
547     // Boolean.
548     type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar");
549 
550     // Vector types, and the related scalar element types.
551     for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) {
552         if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue;
553         if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue;
554 
555         // Scalar
556         TypeInfo* elem_type = type_info + num_type_info++;
557         *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] );
558 
559         // Vector
560         for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) {
561             type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] );
562         }
563     }
564 
565     // Size_t-like types
566     for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) {
567         type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t();
568     }
569 
570     // Atomic types.
571     for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) {
572         if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue;
573         if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue;
574 
575         // The +7 is used to skip over the "atomic_" prefix.
576         const char* buf_type = atomics[iscalar] + 7;
577         type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type );
578     }
579     if ( l_has_intptr_atomics ) {
580         for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) {
581             type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t();
582         }
583     }
584 
585     assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES
586 
587 #if 0
588     for ( size_t i = 0 ; i < num_type_info ; i++ ) {
589         type_info[ i ].dump(stdout);
590     }
591     exit(0);
592 #endif
593 
594     return status;
595 }
596 
l_find_type(const char * name)597 static const TypeInfo& l_find_type( const char* name )
598 {
599     auto itr =
600         std::find_if(type_info, type_info + num_type_info,
601                      [name](TypeInfo& ti) { return ti.get_name() == name; });
602     assert(itr != type_info + num_type_info);
603     return *itr;
604 }
605 
606 
607 
608 // Populate return parameters for max program variable size, preferred program variable size.
609 
l_get_device_info(cl_device_id device,size_t * max_size_ret,size_t * pref_size_ret)610 static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret)
611 {
612     int err = CL_SUCCESS;
613     size_t return_size = 0;
614 
615     err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size);
616     if ( err != CL_SUCCESS ) {
617         log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
618         return err;
619     }
620     if ( return_size != sizeof(size_t) ) {
621         log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
622         return 1;
623     }
624     if ( return_size != sizeof(size_t) ) {
625         log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
626         return 1;
627     }
628 
629     return_size = 0;
630     err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size);
631     if ( err != CL_SUCCESS ) {
632         log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err);
633         return err;
634     }
635     if ( return_size != sizeof(size_t) ) {
636         log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size );
637         return 1;
638     }
639 
640     return CL_SUCCESS;
641 }
642 
643 
l_set_randomly(cl_uchar * buf,size_t buf_size,RandomSeed & rand_state)644 static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state )
645 {
646     assert( 0 == (buf_size % sizeof(cl_uint) ) );
647     for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) {
648         *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state );
649     }
650 #if 0
651     for ( size_t i = 0; i < buf_size ; i++ ) {
652         printf("%02x",buf[i]);
653     }
654     printf("\n");
655 #endif
656 }
657 
658 // Return num_value values of the given type.
659 // Returns CL_SUCCESS if they compared as equal.
l_compare(const char * test_name,const cl_uchar * expected,const cl_uchar * received,size_t num_values,const TypeInfo & ti)660 static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti )
661 {
662     // Compare only the valid returned bytes.
663     for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) {
664         const cl_uchar* expv = expected + value_idx * ti.get_size();
665         const cl_uchar* gotv = received + value_idx * ti.get_size();
666         if ( memcmp( expv, gotv, ti.get_value_size() ) ) {
667             std::string exp_str = ti.as_string( expv );
668             std::string got_str = ti.as_string( gotv );
669             log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n",
670                     test_name,
671                     ti.get_name_c_str(), value_idx,
672                     exp_str.c_str(),
673                     got_str.c_str() );
674             return 1;
675         }
676     }
677     return CL_SUCCESS;
678 }
679 
680 // Copy a target value from src[idx] to dest[idx]
l_copy(cl_uchar * dest,unsigned dest_idx,const cl_uchar * src,unsigned src_idx,const TypeInfo & ti)681 static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti )
682 {
683     cl_uchar* raw_dest      = dest + dest_idx * ti.get_size();
684     const cl_uchar* raw_src =  src +  src_idx * ti.get_size();
685     memcpy( raw_dest, raw_src,  ti.get_value_size() );
686 
687     return 0;
688 }
689 
690 
conversion_functions(const TypeInfo & ti)691 static std::string conversion_functions(const TypeInfo& ti)
692 {
693     std::string result;
694     static char buf[MAX_STR];
695     int num_printed = 0;
696     // The atomic types just use the base type.
697     if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) {
698         // The type is represented in a buffer by itself.
699         num_printed = snprintf(buf,MAX_STR,
700                 "%s from_buf(%s a) { return a; }\n"
701                 "%s to_buf(%s a) { return a; }\n",
702                 ti.get_buf_elem_type(), ti.get_buf_elem_type(),
703                 ti.get_buf_elem_type(), ti.get_buf_elem_type() );
704     } else {
705         // Just use C-style cast.
706         num_printed = snprintf(buf,MAX_STR,
707                 "%s from_buf(%s a) { return (%s)a; }\n"
708                 "%s to_buf(%s a) { return (%s)a; }\n",
709                 ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(),
710                 ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() );
711     }
712     // Add initializations.
713     if ( ti.is_atomic() ) {
714         num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
715                 "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" );
716     } else {
717         // This cast works even if the target type is a vector type.
718         num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
719                 "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
720     }
721     assert( num_printed < MAX_STR ); // or increase MAX_STR
722     result = buf;
723     return result;
724 }
725 
global_decls(const TypeInfo & ti,bool with_init)726 static std::string global_decls(const TypeInfo& ti, bool with_init )
727 {
728     const char* tn = ti.get_name_c_str();
729     const char* vol = (ti.is_atomic() ? " volatile " : " ");
730     static char decls[MAX_STR];
731     int num_printed = 0;
732     if ( with_init ) {
733         const char *decls_template_with_init =
734             "%s %s var = INIT_VAR(0);\n"
735             "global %s %s g_var = INIT_VAR(1);\n"
736             "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n"
737             "volatile global %s %s* p_var = &a_var[1];\n\n";
738         num_printed = snprintf(decls,sizeof(decls),decls_template_with_init,
739                 vol,tn,vol,tn,vol,tn,vol,tn);
740     } else {
741         const char *decls_template_no_init =
742             "%s %s var;\n"
743             "global %s %s g_var;\n"
744             "%s %s a_var[2];\n"
745             "global %s %s* p_var;\n\n";
746         num_printed = snprintf(decls,sizeof(decls),decls_template_no_init,
747              vol,tn,vol,tn,vol,tn,vol,tn);
748     }
749     assert( num_printed < sizeof(decls) );
750     return std::string(decls);
751 }
752 
753 // Return the source code for the "global_check" function for the given type.
754 // This function checks that all program-scope variables have appropriate
755 // initial values when no explicit initializer is used. If all tests pass the
756 // kernel writes a non-zero value to its output argument, otherwise it writes
757 // zero.
global_check_function(const TypeInfo & ti)758 static std::string global_check_function(const TypeInfo& ti)
759 {
760     const std::string type_name = ti.get_buf_elem_type();
761 
762     // all() should only be used on vector inputs. For scalar comparison, the
763     // result of the equality operator can be used as a bool value.
764     const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
765     const std::string is_equality_true = is_scalar ? "" : "all";
766 
767     std::string code = "kernel void global_check(global int* out) {\n";
768     code += "  const " + type_name + " zero = ((" + type_name + ")0);\n";
769     code += "  bool status = true;\n";
770     if (ti.is_atomic()) {
771         code += "  status &= " + is_equality_true + "(atomic_load(&var) == zero);\n";
772         code += "  status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n";
773         code += "  status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n";
774         code += "  status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n";
775     } else {
776         code += "  status &= " + is_equality_true + "(var == zero);\n";
777         code += "  status &= " + is_equality_true + "(g_var == zero);\n";
778         code += "  status &= " + is_equality_true + "(a_var[0] == zero);\n";
779         code += "  status &= " + is_equality_true + "(a_var[1] == zero);\n";
780     }
781     code += "  status &= (p_var == NULL);\n";
782     code += "  *out = status ? 1 : 0;\n";
783     code += "}\n\n";
784 
785     return code;
786 }
787 
788 // Return the source text for the writer function for the given type.
789 // For types that can't be passed as pointer-to-type as a kernel argument,
790 // use a substitute base type of the same size.
writer_function(const TypeInfo & ti)791 static std::string writer_function(const TypeInfo& ti)
792 {
793     static char writer_src[MAX_STR];
794     int num_printed = 0;
795     if ( !ti.is_atomic() ) {
796         const char* writer_template_normal =
797             "kernel void writer( global %s* src, uint idx ) {\n"
798             "  var = from_buf(src[0]);\n"
799             "  g_var = from_buf(src[1]);\n"
800             "  a_var[0] = from_buf(src[2]);\n"
801             "  a_var[1] = from_buf(src[3]);\n"
802             "  p_var = a_var + idx;\n"
803             "}\n\n";
804         num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type());
805     } else {
806         const char* writer_template_atomic =
807             "kernel void writer( global %s* src, uint idx ) {\n"
808             "  atomic_store( &var, from_buf(src[0]) );\n"
809             "  atomic_store( &g_var, from_buf(src[1]) );\n"
810             "  atomic_store( &a_var[0], from_buf(src[2]) );\n"
811             "  atomic_store( &a_var[1], from_buf(src[3]) );\n"
812             "  p_var = a_var + idx;\n"
813             "}\n\n";
814         num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type());
815     }
816     assert( num_printed < sizeof(writer_src) );
817     std::string result = writer_src;
818     return result;
819 }
820 
821 
822 // Return source text for teh reader function for the given type.
823 // For types that can't be passed as pointer-to-type as a kernel argument,
824 // use a substitute base type of the same size.
reader_function(const TypeInfo & ti)825 static std::string reader_function(const TypeInfo& ti)
826 {
827     static char reader_src[MAX_STR];
828     int num_printed = 0;
829     if ( !ti.is_atomic() ) {
830         const char* reader_template_normal =
831             "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
832             "  *p_var = from_buf(ptr_write_val);\n"
833             "  dest[0] = to_buf(var);\n"
834             "  dest[1] = to_buf(g_var);\n"
835             "  dest[2] = to_buf(a_var[0]);\n"
836             "  dest[3] = to_buf(a_var[1]);\n"
837             "}\n\n";
838         num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type());
839     } else {
840         const char* reader_template_atomic =
841             "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
842             "  atomic_store( p_var, from_buf(ptr_write_val) );\n"
843             "  dest[0] = to_buf( atomic_load( &var ) );\n"
844             "  dest[1] = to_buf( atomic_load( &g_var ) );\n"
845             "  dest[2] = to_buf( atomic_load( &a_var[0] ) );\n"
846             "  dest[3] = to_buf( atomic_load( &a_var[1] ) );\n"
847             "}\n\n";
848         num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type());
849     }
850     assert( num_printed < sizeof(reader_src) );
851     std::string result = reader_src;
852     return result;
853 }
854 
855 // Check that all globals where appropriately default-initialized.
check_global_initialization(cl_context context,cl_program program,cl_command_queue queue)856 static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue)
857 {
858     int status = CL_SUCCESS;
859 
860     // Create a buffer on device to store a unique integer.
861     cl_int is_init_valid = 0;
862     clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status));
863     test_error_ret(status, "Failed to allocate buffer", status);
864 
865     // Create, setup and invoke kernel.
866     clKernelWrapper global_check(clCreateKernel(program, "global_check", &status));
867     test_error_ret(status, "Failed to create global_check kernel", status);
868     status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer);
869     test_error_ret(status, "Failed to set up argument for the global_check kernel", status);
870     const cl_uint work_dim = 1;
871     const size_t global_work_offset[] = { 0 };
872     const size_t global_work_size[] = { 1 };
873     status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr);
874     test_error_ret(status, "Failed to run global_check kernel", status);
875     status = clFinish(queue);
876     test_error_ret(status, "clFinish() failed", status);
877 
878     // Read back the memory buffer from the device.
879     status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr);
880     test_error_ret(status, "Failed to read buffer from device", status);
881     if (is_init_valid == 0) {
882         log_error("Unexpected default values were detected");
883         return 1;
884     }
885 
886     return CL_SUCCESS;
887 }
888 
889 // Check write-then-read.
l_write_read(cl_device_id device,cl_context context,cl_command_queue queue)890 static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
891 {
892     int status = CL_SUCCESS;
893     int itype;
894 
895     RandomSeed rand_state( gRandomSeed );
896 
897     for ( itype = 0; itype < num_type_info ; itype++ ) {
898         status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state );
899         FLUSH;
900     }
901 
902     return status;
903 }
904 
l_write_read_for_type(cl_device_id device,cl_context context,cl_command_queue queue,const TypeInfo & ti,RandomSeed & rand_state)905 static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
906 {
907     int err = CL_SUCCESS;
908     std::string type_name( ti.get_name() );
909     const char* tn = type_name.c_str();
910     log_info("  %s ",tn);
911 
912     StringTable ksrc;
913     ksrc.add( l_get_fp64_pragma() );
914     ksrc.add( l_get_cles_int64_pragma() );
915     if (ti.is_atomic_64bit())
916       ksrc.add( l_get_int64_atomic_pragma() );
917     ksrc.add( conversion_functions(ti) );
918     ksrc.add( global_decls(ti,false) );
919     ksrc.add( global_check_function(ti) );
920     ksrc.add( writer_function(ti) );
921     ksrc.add( reader_function(ti) );
922 
923     int status = CL_SUCCESS;
924     clProgramWrapper program;
925     clKernelWrapper writer;
926 
927     status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
928     test_error_ret(status,"Failed to create program for read-after-write test",status);
929 
930     clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
931     test_error_ret(status,"Failed to create reader kernel for read-after-write test",status);
932 
933     // Check size query.
934     size_t used_bytes = 0;
935     status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
936     test_error_ret(status,"Failed to query global variable total size",status);
937     size_t expected_used_bytes =
938         (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
939         + ( l_64bit_device ? 8 : 4 ); // The pointer
940     if ( used_bytes < expected_used_bytes ) {
941         log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
942         err |= 1;
943     }
944 
945     err |= check_global_initialization(context, program, queue);
946 
947     // We need to create 5 random values of the given type,
948     // and read 4 of them back.
949     const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
950     const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
951     cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
952     cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
953 
954     clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
955     test_error_ret(status,"Failed to allocate write buffer",status);
956     clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
957     test_error_ret(status,"Failed to allocate read buffer",status);
958 
959     status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
960     status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
961 
962     // Boolean random data needs to be massaged a bit more.
963     const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
964     unsigned bool_iter = 0;
965 
966     for ( int iround = 0; iround < num_rounds ; iround++ ) {
967         for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
968             // Generate new random data to push through.
969             // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
970 
971             cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
972 
973             if ( ti.is_bool() ) {
974                 // For boolean, random data cast to bool isn't very random.
975                 // So use the bottom bit of bool_value_iter to get true
976                 // diversity.
977                 for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
978                     write_data[value_idx] = (1<<value_idx) & bool_iter;
979                     //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
980                 }
981                 bool_iter++;
982             } else {
983                 l_set_randomly( write_data, write_data_size, rand_state );
984             }
985             status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
986 
987             // The value to write via the pointer should be taken from the
988             // 5th typed slot of the write_data.
989             status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
990 
991             // Determine the expected values.
992             cl_uchar expected[read_data_size];
993             memset( expected, -1, sizeof(expected) );
994             l_copy( expected, 0, write_data, 0, ti );
995             l_copy( expected, 1, write_data, 1, ti );
996             l_copy( expected, 2, write_data, 2, ti );
997             l_copy( expected, 3, write_data, 3, ti );
998             // But we need to take into account the value from the pointer write.
999             // The 2 represents where the "a" array values begin in our read-back.
1000             l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
1001 
1002             clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
1003 
1004             if ( ti.is_bool() ) {
1005                 // Collapse down to one bit.
1006                 for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
1007             }
1008 
1009             cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
1010             memset(read_data, -1, read_data_size);
1011             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1012 
1013             // Now run the kernel
1014             const size_t one = 1;
1015             status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
1016             status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
1017             status = clFinish(queue); test_error_ret(status,"finish",status);
1018 
1019             read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
1020 
1021             if ( ti.is_bool() ) {
1022                 // Collapse down to one bit.
1023                 for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
1024             }
1025 
1026             // Compare only the valid returned bytes.
1027             int compare_result = l_compare( "read-after-write", expected, read_data, NUM_TESTED_VALUES-1, ti );
1028             // log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
1029             err |= compare_result;
1030 
1031             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1032 
1033             if ( err ) break;
1034         }
1035     }
1036 
1037     if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1038     align_free(write_data);
1039     align_free(read_data);
1040     return err;
1041 }
1042 
1043 
1044 // Check initialization, then, read, then write, then read.
l_init_write_read(cl_device_id device,cl_context context,cl_command_queue queue)1045 static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
1046 {
1047     int status = CL_SUCCESS;
1048     int itype;
1049 
1050     RandomSeed rand_state( gRandomSeed );
1051 
1052     for ( itype = 0; itype < num_type_info ; itype++ ) {
1053         status = status | l_init_write_read_for_type(device,context,queue,type_info[itype], rand_state );
1054     }
1055     return status;
1056 }
l_init_write_read_for_type(cl_device_id device,cl_context context,cl_command_queue queue,const TypeInfo & ti,RandomSeed & rand_state)1057 static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
1058 {
1059     int err = CL_SUCCESS;
1060     std::string type_name( ti.get_name() );
1061     const char* tn = type_name.c_str();
1062     log_info("  %s ",tn);
1063 
1064     StringTable ksrc;
1065     ksrc.add( l_get_fp64_pragma() );
1066     ksrc.add( l_get_cles_int64_pragma() );
1067     if (ti.is_atomic_64bit())
1068       ksrc.add( l_get_int64_atomic_pragma() );
1069     ksrc.add( conversion_functions(ti) );
1070     ksrc.add( global_decls(ti,true) );
1071     ksrc.add( writer_function(ti) );
1072     ksrc.add( reader_function(ti) );
1073 
1074     int status = CL_SUCCESS;
1075     clProgramWrapper program;
1076     clKernelWrapper writer;
1077 
1078     status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
1079     test_error_ret(status,"Failed to create program for init-read-after-write test",status);
1080 
1081     clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
1082     test_error_ret(status,"Failed to create reader kernel for init-read-after-write test",status);
1083 
1084     // Check size query.
1085     size_t used_bytes = 0;
1086     status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
1087     test_error_ret(status,"Failed to query global variable total size",status);
1088     size_t expected_used_bytes =
1089         (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
1090         + ( l_64bit_device ? 8 : 4 ); // The pointer
1091     if ( used_bytes < expected_used_bytes ) {
1092         log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
1093         err |= 1;
1094     }
1095 
1096     // We need to create 5 random values of the given type,
1097     // and read 4 of them back.
1098     const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
1099     const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16);
1100 
1101     cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
1102     cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
1103     clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
1104     test_error_ret(status,"Failed to allocate write buffer",status);
1105     clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
1106     test_error_ret(status,"Failed to allocate read buffer",status);
1107 
1108     status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
1109     status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
1110 
1111     // Boolean random data needs to be massaged a bit more.
1112     const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
1113     unsigned bool_iter = 0;
1114 
1115     // We need to count iterations.  We do something *different on the
1116     // first iteration, to ensure we actually pick up the initialized
1117     // values.
1118     unsigned iteration = 0;
1119 
1120     for ( int iround = 0; iround < num_rounds ; iround++ ) {
1121         for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
1122             // Generate new random data to push through.
1123             // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
1124 
1125             cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
1126 
1127             if ( ti.is_bool() ) {
1128                 // For boolean, random data cast to bool isn't very random.
1129                 // So use the bottom bit of bool_value_iter to get true
1130                 // diversity.
1131                 for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
1132                     write_data[value_idx] = (1<<value_idx) & bool_iter;
1133                     //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
1134                 }
1135                 bool_iter++;
1136             } else {
1137                 l_set_randomly( write_data, write_data_size, rand_state );
1138             }
1139             status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
1140 
1141             if ( !iteration ) {
1142                 // On first iteration, the value we write via the last arg
1143                 // to the "reader" function is 0.
1144                 // It's way easier to code the test this way.
1145                 ti.init( write_data + (NUM_TESTED_VALUES-1)*ti.get_size(), 0 );
1146             }
1147 
1148             // The value to write via the pointer should be taken from the
1149             // 5th typed slot of the write_data.
1150             status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
1151 
1152             // Determine the expected values.
1153             cl_uchar expected[read_data_size];
1154             memset( expected, -1, sizeof(expected) );
1155             if ( iteration ) {
1156                 l_copy( expected, 0, write_data, 0, ti );
1157                 l_copy( expected, 1, write_data, 1, ti );
1158                 l_copy( expected, 2, write_data, 2, ti );
1159                 l_copy( expected, 3, write_data, 3, ti );
1160                 // But we need to take into account the value from the pointer write.
1161                 // The 2 represents where the "a" array values begin in our read-back.
1162                 // But we need to take into account the value from the pointer write.
1163                 l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
1164             } else {
1165                 // On first iteration, expect these initialized values!
1166                 // See the decls_template_with_init above.
1167                 ti.init( expected, 0 );
1168                 ti.init( expected + ti.get_size(), 1 );
1169                 ti.init( expected + 2*ti.get_size(), 1 );
1170                 // Emulate the effect of the write via the pointer.
1171                 // The value is 0, not 1 (see above).
1172                 // The pointer is always initialized to the second element
1173                 // of the array. So it goes into slot 3 of the "expected" array.
1174                 ti.init( expected + 3*ti.get_size(), 0 );
1175             }
1176 
1177             if ( ti.is_bool() ) {
1178                 // Collapse down to one bit.
1179                 for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
1180             }
1181 
1182             clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
1183 
1184             cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
1185             memset( read_data, -1, read_data_size );
1186             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1187 
1188             // Now run the kernel
1189             const size_t one = 1;
1190             if ( iteration ) {
1191                 status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
1192             } else {
1193                 // On first iteration, we should be picking up the
1194                 // initialized value. So don't enqueue the writer.
1195             }
1196             status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
1197             status = clFinish(queue); test_error_ret(status,"finish",status);
1198 
1199             read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
1200 
1201             if ( ti.is_bool() ) {
1202                 // Collapse down to one bit.
1203                 for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
1204             }
1205 
1206             // Compare only the valid returned bytes.
1207             //log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
1208             int compare_result = l_compare( "init-write-read", expected, read_data, NUM_TESTED_VALUES-1, ti );
1209             //log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
1210             err |= compare_result;
1211 
1212             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1213 
1214             if ( err ) break;
1215 
1216             iteration++;
1217         }
1218     }
1219 
1220     if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1221     align_free(write_data);
1222     align_free(read_data);
1223 
1224     return err;
1225 }
1226 
1227 
1228 // Check that we can make at least one variable with size
1229 // max_size which is returned from the device info property : CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
l_capacity(cl_device_id device,cl_context context,cl_command_queue queue,size_t max_size)1230 static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size )
1231 {
1232     int err = CL_SUCCESS;
1233     // Just test one type.
1234     const TypeInfo ti( l_find_type("uchar") );
1235     log_info(" l_capacity...");
1236 
1237     const char prog_src_template[] =
1238 #if defined(_WIN32)
1239         "uchar var[%Iu];\n\n"
1240 #else
1241         "uchar var[%zu];\n\n"
1242 #endif
1243         "kernel void get_max_size( global ulong* size_ret ) {\n"
1244 #if defined(_WIN32)
1245         "  *size_ret = (ulong)%Iu;\n"
1246 #else
1247         "  *size_ret = (ulong)%zu;\n"
1248 #endif
1249         "}\n\n"
1250         "kernel void writer( global uchar* src ) {\n"
1251         "  var[get_global_id(0)] = src[get_global_linear_id()];\n"
1252         "}\n\n"
1253         "kernel void reader( global uchar* dest ) {\n"
1254         "  dest[get_global_linear_id()] = var[get_global_id(0)];\n"
1255         "}\n\n";
1256     char prog_src[MAX_STR];
1257     int num_printed = snprintf(prog_src,sizeof(prog_src),prog_src_template,max_size, max_size);
1258     assert( num_printed < MAX_STR ); // or increase MAX_STR
1259 
1260     StringTable ksrc;
1261     ksrc.add( prog_src );
1262 
1263     int status = CL_SUCCESS;
1264     clProgramWrapper program;
1265     clKernelWrapper get_max_size;
1266 
1267     status = create_single_kernel_helper_with_build_options(context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(), "get_max_size", OPTIONS);
1268     test_error_ret(status,"Failed to create program for capacity test",status);
1269 
1270     // Check size query.
1271     size_t used_bytes = 0;
1272     status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
1273     test_error_ret(status,"Failed to query global variable total size",status);
1274     if ( used_bytes < max_size ) {
1275         log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)max_size, (unsigned long long)used_bytes );
1276         err |= 1;
1277     }
1278 
1279     // Prepare to execute
1280     clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
1281     test_error_ret(status,"Failed to create writer kernel for capacity test",status);
1282     clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
1283     test_error_ret(status,"Failed to create reader kernel for capacity test",status);
1284 
1285     cl_ulong max_size_ret = 0;
1286     const size_t arr_size = 10*1024*1024;
1287     cl_uchar* buffer = (cl_uchar*) align_malloc( arr_size, ALIGNMENT );
1288 
1289     if ( !buffer ) { log_error("Failed to allocate buffer\n"); return 1; }
1290 
1291     clMemWrapper max_size_ret_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(max_size_ret), &max_size_ret, &status ) );
1292     test_error_ret(status,"Failed to allocate size query buffer",status);
1293     clMemWrapper buffer_mem( clCreateBuffer( context, CL_MEM_READ_WRITE, arr_size, 0, &status ) );
1294     test_error_ret(status,"Failed to allocate write buffer",status);
1295 
1296     status = clSetKernelArg(get_max_size,0,sizeof(cl_mem),&max_size_ret_mem); test_error_ret(status,"set arg",status);
1297     status = clSetKernelArg(writer,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
1298     status = clSetKernelArg(reader,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
1299 
1300     // Check the macro value of CL_DEVICE_MAX_GLOBAL_VARIABLE
1301     const size_t one = 1;
1302     status = clEnqueueNDRangeKernel(queue,get_max_size,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue size query",status);
1303     status = clFinish(queue); test_error_ret(status,"finish",status);
1304 
1305     cl_uchar *max_size_ret_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret), 0, 0, 0, 0);
1306     if ( max_size_ret != max_size ) {
1307         log_error("Error: preprocessor definition for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not match device query value %llu\n",
1308                 (unsigned long long) max_size_ret,
1309                 (unsigned long long) max_size );
1310         err |= 1;
1311     }
1312     clEnqueueUnmapMemObject(queue, max_size_ret_mem, max_size_ret_ptr, 0, 0, 0);
1313 
1314     RandomSeed rand_state_write( gRandomSeed );
1315     for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
1316         size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
1317         l_set_randomly( buffer, curr_size, rand_state_write );
1318         status = clEnqueueWriteBuffer (queue, buffer_mem, CL_TRUE, 0, curr_size, buffer, 0, 0, 0);test_error_ret(status,"populate buffer_mem object",status);
1319         status = clEnqueueNDRangeKernel(queue,writer,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue writer",status);
1320     status = clFinish(queue); test_error_ret(status,"finish",status);
1321     }
1322 
1323     RandomSeed rand_state_read( gRandomSeed );
1324     for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
1325         size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
1326         status = clEnqueueNDRangeKernel(queue,reader,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue reader",status);
1327         cl_uchar* read_mem_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0, &status);test_error_ret(status,"map read data",status);
1328         l_set_randomly( buffer, curr_size, rand_state_read );
1329         err |= l_compare( "capacity", buffer, read_mem_ptr, curr_size, ti );
1330         clEnqueueUnmapMemObject(queue, buffer_mem, read_mem_ptr, 0, 0, 0);
1331     }
1332 
1333     if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1334     align_free(buffer);
1335 
1336     return err;
1337 }
1338 
1339 
1340 // Check operation on a user type.
l_user_type(cl_device_id device,cl_context context,cl_command_queue queue,bool separate_compile)1341 static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, bool separate_compile )
1342 {
1343     int err = CL_SUCCESS;
1344     // Just test one type.
1345     const TypeInfo ti( l_find_type("uchar") );
1346     log_info(" l_user_type %s...", separate_compile ? "separate compilation" : "single source compilation" );
1347 
1348     if ( separate_compile && ! l_linker_available ) {
1349         log_info("Separate compilation is not supported. Skipping test\n");
1350         return err;
1351     }
1352 
1353     const char type_src[] =
1354         "typedef struct { uchar c; uint i; } my_struct_t;\n\n";
1355     const char def_src[] =
1356         "my_struct_t var = { 'a', 42 };\n\n";
1357     const char decl_src[] =
1358         "extern my_struct_t var;\n\n";
1359 
1360     // Don't use a host struct. We can't guarantee that the host
1361     // compiler has the same structure layout as the device compiler.
1362     const char writer_src[] =
1363         "kernel void writer( uchar c, uint i ) {\n"
1364         "  var.c = c;\n"
1365         "  var.i = i;\n"
1366         "}\n\n";
1367     const char reader_src[] =
1368         "kernel void reader( global uchar* C, global uint* I ) {\n"
1369         "  *C = var.c;\n"
1370         "  *I = var.i;\n"
1371         "}\n\n";
1372 
1373     clProgramWrapper program;
1374 
1375     if ( separate_compile ) {
1376         // Separate compilation flow.
1377         StringTable wksrc;
1378         wksrc.add( type_src );
1379         wksrc.add( def_src );
1380         wksrc.add( writer_src );
1381 
1382         StringTable rksrc;
1383         rksrc.add( type_src );
1384         rksrc.add( decl_src );
1385         rksrc.add( reader_src );
1386 
1387         int status = CL_SUCCESS;
1388         clProgramWrapper writer_program( clCreateProgramWithSource( context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status ) );
1389         test_error_ret(status,"Failed to create writer program for user type test",status);
1390 
1391         status = clCompileProgram( writer_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
1392         if(check_error(status, "Failed to compile writer program for user type test (%s)", IGetErrorString(status)))
1393         {
1394             print_build_log(writer_program, 1, &device, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), OPTIONS);
1395             return status;
1396         }
1397 
1398         clProgramWrapper reader_program( clCreateProgramWithSource( context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status ) );
1399         test_error_ret(status,"Failed to create reader program for user type test",status);
1400 
1401         status = clCompileProgram( reader_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
1402         if(check_error(status, "Failed to compile reader program for user type test (%s)", IGetErrorString(status)))
1403         {
1404             print_build_log(reader_program, 1, &device, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), OPTIONS);
1405             return status;
1406         }
1407 
1408         cl_program progs[2];
1409         progs[0] = writer_program;
1410         progs[1] = reader_program;
1411 
1412         program = clLinkProgram( context, 1, &device, "", 2, progs, 0, 0, &status );
1413         if(check_error(status, "Failed to link program for user type test (%s)", IGetErrorString(status)))
1414         {
1415             print_build_log(program, 1, &device, 0, NULL, NULL, "");
1416             return status;
1417         }
1418     } else {
1419         // Single compilation flow.
1420         StringTable ksrc;
1421         ksrc.add( type_src );
1422         ksrc.add( def_src );
1423         ksrc.add( writer_src );
1424         ksrc.add( reader_src );
1425 
1426         int status = CL_SUCCESS;
1427 
1428         status = create_single_kernel_helper_create_program(context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
1429         if(check_error(status, "Failed to build program for user type test (%s)", IGetErrorString(status)))
1430         {
1431             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
1432             return status;
1433         }
1434 
1435         status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
1436         if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status)))
1437         {
1438             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
1439             return status;
1440         }
1441     }
1442 
1443 
1444     // Check size query.
1445     size_t used_bytes = 0;
1446     int status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
1447     test_error_ret(status,"Failed to query global variable total size",status);
1448     size_t expected_size = sizeof(cl_uchar) + sizeof(cl_uint);
1449     if ( used_bytes < expected_size ) {
1450         log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
1451         err |= 1;
1452     }
1453 
1454     // Prepare to execute
1455     clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
1456     test_error_ret(status,"Failed to create writer kernel for user type test",status);
1457     clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
1458     test_error_ret(status,"Failed to create reader kernel for user type test",status);
1459 
1460     // Set up data.
1461     cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
1462     cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
1463 
1464     clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) );
1465     test_error_ret(status,"Failed to allocate uchar buffer",status);
1466     clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) );
1467     test_error_ret(status,"Failed to allocate uint buffer",status);
1468 
1469     status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status);
1470     status = clSetKernelArg(reader,1,sizeof(cl_mem),&uint_mem); test_error_ret(status,"set arg",status);
1471 
1472     cl_uchar expected_uchar = 'a';
1473     cl_uint expected_uint = 42;
1474     for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice
1475         // Read back data
1476         *uchar_data = -1;
1477         *uint_data = -1;
1478         const size_t one = 1;
1479         status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
1480         status = clFinish(queue); test_error_ret(status,"finish",status);
1481 
1482         cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0);
1483         cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0);
1484 
1485         if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) {
1486             log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
1487                     iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint );
1488             err |= 1;
1489         }
1490 
1491         clEnqueueUnmapMemObject(queue, uint_mem, uint_data_ptr, 0, 0, 0);
1492         clEnqueueUnmapMemObject(queue, uchar_mem, uchar_data_ptr, 0, 0, 0);
1493 
1494         // Mutate the data.
1495         expected_uchar++;
1496         expected_uint++;
1497 
1498         // Write the new values into persistent store.
1499         *uchar_data = expected_uchar;
1500         *uint_data = expected_uint;
1501         status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status);
1502         status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status);
1503         status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
1504         status = clFinish(queue); test_error_ret(status,"finish",status);
1505     }
1506 
1507     if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1508     align_free(uchar_data);
1509     align_free(uint_data);
1510     return err;
1511 }
1512 
1513 // Determines whether its valid to skip this test based on the driver version
1514 // and the features it optionally supports.
1515 // Whether the test should be skipped is writen into the out paramter skip.
1516 // The check returns an error code for the clDeviceInfo query.
should_skip(cl_device_id device,cl_bool & skip)1517 static cl_int should_skip(cl_device_id device, cl_bool& skip)
1518 {
1519     // Assume we can't skip to begin with.
1520     skip = CL_FALSE;
1521 
1522     // Progvar tests are already skipped for OpenCL < 2.0, so here we only need
1523     // to test for 3.0 since that is when program scope global variables become
1524     // optional.
1525     if (get_device_cl_version(device) >= Version(3, 0))
1526     {
1527         size_t max_global_variable_size{};
1528         test_error(clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
1529                                    sizeof(max_global_variable_size),
1530                                    &max_global_variable_size, nullptr),
1531                    "clGetDeviceInfo failed");
1532         skip = (max_global_variable_size != 0) ? CL_FALSE : CL_TRUE;
1533     }
1534     return CL_SUCCESS;
1535 }
1536 
1537 ////////////////////
1538 // Global functions
1539 
1540 
1541 // Test support for variables at program scope. Miscellaneous
test_progvar_prog_scope_misc(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1542 int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1543 {
1544     cl_bool skip{ CL_FALSE };
1545     auto error = should_skip(device, skip);
1546     if (CL_SUCCESS != error)
1547     {
1548         return TEST_FAIL;
1549     }
1550     if (skip)
1551     {
1552         log_info("Skipping progvar_prog_scope_misc since it is optionally not "
1553                  "supported on this device\n");
1554         return TEST_SKIPPED_ITSELF;
1555     }
1556     size_t max_size = 0;
1557     size_t pref_size = 0;
1558 
1559     cl_int err = CL_SUCCESS;
1560 
1561     err = l_get_device_info( device, &max_size, &pref_size );
1562     err |= l_build_type_table( device );
1563 
1564     err |= l_capacity( device, context, queue, max_size );
1565     err |= l_user_type( device, context, queue, false );
1566     err |= l_user_type( device, context, queue, true );
1567 
1568     return err;
1569 }
1570 
1571 
1572 // Test support for variables at program scope. Unitialized data
test_progvar_prog_scope_uninit(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1573 int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1574 {
1575     cl_bool skip{ CL_FALSE };
1576     auto error = should_skip(device, skip);
1577     if (CL_SUCCESS != error)
1578     {
1579         return TEST_FAIL;
1580     }
1581     if (skip)
1582     {
1583         log_info(
1584             "Skipping progvar_prog_scope_uninit since it is optionally not "
1585             "supported on this device\n");
1586         return TEST_SKIPPED_ITSELF;
1587     }
1588     size_t max_size = 0;
1589     size_t pref_size = 0;
1590 
1591     cl_int err = CL_SUCCESS;
1592 
1593     err = l_get_device_info( device, &max_size, &pref_size );
1594     err |= l_build_type_table( device );
1595 
1596     err |= l_write_read( device, context, queue );
1597 
1598     return err;
1599 }
1600 
1601 // Test support for variables at program scope. Initialized data.
test_progvar_prog_scope_init(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1602 int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1603 {
1604     cl_bool skip{ CL_FALSE };
1605     auto error = should_skip(device, skip);
1606     if (CL_SUCCESS != error)
1607     {
1608         return TEST_FAIL;
1609     }
1610     if (skip)
1611     {
1612         log_info("Skipping progvar_prog_scope_init since it is optionally not "
1613                  "supported on this device\n");
1614         return TEST_SKIPPED_ITSELF;
1615     }
1616     size_t max_size = 0;
1617     size_t pref_size = 0;
1618 
1619     cl_int err = CL_SUCCESS;
1620 
1621     err = l_get_device_info( device, &max_size, &pref_size );
1622     err |= l_build_type_table( device );
1623 
1624     err |= l_init_write_read( device, context, queue );
1625 
1626     return err;
1627 }
1628 
1629 
1630 // A simple test for support of static variables inside a kernel.
test_progvar_func_scope(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1631 int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1632 {
1633     cl_bool skip{ CL_FALSE };
1634     auto error = should_skip(device, skip);
1635     if (CL_SUCCESS != error)
1636     {
1637         return TEST_FAIL;
1638     }
1639     if (skip)
1640     {
1641         log_info("Skipping progvar_func_scope since it is optionally not "
1642                  "supported on this device\n");
1643         return TEST_SKIPPED_ITSELF;
1644     }
1645     size_t max_size = 0;
1646     size_t pref_size = 0;
1647 
1648     cl_int err = CL_SUCCESS;
1649 
1650     // Deliberately have two variables with the same name but in different
1651     // scopes.
1652     // Also, use a large initialized structure in both cases.
1653     const char prog_src[] =
1654         "typedef struct { char c; int16 i; } mystruct_t;\n"
1655         "kernel void test_bump( global int* value, int which ) {\n"
1656         "  if ( which ) {\n"
1657         // Explicit address space.
1658         // Last element set to 0
1659         "     static global mystruct_t persistent = {'a',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
1660         "     *value = persistent.i.sf++;\n"
1661         "  } else {\n"
1662         // Implicitly global
1663         // Last element set to 100
1664         "     static mystruct_t persistent = {'b',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
1665         "     *value = persistent.i.sf++;\n"
1666         "  }\n"
1667         "}\n";
1668 
1669     StringTable ksrc;
1670     ksrc.add( prog_src );
1671 
1672     int status = CL_SUCCESS;
1673     clProgramWrapper program;
1674     clKernelWrapper test_bump;
1675 
1676     status = create_single_kernel_helper_with_build_options(context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump", OPTIONS);
1677     test_error_ret(status, "Failed to create program for function static variable test", status);
1678 
1679     // Check size query.
1680     size_t used_bytes = 0;
1681     status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
1682     test_error_ret(status,"Failed to query global variable total size",status);
1683     size_t expected_size = 2 * sizeof(cl_int); // Two ints.
1684     if ( used_bytes < expected_size ) {
1685         log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
1686         err |= 1;
1687     }
1688 
1689     // Prepare the data.
1690     cl_int counter_value = 0;
1691     clMemWrapper counter_value_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(counter_value), &counter_value, &status ) );
1692     test_error_ret(status,"Failed to allocate counter query buffer",status);
1693 
1694     status = clSetKernelArg(test_bump,0,sizeof(cl_mem),&counter_value_mem); test_error_ret(status,"set arg",status);
1695 
1696     // Go a few rounds, alternating between the two counters in the kernel.
1697 
1698     // Same as initial values in kernel.
1699     // But "true" which increments the 0-based counter, and "false" which
1700     // increments the 100-based counter.
1701     cl_int expected_counter[2] = { 100, 0 };
1702 
1703     const size_t one = 1;
1704     for ( int iround = 0; iround < 5 ; iround++ ) { // Must go at least twice around
1705         for ( int iwhich = 0; iwhich < 2 ; iwhich++ ) { // Cover both counters
1706             status = clSetKernelArg(test_bump,1,sizeof(iwhich),&iwhich); test_error_ret(status,"set arg",status);
1707             status = clEnqueueNDRangeKernel(queue,test_bump,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue test_bump",status);
1708             status = clFinish(queue); test_error_ret(status,"finish",status);
1709 
1710             cl_uchar *counter_value_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(counter_value), 0, 0, 0, 0);
1711 
1712             if ( counter_value != expected_counter[iwhich] ) {
1713                 log_error("Error: Round %d on counter %d: Expected %d but got %d\n",
1714                         iround, iwhich, expected_counter[iwhich], counter_value );
1715                 err |= 1;
1716             }
1717             expected_counter[iwhich]++; // Emulate behaviour of the kernel.
1718 
1719             clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr, 0, 0, 0);
1720         }
1721     }
1722 
1723     if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1724 
1725     return err;
1726 }
1727