1 //
2 // Copyright (c) 2017, 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 // Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits.
19 // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE
20
21 #define FLUSH fflush(stdout)
22
23 #define MAX_STR 16*1024
24
25 #define ALIGNMENT 128
26
27 #define OPTIONS "-cl-std=CL2.0"
28
29 // NUM_ROUNDS must be at least 1.
30 // It determines how many sets of random data we push through the global
31 // variables.
32 #define NUM_ROUNDS 1
33
34 // This is a shared property of the writer and reader kernels.
35 #define NUM_TESTED_VALUES 5
36
37 // TODO: pointer-to-half (and its vectors)
38 // TODO: union of...
39
40 #include <algorithm>
41 #include <cstdio>
42 #include <cstdlib>
43 #include <cstring>
44 #include <string>
45 #include <vector>
46 #include <cassert>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include "harness/typeWrappers.h"
50 #include "harness/errorHelpers.h"
51 #include "harness/mt19937.h"
52 #include "procs.h"
53
54
55 ////////////////////
56 // Device capabilities
57 static int l_has_double = 0;
58 static int l_has_half = 0;
59 static int l_64bit_device = 0;
60 static int l_has_int64_atomics = 0;
61 static int l_has_intptr_atomics = 0;
62 static int l_has_cles_int64 = 0;
63
64 static int l_host_is_big_endian = 1;
65
66 static size_t l_max_global_id0 = 0;
67 static cl_bool l_linker_available = false;
68
69 #define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0)
70
71 ////////////////////
72 // Info about types we can use for program scope variables.
73
74
75 class TypeInfo {
76
77 public:
TypeInfo()78 TypeInfo() :
79 name(""),
80 m_buf_elem_type(""),
81 m_is_vecbase(false),
82 m_is_atomic(false),
83 m_is_like_size_t(false),
84 m_is_bool(false),
85 m_elem_type(0), m_num_elem(0),
86 m_size(0),
87 m_value_size(0)
88 {}
TypeInfo(const char * name_arg)89 TypeInfo(const char* name_arg) :
90 name(name_arg),
91 m_buf_elem_type(name_arg),
92 m_is_vecbase(false),
93 m_is_atomic(false),
94 m_is_like_size_t(false),
95 m_is_bool(false),
96 m_elem_type(0), m_num_elem(0),
97 m_size(0),
98 m_value_size(0)
99 { }
100
101 // Vectors
TypeInfo(TypeInfo * elem_type,int num_elem)102 TypeInfo( TypeInfo* elem_type, int num_elem ) :
103 m_is_vecbase(false),
104 m_is_atomic(false),
105 m_is_like_size_t(false),
106 m_is_bool(false),
107 m_elem_type(elem_type),
108 m_num_elem(num_elem)
109 {
110 char the_name[10]; // long enough for longest vector type name "double16"
111 snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem);
112 this->name = std::string(the_name);
113 this->m_buf_elem_type = std::string(the_name);
114 this->m_value_size = num_elem * elem_type->get_size();
115 if ( m_num_elem == 3 ) {
116 this->m_size = 4 * elem_type->get_size();
117 } else {
118 this->m_size = num_elem * elem_type->get_size();
119 }
120 }
get_name(void) const121 const std::string& get_name(void) const { return name; }
get_name_c_str(void) const122 const char* get_name_c_str(void) const { return name.c_str(); }
set_vecbase(void)123 TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; }
set_atomic(void)124 TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; }
set_like_size_t(void)125 TypeInfo& set_like_size_t(void) {
126 this->m_is_like_size_t = true;
127 this->set_size( l_64bit_device ? 8 : 4 );
128 this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint";
129 return *this;
130 }
set_bool(void)131 TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; }
set_size(size_t n)132 TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; }
set_buf_elem_type(const char * name)133 TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; }
134
elem_type(void) const135 const TypeInfo* elem_type(void) const { return m_elem_type; }
num_elem(void) const136 int num_elem(void) const { return m_num_elem; }
137
is_vecbase(void) const138 bool is_vecbase(void) const {return m_is_vecbase;}
is_atomic(void) const139 bool is_atomic(void) const {return m_is_atomic;}
is_atomic_64bit(void) const140 bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;}
is_like_size_t(void) const141 bool is_like_size_t(void) const {return m_is_like_size_t;}
is_bool(void) const142 bool is_bool(void) const {return m_is_bool;}
get_size(void) const143 size_t get_size(void) const {return m_size;}
get_value_size(void) const144 size_t get_value_size(void) const {return m_value_size;}
145
146 // When passing values of this type to a kernel, what buffer type
147 // should be used?
get_buf_elem_type(void) const148 const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); }
149
as_string(const cl_uchar * value_ptr) const150 std::string as_string(const cl_uchar* value_ptr) const {
151 // This method would be shorter if I had a real handle to element
152 // vector type.
153 if ( this->is_bool() ) {
154 std::string result( name );
155 result += "<";
156 result += (*value_ptr ? "true" : "false");
157 result += ", ";
158 char buf[10];
159 sprintf(buf,"%02x",*value_ptr);
160 result += buf;
161 result += ">";
162 return result;
163 } else if ( this->num_elem() ) {
164 std::string result( name );
165 result += "<";
166 for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
167 char buf[MAX_STR];
168 if ( ielem ) result += ", ";
169 for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) {
170 sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] );
171 }
172 result += buf;
173 }
174 result += ">";
175 return result;
176 } else {
177 std::string result( name );
178 result += "<";
179 char buf[MAX_STR];
180 for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) {
181 sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] );
182 }
183 result += buf;
184 result += ">";
185 return result;
186 }
187 }
188
189 // Initialize the given buffer to a constant value initialized as if it
190 // were from the INIT_VAR macro below.
191 // Only needs to support values 0 and 1.
init(cl_uchar * buf,cl_uchar val) const192 void init( cl_uchar* buf, cl_uchar val) const {
193 if ( this->num_elem() ) {
194 for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
195 // Delegate!
196 this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val );
197 }
198 } else {
199 init_elem( buf, val );
200 }
201 }
202
203 private:
init_elem(cl_uchar * buf,cl_uchar val) const204 void init_elem( cl_uchar* buf, cl_uchar val ) const {
205 size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size();
206 memset(buf,0,elem_size);
207 if ( val ) {
208 if ( strstr( name.c_str(), "float" ) ) {
209 *(float*)buf = (float)val;
210 return;
211 }
212 if ( strstr( name.c_str(), "double" ) ) {
213 *(double*)buf = (double)val;
214 return;
215 }
216 if ( this->is_bool() ) { *buf = (bool)val; return; }
217
218 // Write a single character value to the correct spot,
219 // depending on host endianness.
220 if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val;
221 else *buf = (cl_uchar)val;
222 }
223 }
224 public:
225
dump(FILE * fp) const226 void dump(FILE* fp) const {
227 fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(),
228 (int)m_size,
229 (int)m_value_size,
230 m_buf_elem_type.c_str() );
231 if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() );
232 if ( this->m_is_vecbase ) fprintf(fp, " vecbase");
233 if ( this->m_is_bool ) fprintf(fp, " bool");
234 if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t");
235 if ( this->m_is_atomic ) fprintf(fp, " atomic");
236 fprintf(fp,"\n");
237 fflush(fp);
238 }
239
240 private:
241 std::string name;
242 TypeInfo* m_elem_type;
243 int m_num_elem;
244 bool m_is_vecbase;
245 bool m_is_atomic;
246 bool m_is_like_size_t;
247 bool m_is_bool;
248 size_t m_size; // Number of bytes of storage occupied by this type.
249 size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3.
250
251 // When passing values of this type to a kernel, what buffer type
252 // should be used?
253 // For most types, it's just itself.
254 // Use a std::string so I don't have to make a copy constructor.
255 std::string m_buf_elem_type;
256 };
257
258
259 #define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double
260 #define NUM_VECTOR_SIZES (5) // 2,3,4,8,16
261 #define NUM_PLAIN_TYPES \
262 5 /*boolean and size_t family */ \
263 + NUM_SCALAR_TYPES \
264 + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \
265 + 10 /* atomic types */
266
267 // Need room for plain, array, pointer, struct
268 #define MAX_TYPES (4*NUM_PLAIN_TYPES)
269
270 static TypeInfo type_info[MAX_TYPES];
271 static int num_type_info = 0; // Number of valid entries in type_info[]
272
273
274
275
276 // A helper class to form kernel source arguments for clCreateProgramWithSource.
277 class StringTable {
278 public:
StringTable()279 StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {}
~StringTable()280 ~StringTable() { release_frozen(); }
281
add(std::string s)282 void add(std::string s) { release_frozen(); m_strings.push_back(s); }
283
num_str()284 const size_t num_str() { freeze(); return m_strings.size(); }
strs()285 const char** strs() { freeze(); return m_c_strs; }
lengths()286 const size_t* lengths() { freeze(); return m_lengths; }
287
288 private:
freeze(void)289 void freeze(void) {
290 if ( !m_frozen ) {
291 release_frozen();
292
293 m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size());
294 m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size());
295 assert( m_c_strs );
296 assert( m_lengths );
297
298 for ( size_t i = 0; i < m_strings.size() ; i++ ) {
299 m_c_strs[i] = m_strings[i].c_str();
300 m_lengths[i] = strlen(m_c_strs[i]);
301 }
302
303 m_frozen = true;
304 }
305 }
release_frozen(void)306 void release_frozen(void) {
307 if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; }
308 if ( m_lengths ) { free(m_lengths); m_lengths = 0; }
309 m_frozen = false;
310 }
311
312 typedef std::vector<std::string> strlist_t;
313 strlist_t m_strings;
314 const char** m_c_strs;
315 size_t* m_lengths;
316 bool m_frozen;
317 };
318
319
320 ////////////////////
321 // File scope function declarations
322
323 static void l_load_abilities(cl_device_id device);
324 static const char* l_get_fp64_pragma(void);
325 static const char* l_get_cles_int64_pragma(void);
326 static int l_build_type_table(cl_device_id device);
327
328 static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret);
329
330 static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state );
331 static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti );
332 static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti );
333
334 static std::string conversion_functions(const TypeInfo& ti);
335 static std::string global_decls(const TypeInfo& ti, bool with_init);
336 static std::string global_check_function(const TypeInfo& ti);
337 static std::string writer_function(const TypeInfo& ti);
338 static std::string reader_function(const TypeInfo& ti);
339
340 static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
341 static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
342
343 static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
344 static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
345
346 static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size );
347 static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation );
348
349
350
351 ////////////////////
352 // File scope function definitions
353
print_build_log(cl_program program,cl_uint num_devices,cl_device_id * device_list,cl_uint count,const char ** strings,const size_t * lengths,const char * options)354 static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options)
355 {
356 cl_uint i;
357 cl_int error;
358 BufferOwningPtr<cl_device_id> devices;
359
360 if(num_devices == 0 || device_list == NULL)
361 {
362 error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL);
363 test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
364
365 device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices);
366 devices.reset(device_list);
367
368 memset(device_list, 0, sizeof(cl_device_id) * num_devices);
369
370 error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL);
371 test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
372 }
373
374 cl_uint z;
375 bool sourcePrinted = false;
376
377 for(z = 0; z < num_devices; z++)
378 {
379 char deviceName[4096] = "";
380 error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
381 check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z);
382
383 cl_build_status buildStatus;
384 error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
385 check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
386
387 if(buildStatus != CL_BUILD_SUCCESS)
388 {
389 if(!sourcePrinted)
390 {
391 log_error("Build options: %s\n", options);
392 if(count && strings)
393 {
394 log_error("Original source is: ------------\n");
395 for(i = 0; i < count; i++) log_error("%s", strings[i]);
396 }
397 sourcePrinted = true;
398 }
399
400 char statusString[64] = "";
401 if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
402 sprintf(statusString, "CL_BUILD_SUCCESS");
403 else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
404 sprintf(statusString, "CL_BUILD_NONE");
405 else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
406 sprintf(statusString, "CL_BUILD_ERROR");
407 else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
408 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
409 else
410 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
411
412 log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
413
414 size_t paramSize = 0;
415 error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, ¶mSize);
416 if(check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) break;
417
418 std::string log;
419 log.resize(paramSize/sizeof(char));
420
421 error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
422 if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break;
423 if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n");
424 else
425 {
426 log_error("Build log:\n", deviceName);
427 log_error("%s\n", log.c_str());
428 }
429 }
430 }
431 return error;
432 }
433
l_load_abilities(cl_device_id device)434 static void l_load_abilities(cl_device_id device)
435 {
436 l_has_half = is_extension_available(device,"cl_khr_fp16");
437 l_has_double = is_extension_available(device,"cl_khr_fp64");
438 l_has_cles_int64 = is_extension_available(device,"cles_khr_int64");
439
440 l_has_int64_atomics
441 = is_extension_available(device,"cl_khr_int64_base_atomics")
442 && is_extension_available(device,"cl_khr_int64_extended_atomics");
443
444 {
445 int status = CL_SUCCESS;
446 cl_uint addr_bits = 32;
447 status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0);
448 l_64bit_device = ( status == CL_SUCCESS && addr_bits == 64 );
449 }
450
451 // 32-bit devices always have intptr atomics.
452 l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics;
453
454 union { char c[4]; int i; } probe;
455 probe.i = 1;
456 l_host_is_big_endian = !probe.c[0];
457
458 // Determine max global id.
459 {
460 int status = CL_SUCCESS;
461 cl_uint max_dim = 0;
462 status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0);
463 assert( status == CL_SUCCESS );
464 assert( max_dim > 0 );
465 size_t max_id[3];
466 max_id[0] = 0;
467 status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0);
468 assert( status == CL_SUCCESS );
469 l_max_global_id0 = max_id[0];
470 }
471
472 { // Is separate compilation supported?
473 int status = CL_SUCCESS;
474 l_linker_available = false;
475 status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0);
476 assert( status == CL_SUCCESS );
477 }
478 }
479
480
l_get_fp64_pragma(void)481 static const char* l_get_fp64_pragma(void)
482 {
483 return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : "";
484 }
485
l_get_cles_int64_pragma(void)486 static const char* l_get_cles_int64_pragma(void)
487 {
488 return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : "";
489 }
490
l_get_int64_atomic_pragma(void)491 static const char* l_get_int64_atomic_pragma(void)
492 {
493 return "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
494 "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
495 }
496
l_build_type_table(cl_device_id device)497 static int l_build_type_table(cl_device_id device)
498 {
499 int status = CL_SUCCESS;
500 size_t iscalar = 0;
501 size_t ivecsize = 0;
502 int vecsizes[] = { 2, 3, 4, 8, 16 };
503 const char* vecbase[] = {
504 "uchar", "char",
505 "ushort", "short",
506 "uint", "int",
507 "ulong", "long",
508 "float",
509 "double"
510 };
511 int vecbase_size[] = {
512 1, 1,
513 2, 2,
514 4, 4,
515 8, 8,
516 4,
517 8
518 };
519 const char* like_size_t[] = {
520 "intptr_t",
521 "uintptr_t",
522 "size_t",
523 "ptrdiff_t"
524 };
525 const char* atomics[] = {
526 "atomic_int", "atomic_uint",
527 "atomic_long", "atomic_ulong",
528 "atomic_float",
529 "atomic_double",
530 };
531 int atomics_size[] = {
532 4, 4,
533 8, 8,
534 4,
535 8
536 };
537 const char* intptr_atomics[] = {
538 "atomic_intptr_t",
539 "atomic_uintptr_t",
540 "atomic_size_t",
541 "atomic_ptrdiff_t"
542 };
543
544 l_load_abilities(device);
545 num_type_info = 0;
546
547 // Boolean.
548 type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar");
549
550 // Vector types, and the related scalar element types.
551 for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) {
552 if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue;
553 if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue;
554
555 // Scalar
556 TypeInfo* elem_type = type_info + num_type_info++;
557 *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] );
558
559 // Vector
560 for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) {
561 type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] );
562 }
563 }
564
565 // Size_t-like types
566 for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) {
567 type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t();
568 }
569
570 // Atomic types.
571 for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) {
572 if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue;
573 if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue;
574
575 // The +7 is used to skip over the "atomic_" prefix.
576 const char* buf_type = atomics[iscalar] + 7;
577 type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type );
578 }
579 if ( l_has_intptr_atomics ) {
580 for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) {
581 type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t();
582 }
583 }
584
585 assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES
586
587 #if 0
588 for ( size_t i = 0 ; i < num_type_info ; i++ ) {
589 type_info[ i ].dump(stdout);
590 }
591 exit(0);
592 #endif
593
594 return status;
595 }
596
l_find_type(const char * name)597 static const TypeInfo& l_find_type( const char* name )
598 {
599 auto itr =
600 std::find_if(type_info, type_info + num_type_info,
601 [name](TypeInfo& ti) { return ti.get_name() == name; });
602 assert(itr != type_info + num_type_info);
603 return *itr;
604 }
605
606
607
608 // Populate return parameters for max program variable size, preferred program variable size.
609
l_get_device_info(cl_device_id device,size_t * max_size_ret,size_t * pref_size_ret)610 static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret)
611 {
612 int err = CL_SUCCESS;
613 size_t return_size = 0;
614
615 err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size);
616 if ( err != CL_SUCCESS ) {
617 log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
618 return err;
619 }
620 if ( return_size != sizeof(size_t) ) {
621 log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
622 return 1;
623 }
624 if ( return_size != sizeof(size_t) ) {
625 log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
626 return 1;
627 }
628
629 return_size = 0;
630 err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size);
631 if ( err != CL_SUCCESS ) {
632 log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err);
633 return err;
634 }
635 if ( return_size != sizeof(size_t) ) {
636 log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size );
637 return 1;
638 }
639
640 return CL_SUCCESS;
641 }
642
643
l_set_randomly(cl_uchar * buf,size_t buf_size,RandomSeed & rand_state)644 static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state )
645 {
646 assert( 0 == (buf_size % sizeof(cl_uint) ) );
647 for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) {
648 *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state );
649 }
650 #if 0
651 for ( size_t i = 0; i < buf_size ; i++ ) {
652 printf("%02x",buf[i]);
653 }
654 printf("\n");
655 #endif
656 }
657
658 // Return num_value values of the given type.
659 // Returns CL_SUCCESS if they compared as equal.
l_compare(const char * test_name,const cl_uchar * expected,const cl_uchar * received,size_t num_values,const TypeInfo & ti)660 static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti )
661 {
662 // Compare only the valid returned bytes.
663 for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) {
664 const cl_uchar* expv = expected + value_idx * ti.get_size();
665 const cl_uchar* gotv = received + value_idx * ti.get_size();
666 if ( memcmp( expv, gotv, ti.get_value_size() ) ) {
667 std::string exp_str = ti.as_string( expv );
668 std::string got_str = ti.as_string( gotv );
669 log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n",
670 test_name,
671 ti.get_name_c_str(), value_idx,
672 exp_str.c_str(),
673 got_str.c_str() );
674 return 1;
675 }
676 }
677 return CL_SUCCESS;
678 }
679
680 // Copy a target value from src[idx] to dest[idx]
l_copy(cl_uchar * dest,unsigned dest_idx,const cl_uchar * src,unsigned src_idx,const TypeInfo & ti)681 static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti )
682 {
683 cl_uchar* raw_dest = dest + dest_idx * ti.get_size();
684 const cl_uchar* raw_src = src + src_idx * ti.get_size();
685 memcpy( raw_dest, raw_src, ti.get_value_size() );
686
687 return 0;
688 }
689
690
conversion_functions(const TypeInfo & ti)691 static std::string conversion_functions(const TypeInfo& ti)
692 {
693 std::string result;
694 static char buf[MAX_STR];
695 int num_printed = 0;
696 // The atomic types just use the base type.
697 if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) {
698 // The type is represented in a buffer by itself.
699 num_printed = snprintf(buf,MAX_STR,
700 "%s from_buf(%s a) { return a; }\n"
701 "%s to_buf(%s a) { return a; }\n",
702 ti.get_buf_elem_type(), ti.get_buf_elem_type(),
703 ti.get_buf_elem_type(), ti.get_buf_elem_type() );
704 } else {
705 // Just use C-style cast.
706 num_printed = snprintf(buf,MAX_STR,
707 "%s from_buf(%s a) { return (%s)a; }\n"
708 "%s to_buf(%s a) { return (%s)a; }\n",
709 ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(),
710 ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() );
711 }
712 // Add initializations.
713 if ( ti.is_atomic() ) {
714 num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
715 "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" );
716 } else {
717 // This cast works even if the target type is a vector type.
718 num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
719 "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
720 }
721 assert( num_printed < MAX_STR ); // or increase MAX_STR
722 result = buf;
723 return result;
724 }
725
global_decls(const TypeInfo & ti,bool with_init)726 static std::string global_decls(const TypeInfo& ti, bool with_init )
727 {
728 const char* tn = ti.get_name_c_str();
729 const char* vol = (ti.is_atomic() ? " volatile " : " ");
730 static char decls[MAX_STR];
731 int num_printed = 0;
732 if ( with_init ) {
733 const char *decls_template_with_init =
734 "%s %s var = INIT_VAR(0);\n"
735 "global %s %s g_var = INIT_VAR(1);\n"
736 "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n"
737 "volatile global %s %s* p_var = &a_var[1];\n\n";
738 num_printed = snprintf(decls,sizeof(decls),decls_template_with_init,
739 vol,tn,vol,tn,vol,tn,vol,tn);
740 } else {
741 const char *decls_template_no_init =
742 "%s %s var;\n"
743 "global %s %s g_var;\n"
744 "%s %s a_var[2];\n"
745 "global %s %s* p_var;\n\n";
746 num_printed = snprintf(decls,sizeof(decls),decls_template_no_init,
747 vol,tn,vol,tn,vol,tn,vol,tn);
748 }
749 assert( num_printed < sizeof(decls) );
750 return std::string(decls);
751 }
752
753 // Return the source code for the "global_check" function for the given type.
754 // This function checks that all program-scope variables have appropriate
755 // initial values when no explicit initializer is used. If all tests pass the
756 // kernel writes a non-zero value to its output argument, otherwise it writes
757 // zero.
global_check_function(const TypeInfo & ti)758 static std::string global_check_function(const TypeInfo& ti)
759 {
760 const std::string type_name = ti.get_buf_elem_type();
761
762 // all() should only be used on vector inputs. For scalar comparison, the
763 // result of the equality operator can be used as a bool value.
764 const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
765 const std::string is_equality_true = is_scalar ? "" : "all";
766
767 std::string code = "kernel void global_check(global int* out) {\n";
768 code += " const " + type_name + " zero = ((" + type_name + ")0);\n";
769 code += " bool status = true;\n";
770 if (ti.is_atomic()) {
771 code += " status &= " + is_equality_true + "(atomic_load(&var) == zero);\n";
772 code += " status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n";
773 code += " status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n";
774 code += " status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n";
775 } else {
776 code += " status &= " + is_equality_true + "(var == zero);\n";
777 code += " status &= " + is_equality_true + "(g_var == zero);\n";
778 code += " status &= " + is_equality_true + "(a_var[0] == zero);\n";
779 code += " status &= " + is_equality_true + "(a_var[1] == zero);\n";
780 }
781 code += " status &= (p_var == NULL);\n";
782 code += " *out = status ? 1 : 0;\n";
783 code += "}\n\n";
784
785 return code;
786 }
787
788 // Return the source text for the writer function for the given type.
789 // For types that can't be passed as pointer-to-type as a kernel argument,
790 // use a substitute base type of the same size.
writer_function(const TypeInfo & ti)791 static std::string writer_function(const TypeInfo& ti)
792 {
793 static char writer_src[MAX_STR];
794 int num_printed = 0;
795 if ( !ti.is_atomic() ) {
796 const char* writer_template_normal =
797 "kernel void writer( global %s* src, uint idx ) {\n"
798 " var = from_buf(src[0]);\n"
799 " g_var = from_buf(src[1]);\n"
800 " a_var[0] = from_buf(src[2]);\n"
801 " a_var[1] = from_buf(src[3]);\n"
802 " p_var = a_var + idx;\n"
803 "}\n\n";
804 num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type());
805 } else {
806 const char* writer_template_atomic =
807 "kernel void writer( global %s* src, uint idx ) {\n"
808 " atomic_store( &var, from_buf(src[0]) );\n"
809 " atomic_store( &g_var, from_buf(src[1]) );\n"
810 " atomic_store( &a_var[0], from_buf(src[2]) );\n"
811 " atomic_store( &a_var[1], from_buf(src[3]) );\n"
812 " p_var = a_var + idx;\n"
813 "}\n\n";
814 num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type());
815 }
816 assert( num_printed < sizeof(writer_src) );
817 std::string result = writer_src;
818 return result;
819 }
820
821
822 // Return source text for teh reader function for the given type.
823 // For types that can't be passed as pointer-to-type as a kernel argument,
824 // use a substitute base type of the same size.
reader_function(const TypeInfo & ti)825 static std::string reader_function(const TypeInfo& ti)
826 {
827 static char reader_src[MAX_STR];
828 int num_printed = 0;
829 if ( !ti.is_atomic() ) {
830 const char* reader_template_normal =
831 "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
832 " *p_var = from_buf(ptr_write_val);\n"
833 " dest[0] = to_buf(var);\n"
834 " dest[1] = to_buf(g_var);\n"
835 " dest[2] = to_buf(a_var[0]);\n"
836 " dest[3] = to_buf(a_var[1]);\n"
837 "}\n\n";
838 num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type());
839 } else {
840 const char* reader_template_atomic =
841 "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
842 " atomic_store( p_var, from_buf(ptr_write_val) );\n"
843 " dest[0] = to_buf( atomic_load( &var ) );\n"
844 " dest[1] = to_buf( atomic_load( &g_var ) );\n"
845 " dest[2] = to_buf( atomic_load( &a_var[0] ) );\n"
846 " dest[3] = to_buf( atomic_load( &a_var[1] ) );\n"
847 "}\n\n";
848 num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type());
849 }
850 assert( num_printed < sizeof(reader_src) );
851 std::string result = reader_src;
852 return result;
853 }
854
855 // Check that all globals where appropriately default-initialized.
check_global_initialization(cl_context context,cl_program program,cl_command_queue queue)856 static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue)
857 {
858 int status = CL_SUCCESS;
859
860 // Create a buffer on device to store a unique integer.
861 cl_int is_init_valid = 0;
862 clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status));
863 test_error_ret(status, "Failed to allocate buffer", status);
864
865 // Create, setup and invoke kernel.
866 clKernelWrapper global_check(clCreateKernel(program, "global_check", &status));
867 test_error_ret(status, "Failed to create global_check kernel", status);
868 status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer);
869 test_error_ret(status, "Failed to set up argument for the global_check kernel", status);
870 const cl_uint work_dim = 1;
871 const size_t global_work_offset[] = { 0 };
872 const size_t global_work_size[] = { 1 };
873 status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr);
874 test_error_ret(status, "Failed to run global_check kernel", status);
875 status = clFinish(queue);
876 test_error_ret(status, "clFinish() failed", status);
877
878 // Read back the memory buffer from the device.
879 status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr);
880 test_error_ret(status, "Failed to read buffer from device", status);
881 if (is_init_valid == 0) {
882 log_error("Unexpected default values were detected");
883 return 1;
884 }
885
886 return CL_SUCCESS;
887 }
888
889 // Check write-then-read.
l_write_read(cl_device_id device,cl_context context,cl_command_queue queue)890 static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
891 {
892 int status = CL_SUCCESS;
893 int itype;
894
895 RandomSeed rand_state( gRandomSeed );
896
897 for ( itype = 0; itype < num_type_info ; itype++ ) {
898 status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state );
899 FLUSH;
900 }
901
902 return status;
903 }
904
l_write_read_for_type(cl_device_id device,cl_context context,cl_command_queue queue,const TypeInfo & ti,RandomSeed & rand_state)905 static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
906 {
907 int err = CL_SUCCESS;
908 std::string type_name( ti.get_name() );
909 const char* tn = type_name.c_str();
910 log_info(" %s ",tn);
911
912 StringTable ksrc;
913 ksrc.add( l_get_fp64_pragma() );
914 ksrc.add( l_get_cles_int64_pragma() );
915 if (ti.is_atomic_64bit())
916 ksrc.add( l_get_int64_atomic_pragma() );
917 ksrc.add( conversion_functions(ti) );
918 ksrc.add( global_decls(ti,false) );
919 ksrc.add( global_check_function(ti) );
920 ksrc.add( writer_function(ti) );
921 ksrc.add( reader_function(ti) );
922
923 int status = CL_SUCCESS;
924 clProgramWrapper program;
925 clKernelWrapper writer;
926
927 status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
928 test_error_ret(status,"Failed to create program for read-after-write test",status);
929
930 clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
931 test_error_ret(status,"Failed to create reader kernel for read-after-write test",status);
932
933 // Check size query.
934 size_t used_bytes = 0;
935 status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
936 test_error_ret(status,"Failed to query global variable total size",status);
937 size_t expected_used_bytes =
938 (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
939 + ( l_64bit_device ? 8 : 4 ); // The pointer
940 if ( used_bytes < expected_used_bytes ) {
941 log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
942 err |= 1;
943 }
944
945 err |= check_global_initialization(context, program, queue);
946
947 // We need to create 5 random values of the given type,
948 // and read 4 of them back.
949 const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
950 const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
951 cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
952 cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
953
954 clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
955 test_error_ret(status,"Failed to allocate write buffer",status);
956 clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
957 test_error_ret(status,"Failed to allocate read buffer",status);
958
959 status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
960 status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
961
962 // Boolean random data needs to be massaged a bit more.
963 const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
964 unsigned bool_iter = 0;
965
966 for ( int iround = 0; iround < num_rounds ; iround++ ) {
967 for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
968 // Generate new random data to push through.
969 // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
970
971 cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
972
973 if ( ti.is_bool() ) {
974 // For boolean, random data cast to bool isn't very random.
975 // So use the bottom bit of bool_value_iter to get true
976 // diversity.
977 for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
978 write_data[value_idx] = (1<<value_idx) & bool_iter;
979 //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
980 }
981 bool_iter++;
982 } else {
983 l_set_randomly( write_data, write_data_size, rand_state );
984 }
985 status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
986
987 // The value to write via the pointer should be taken from the
988 // 5th typed slot of the write_data.
989 status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
990
991 // Determine the expected values.
992 cl_uchar expected[read_data_size];
993 memset( expected, -1, sizeof(expected) );
994 l_copy( expected, 0, write_data, 0, ti );
995 l_copy( expected, 1, write_data, 1, ti );
996 l_copy( expected, 2, write_data, 2, ti );
997 l_copy( expected, 3, write_data, 3, ti );
998 // But we need to take into account the value from the pointer write.
999 // The 2 represents where the "a" array values begin in our read-back.
1000 l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
1001
1002 clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
1003
1004 if ( ti.is_bool() ) {
1005 // Collapse down to one bit.
1006 for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
1007 }
1008
1009 cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
1010 memset(read_data, -1, read_data_size);
1011 clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1012
1013 // Now run the kernel
1014 const size_t one = 1;
1015 status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
1016 status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
1017 status = clFinish(queue); test_error_ret(status,"finish",status);
1018
1019 read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
1020
1021 if ( ti.is_bool() ) {
1022 // Collapse down to one bit.
1023 for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
1024 }
1025
1026 // Compare only the valid returned bytes.
1027 int compare_result = l_compare( "read-after-write", expected, read_data, NUM_TESTED_VALUES-1, ti );
1028 // log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
1029 err |= compare_result;
1030
1031 clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1032
1033 if ( err ) break;
1034 }
1035 }
1036
1037 if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1038 align_free(write_data);
1039 align_free(read_data);
1040 return err;
1041 }
1042
1043
1044 // Check initialization, then, read, then write, then read.
l_init_write_read(cl_device_id device,cl_context context,cl_command_queue queue)1045 static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
1046 {
1047 int status = CL_SUCCESS;
1048 int itype;
1049
1050 RandomSeed rand_state( gRandomSeed );
1051
1052 for ( itype = 0; itype < num_type_info ; itype++ ) {
1053 status = status | l_init_write_read_for_type(device,context,queue,type_info[itype], rand_state );
1054 }
1055 return status;
1056 }
l_init_write_read_for_type(cl_device_id device,cl_context context,cl_command_queue queue,const TypeInfo & ti,RandomSeed & rand_state)1057 static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
1058 {
1059 int err = CL_SUCCESS;
1060 std::string type_name( ti.get_name() );
1061 const char* tn = type_name.c_str();
1062 log_info(" %s ",tn);
1063
1064 StringTable ksrc;
1065 ksrc.add( l_get_fp64_pragma() );
1066 ksrc.add( l_get_cles_int64_pragma() );
1067 if (ti.is_atomic_64bit())
1068 ksrc.add( l_get_int64_atomic_pragma() );
1069 ksrc.add( conversion_functions(ti) );
1070 ksrc.add( global_decls(ti,true) );
1071 ksrc.add( writer_function(ti) );
1072 ksrc.add( reader_function(ti) );
1073
1074 int status = CL_SUCCESS;
1075 clProgramWrapper program;
1076 clKernelWrapper writer;
1077
1078 status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
1079 test_error_ret(status,"Failed to create program for init-read-after-write test",status);
1080
1081 clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
1082 test_error_ret(status,"Failed to create reader kernel for init-read-after-write test",status);
1083
1084 // Check size query.
1085 size_t used_bytes = 0;
1086 status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
1087 test_error_ret(status,"Failed to query global variable total size",status);
1088 size_t expected_used_bytes =
1089 (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
1090 + ( l_64bit_device ? 8 : 4 ); // The pointer
1091 if ( used_bytes < expected_used_bytes ) {
1092 log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
1093 err |= 1;
1094 }
1095
1096 // We need to create 5 random values of the given type,
1097 // and read 4 of them back.
1098 const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
1099 const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16);
1100
1101 cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
1102 cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
1103 clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
1104 test_error_ret(status,"Failed to allocate write buffer",status);
1105 clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
1106 test_error_ret(status,"Failed to allocate read buffer",status);
1107
1108 status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
1109 status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
1110
1111 // Boolean random data needs to be massaged a bit more.
1112 const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
1113 unsigned bool_iter = 0;
1114
1115 // We need to count iterations. We do something *different on the
1116 // first iteration, to ensure we actually pick up the initialized
1117 // values.
1118 unsigned iteration = 0;
1119
1120 for ( int iround = 0; iround < num_rounds ; iround++ ) {
1121 for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
1122 // Generate new random data to push through.
1123 // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
1124
1125 cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
1126
1127 if ( ti.is_bool() ) {
1128 // For boolean, random data cast to bool isn't very random.
1129 // So use the bottom bit of bool_value_iter to get true
1130 // diversity.
1131 for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
1132 write_data[value_idx] = (1<<value_idx) & bool_iter;
1133 //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
1134 }
1135 bool_iter++;
1136 } else {
1137 l_set_randomly( write_data, write_data_size, rand_state );
1138 }
1139 status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
1140
1141 if ( !iteration ) {
1142 // On first iteration, the value we write via the last arg
1143 // to the "reader" function is 0.
1144 // It's way easier to code the test this way.
1145 ti.init( write_data + (NUM_TESTED_VALUES-1)*ti.get_size(), 0 );
1146 }
1147
1148 // The value to write via the pointer should be taken from the
1149 // 5th typed slot of the write_data.
1150 status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
1151
1152 // Determine the expected values.
1153 cl_uchar expected[read_data_size];
1154 memset( expected, -1, sizeof(expected) );
1155 if ( iteration ) {
1156 l_copy( expected, 0, write_data, 0, ti );
1157 l_copy( expected, 1, write_data, 1, ti );
1158 l_copy( expected, 2, write_data, 2, ti );
1159 l_copy( expected, 3, write_data, 3, ti );
1160 // But we need to take into account the value from the pointer write.
1161 // The 2 represents where the "a" array values begin in our read-back.
1162 // But we need to take into account the value from the pointer write.
1163 l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
1164 } else {
1165 // On first iteration, expect these initialized values!
1166 // See the decls_template_with_init above.
1167 ti.init( expected, 0 );
1168 ti.init( expected + ti.get_size(), 1 );
1169 ti.init( expected + 2*ti.get_size(), 1 );
1170 // Emulate the effect of the write via the pointer.
1171 // The value is 0, not 1 (see above).
1172 // The pointer is always initialized to the second element
1173 // of the array. So it goes into slot 3 of the "expected" array.
1174 ti.init( expected + 3*ti.get_size(), 0 );
1175 }
1176
1177 if ( ti.is_bool() ) {
1178 // Collapse down to one bit.
1179 for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
1180 }
1181
1182 clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
1183
1184 cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
1185 memset( read_data, -1, read_data_size );
1186 clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1187
1188 // Now run the kernel
1189 const size_t one = 1;
1190 if ( iteration ) {
1191 status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
1192 } else {
1193 // On first iteration, we should be picking up the
1194 // initialized value. So don't enqueue the writer.
1195 }
1196 status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
1197 status = clFinish(queue); test_error_ret(status,"finish",status);
1198
1199 read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
1200
1201 if ( ti.is_bool() ) {
1202 // Collapse down to one bit.
1203 for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
1204 }
1205
1206 // Compare only the valid returned bytes.
1207 //log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
1208 int compare_result = l_compare( "init-write-read", expected, read_data, NUM_TESTED_VALUES-1, ti );
1209 //log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
1210 err |= compare_result;
1211
1212 clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1213
1214 if ( err ) break;
1215
1216 iteration++;
1217 }
1218 }
1219
1220 if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1221 align_free(write_data);
1222 align_free(read_data);
1223
1224 return err;
1225 }
1226
1227
1228 // Check that we can make at least one variable with size
1229 // max_size which is returned from the device info property : CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
l_capacity(cl_device_id device,cl_context context,cl_command_queue queue,size_t max_size)1230 static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size )
1231 {
1232 int err = CL_SUCCESS;
1233 // Just test one type.
1234 const TypeInfo ti( l_find_type("uchar") );
1235 log_info(" l_capacity...");
1236
1237 const char prog_src_template[] =
1238 #if defined(_WIN32)
1239 "uchar var[%Iu];\n\n"
1240 #else
1241 "uchar var[%zu];\n\n"
1242 #endif
1243 "kernel void get_max_size( global ulong* size_ret ) {\n"
1244 #if defined(_WIN32)
1245 " *size_ret = (ulong)%Iu;\n"
1246 #else
1247 " *size_ret = (ulong)%zu;\n"
1248 #endif
1249 "}\n\n"
1250 "kernel void writer( global uchar* src ) {\n"
1251 " var[get_global_id(0)] = src[get_global_linear_id()];\n"
1252 "}\n\n"
1253 "kernel void reader( global uchar* dest ) {\n"
1254 " dest[get_global_linear_id()] = var[get_global_id(0)];\n"
1255 "}\n\n";
1256 char prog_src[MAX_STR];
1257 int num_printed = snprintf(prog_src,sizeof(prog_src),prog_src_template,max_size, max_size);
1258 assert( num_printed < MAX_STR ); // or increase MAX_STR
1259
1260 StringTable ksrc;
1261 ksrc.add( prog_src );
1262
1263 int status = CL_SUCCESS;
1264 clProgramWrapper program;
1265 clKernelWrapper get_max_size;
1266
1267 status = create_single_kernel_helper_with_build_options(context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(), "get_max_size", OPTIONS);
1268 test_error_ret(status,"Failed to create program for capacity test",status);
1269
1270 // Check size query.
1271 size_t used_bytes = 0;
1272 status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
1273 test_error_ret(status,"Failed to query global variable total size",status);
1274 if ( used_bytes < max_size ) {
1275 log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)max_size, (unsigned long long)used_bytes );
1276 err |= 1;
1277 }
1278
1279 // Prepare to execute
1280 clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
1281 test_error_ret(status,"Failed to create writer kernel for capacity test",status);
1282 clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
1283 test_error_ret(status,"Failed to create reader kernel for capacity test",status);
1284
1285 cl_ulong max_size_ret = 0;
1286 const size_t arr_size = 10*1024*1024;
1287 cl_uchar* buffer = (cl_uchar*) align_malloc( arr_size, ALIGNMENT );
1288
1289 if ( !buffer ) { log_error("Failed to allocate buffer\n"); return 1; }
1290
1291 clMemWrapper max_size_ret_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(max_size_ret), &max_size_ret, &status ) );
1292 test_error_ret(status,"Failed to allocate size query buffer",status);
1293 clMemWrapper buffer_mem( clCreateBuffer( context, CL_MEM_READ_WRITE, arr_size, 0, &status ) );
1294 test_error_ret(status,"Failed to allocate write buffer",status);
1295
1296 status = clSetKernelArg(get_max_size,0,sizeof(cl_mem),&max_size_ret_mem); test_error_ret(status,"set arg",status);
1297 status = clSetKernelArg(writer,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
1298 status = clSetKernelArg(reader,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
1299
1300 // Check the macro value of CL_DEVICE_MAX_GLOBAL_VARIABLE
1301 const size_t one = 1;
1302 status = clEnqueueNDRangeKernel(queue,get_max_size,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue size query",status);
1303 status = clFinish(queue); test_error_ret(status,"finish",status);
1304
1305 cl_uchar *max_size_ret_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret), 0, 0, 0, 0);
1306 if ( max_size_ret != max_size ) {
1307 log_error("Error: preprocessor definition for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not match device query value %llu\n",
1308 (unsigned long long) max_size_ret,
1309 (unsigned long long) max_size );
1310 err |= 1;
1311 }
1312 clEnqueueUnmapMemObject(queue, max_size_ret_mem, max_size_ret_ptr, 0, 0, 0);
1313
1314 RandomSeed rand_state_write( gRandomSeed );
1315 for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
1316 size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
1317 l_set_randomly( buffer, curr_size, rand_state_write );
1318 status = clEnqueueWriteBuffer (queue, buffer_mem, CL_TRUE, 0, curr_size, buffer, 0, 0, 0);test_error_ret(status,"populate buffer_mem object",status);
1319 status = clEnqueueNDRangeKernel(queue,writer,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue writer",status);
1320 status = clFinish(queue); test_error_ret(status,"finish",status);
1321 }
1322
1323 RandomSeed rand_state_read( gRandomSeed );
1324 for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
1325 size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
1326 status = clEnqueueNDRangeKernel(queue,reader,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue reader",status);
1327 cl_uchar* read_mem_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0, &status);test_error_ret(status,"map read data",status);
1328 l_set_randomly( buffer, curr_size, rand_state_read );
1329 err |= l_compare( "capacity", buffer, read_mem_ptr, curr_size, ti );
1330 clEnqueueUnmapMemObject(queue, buffer_mem, read_mem_ptr, 0, 0, 0);
1331 }
1332
1333 if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1334 align_free(buffer);
1335
1336 return err;
1337 }
1338
1339
1340 // Check operation on a user type.
l_user_type(cl_device_id device,cl_context context,cl_command_queue queue,bool separate_compile)1341 static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, bool separate_compile )
1342 {
1343 int err = CL_SUCCESS;
1344 // Just test one type.
1345 const TypeInfo ti( l_find_type("uchar") );
1346 log_info(" l_user_type %s...", separate_compile ? "separate compilation" : "single source compilation" );
1347
1348 if ( separate_compile && ! l_linker_available ) {
1349 log_info("Separate compilation is not supported. Skipping test\n");
1350 return err;
1351 }
1352
1353 const char type_src[] =
1354 "typedef struct { uchar c; uint i; } my_struct_t;\n\n";
1355 const char def_src[] =
1356 "my_struct_t var = { 'a', 42 };\n\n";
1357 const char decl_src[] =
1358 "extern my_struct_t var;\n\n";
1359
1360 // Don't use a host struct. We can't guarantee that the host
1361 // compiler has the same structure layout as the device compiler.
1362 const char writer_src[] =
1363 "kernel void writer( uchar c, uint i ) {\n"
1364 " var.c = c;\n"
1365 " var.i = i;\n"
1366 "}\n\n";
1367 const char reader_src[] =
1368 "kernel void reader( global uchar* C, global uint* I ) {\n"
1369 " *C = var.c;\n"
1370 " *I = var.i;\n"
1371 "}\n\n";
1372
1373 clProgramWrapper program;
1374
1375 if ( separate_compile ) {
1376 // Separate compilation flow.
1377 StringTable wksrc;
1378 wksrc.add( type_src );
1379 wksrc.add( def_src );
1380 wksrc.add( writer_src );
1381
1382 StringTable rksrc;
1383 rksrc.add( type_src );
1384 rksrc.add( decl_src );
1385 rksrc.add( reader_src );
1386
1387 int status = CL_SUCCESS;
1388 clProgramWrapper writer_program( clCreateProgramWithSource( context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status ) );
1389 test_error_ret(status,"Failed to create writer program for user type test",status);
1390
1391 status = clCompileProgram( writer_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
1392 if(check_error(status, "Failed to compile writer program for user type test (%s)", IGetErrorString(status)))
1393 {
1394 print_build_log(writer_program, 1, &device, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), OPTIONS);
1395 return status;
1396 }
1397
1398 clProgramWrapper reader_program( clCreateProgramWithSource( context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status ) );
1399 test_error_ret(status,"Failed to create reader program for user type test",status);
1400
1401 status = clCompileProgram( reader_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
1402 if(check_error(status, "Failed to compile reader program for user type test (%s)", IGetErrorString(status)))
1403 {
1404 print_build_log(reader_program, 1, &device, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), OPTIONS);
1405 return status;
1406 }
1407
1408 cl_program progs[2];
1409 progs[0] = writer_program;
1410 progs[1] = reader_program;
1411
1412 program = clLinkProgram( context, 1, &device, "", 2, progs, 0, 0, &status );
1413 if(check_error(status, "Failed to link program for user type test (%s)", IGetErrorString(status)))
1414 {
1415 print_build_log(program, 1, &device, 0, NULL, NULL, "");
1416 return status;
1417 }
1418 } else {
1419 // Single compilation flow.
1420 StringTable ksrc;
1421 ksrc.add( type_src );
1422 ksrc.add( def_src );
1423 ksrc.add( writer_src );
1424 ksrc.add( reader_src );
1425
1426 int status = CL_SUCCESS;
1427
1428 status = create_single_kernel_helper_create_program(context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
1429 if(check_error(status, "Failed to build program for user type test (%s)", IGetErrorString(status)))
1430 {
1431 print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
1432 return status;
1433 }
1434
1435 status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
1436 if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status)))
1437 {
1438 print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
1439 return status;
1440 }
1441 }
1442
1443
1444 // Check size query.
1445 size_t used_bytes = 0;
1446 int status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
1447 test_error_ret(status,"Failed to query global variable total size",status);
1448 size_t expected_size = sizeof(cl_uchar) + sizeof(cl_uint);
1449 if ( used_bytes < expected_size ) {
1450 log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
1451 err |= 1;
1452 }
1453
1454 // Prepare to execute
1455 clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
1456 test_error_ret(status,"Failed to create writer kernel for user type test",status);
1457 clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
1458 test_error_ret(status,"Failed to create reader kernel for user type test",status);
1459
1460 // Set up data.
1461 cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
1462 cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
1463
1464 clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) );
1465 test_error_ret(status,"Failed to allocate uchar buffer",status);
1466 clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) );
1467 test_error_ret(status,"Failed to allocate uint buffer",status);
1468
1469 status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status);
1470 status = clSetKernelArg(reader,1,sizeof(cl_mem),&uint_mem); test_error_ret(status,"set arg",status);
1471
1472 cl_uchar expected_uchar = 'a';
1473 cl_uint expected_uint = 42;
1474 for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice
1475 // Read back data
1476 *uchar_data = -1;
1477 *uint_data = -1;
1478 const size_t one = 1;
1479 status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
1480 status = clFinish(queue); test_error_ret(status,"finish",status);
1481
1482 cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0);
1483 cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0);
1484
1485 if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) {
1486 log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
1487 iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint );
1488 err |= 1;
1489 }
1490
1491 clEnqueueUnmapMemObject(queue, uint_mem, uint_data_ptr, 0, 0, 0);
1492 clEnqueueUnmapMemObject(queue, uchar_mem, uchar_data_ptr, 0, 0, 0);
1493
1494 // Mutate the data.
1495 expected_uchar++;
1496 expected_uint++;
1497
1498 // Write the new values into persistent store.
1499 *uchar_data = expected_uchar;
1500 *uint_data = expected_uint;
1501 status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status);
1502 status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status);
1503 status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
1504 status = clFinish(queue); test_error_ret(status,"finish",status);
1505 }
1506
1507 if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1508 align_free(uchar_data);
1509 align_free(uint_data);
1510 return err;
1511 }
1512
1513 // Determines whether its valid to skip this test based on the driver version
1514 // and the features it optionally supports.
1515 // Whether the test should be skipped is writen into the out paramter skip.
1516 // The check returns an error code for the clDeviceInfo query.
should_skip(cl_device_id device,cl_bool & skip)1517 static cl_int should_skip(cl_device_id device, cl_bool& skip)
1518 {
1519 // Assume we can't skip to begin with.
1520 skip = CL_FALSE;
1521
1522 // Progvar tests are already skipped for OpenCL < 2.0, so here we only need
1523 // to test for 3.0 since that is when program scope global variables become
1524 // optional.
1525 if (get_device_cl_version(device) >= Version(3, 0))
1526 {
1527 size_t max_global_variable_size{};
1528 test_error(clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
1529 sizeof(max_global_variable_size),
1530 &max_global_variable_size, nullptr),
1531 "clGetDeviceInfo failed");
1532 skip = (max_global_variable_size != 0) ? CL_FALSE : CL_TRUE;
1533 }
1534 return CL_SUCCESS;
1535 }
1536
1537 ////////////////////
1538 // Global functions
1539
1540
1541 // Test support for variables at program scope. Miscellaneous
test_progvar_prog_scope_misc(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1542 int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1543 {
1544 cl_bool skip{ CL_FALSE };
1545 auto error = should_skip(device, skip);
1546 if (CL_SUCCESS != error)
1547 {
1548 return TEST_FAIL;
1549 }
1550 if (skip)
1551 {
1552 log_info("Skipping progvar_prog_scope_misc since it is optionally not "
1553 "supported on this device\n");
1554 return TEST_SKIPPED_ITSELF;
1555 }
1556 size_t max_size = 0;
1557 size_t pref_size = 0;
1558
1559 cl_int err = CL_SUCCESS;
1560
1561 err = l_get_device_info( device, &max_size, &pref_size );
1562 err |= l_build_type_table( device );
1563
1564 err |= l_capacity( device, context, queue, max_size );
1565 err |= l_user_type( device, context, queue, false );
1566 err |= l_user_type( device, context, queue, true );
1567
1568 return err;
1569 }
1570
1571
1572 // Test support for variables at program scope. Unitialized data
test_progvar_prog_scope_uninit(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1573 int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1574 {
1575 cl_bool skip{ CL_FALSE };
1576 auto error = should_skip(device, skip);
1577 if (CL_SUCCESS != error)
1578 {
1579 return TEST_FAIL;
1580 }
1581 if (skip)
1582 {
1583 log_info(
1584 "Skipping progvar_prog_scope_uninit since it is optionally not "
1585 "supported on this device\n");
1586 return TEST_SKIPPED_ITSELF;
1587 }
1588 size_t max_size = 0;
1589 size_t pref_size = 0;
1590
1591 cl_int err = CL_SUCCESS;
1592
1593 err = l_get_device_info( device, &max_size, &pref_size );
1594 err |= l_build_type_table( device );
1595
1596 err |= l_write_read( device, context, queue );
1597
1598 return err;
1599 }
1600
1601 // Test support for variables at program scope. Initialized data.
test_progvar_prog_scope_init(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1602 int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1603 {
1604 cl_bool skip{ CL_FALSE };
1605 auto error = should_skip(device, skip);
1606 if (CL_SUCCESS != error)
1607 {
1608 return TEST_FAIL;
1609 }
1610 if (skip)
1611 {
1612 log_info("Skipping progvar_prog_scope_init since it is optionally not "
1613 "supported on this device\n");
1614 return TEST_SKIPPED_ITSELF;
1615 }
1616 size_t max_size = 0;
1617 size_t pref_size = 0;
1618
1619 cl_int err = CL_SUCCESS;
1620
1621 err = l_get_device_info( device, &max_size, &pref_size );
1622 err |= l_build_type_table( device );
1623
1624 err |= l_init_write_read( device, context, queue );
1625
1626 return err;
1627 }
1628
1629
1630 // A simple test for support of static variables inside a kernel.
test_progvar_func_scope(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1631 int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1632 {
1633 cl_bool skip{ CL_FALSE };
1634 auto error = should_skip(device, skip);
1635 if (CL_SUCCESS != error)
1636 {
1637 return TEST_FAIL;
1638 }
1639 if (skip)
1640 {
1641 log_info("Skipping progvar_func_scope since it is optionally not "
1642 "supported on this device\n");
1643 return TEST_SKIPPED_ITSELF;
1644 }
1645 size_t max_size = 0;
1646 size_t pref_size = 0;
1647
1648 cl_int err = CL_SUCCESS;
1649
1650 // Deliberately have two variables with the same name but in different
1651 // scopes.
1652 // Also, use a large initialized structure in both cases.
1653 const char prog_src[] =
1654 "typedef struct { char c; int16 i; } mystruct_t;\n"
1655 "kernel void test_bump( global int* value, int which ) {\n"
1656 " if ( which ) {\n"
1657 // Explicit address space.
1658 // Last element set to 0
1659 " static global mystruct_t persistent = {'a',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
1660 " *value = persistent.i.sf++;\n"
1661 " } else {\n"
1662 // Implicitly global
1663 // Last element set to 100
1664 " static mystruct_t persistent = {'b',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
1665 " *value = persistent.i.sf++;\n"
1666 " }\n"
1667 "}\n";
1668
1669 StringTable ksrc;
1670 ksrc.add( prog_src );
1671
1672 int status = CL_SUCCESS;
1673 clProgramWrapper program;
1674 clKernelWrapper test_bump;
1675
1676 status = create_single_kernel_helper_with_build_options(context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump", OPTIONS);
1677 test_error_ret(status, "Failed to create program for function static variable test", status);
1678
1679 // Check size query.
1680 size_t used_bytes = 0;
1681 status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
1682 test_error_ret(status,"Failed to query global variable total size",status);
1683 size_t expected_size = 2 * sizeof(cl_int); // Two ints.
1684 if ( used_bytes < expected_size ) {
1685 log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
1686 err |= 1;
1687 }
1688
1689 // Prepare the data.
1690 cl_int counter_value = 0;
1691 clMemWrapper counter_value_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(counter_value), &counter_value, &status ) );
1692 test_error_ret(status,"Failed to allocate counter query buffer",status);
1693
1694 status = clSetKernelArg(test_bump,0,sizeof(cl_mem),&counter_value_mem); test_error_ret(status,"set arg",status);
1695
1696 // Go a few rounds, alternating between the two counters in the kernel.
1697
1698 // Same as initial values in kernel.
1699 // But "true" which increments the 0-based counter, and "false" which
1700 // increments the 100-based counter.
1701 cl_int expected_counter[2] = { 100, 0 };
1702
1703 const size_t one = 1;
1704 for ( int iround = 0; iround < 5 ; iround++ ) { // Must go at least twice around
1705 for ( int iwhich = 0; iwhich < 2 ; iwhich++ ) { // Cover both counters
1706 status = clSetKernelArg(test_bump,1,sizeof(iwhich),&iwhich); test_error_ret(status,"set arg",status);
1707 status = clEnqueueNDRangeKernel(queue,test_bump,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue test_bump",status);
1708 status = clFinish(queue); test_error_ret(status,"finish",status);
1709
1710 cl_uchar *counter_value_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(counter_value), 0, 0, 0, 0);
1711
1712 if ( counter_value != expected_counter[iwhich] ) {
1713 log_error("Error: Round %d on counter %d: Expected %d but got %d\n",
1714 iround, iwhich, expected_counter[iwhich], counter_value );
1715 err |= 1;
1716 }
1717 expected_counter[iwhich]++; // Emulate behaviour of the kernel.
1718
1719 clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr, 0, 0, 0);
1720 }
1721 }
1722
1723 if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
1724
1725 return err;
1726 }
1727