1 //
2 // Copyright (c) 2017, 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18 
19 #if defined( __APPLE__ )
20     #include <signal.h>
21     #include <sys/signal.h>
22     #include <setjmp.h>
23 #endif
24 
25 extern bool gTestImage2DFromBuffer;
26 
27 // Utility function to clamp down image sizes for certain tests to avoid
28 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)29 static size_t reduceImageSizeRange(size_t maxDimSize) {
30   size_t DimSize = maxDimSize/32;
31   if (DimSize < (size_t) 16)
32     return 16;
33   else if (DimSize > (size_t) 256)
34     return 256;
35   else
36     return DimSize;
37 }
38 
39 const char *read2DKernelSourcePattern =
40 "__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
41 "{\n"
42 "%s"
43 "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
44 "%s"
45 "%s"
46 "   results[offset] = read_image%s( input, imageSampler, coords %s);\n"
47 "}";
48 
49 const char *read_write2DKernelSourcePattern =
50 "__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
51 "{\n"
52 "%s"
53 "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
54 "%s"
55 "%s"
56 "   results[offset] = read_image%s( input, coords %s);\n"
57 "}";
58 
59 const char *intCoordKernelSource =
60 "   int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
61 
62 const char *floatKernelSource =
63 "   float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
64 
65 static const char *samplerKernelArg = " sampler_t imageSampler,";
66 
67 static const char *lodOffsetSource =
68 "   unsigned int lod_int = (unsigned int) lod;\n"
69 "   int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n"
70 "   int offset = tidY*width_lod + tidX;\n";
71 
72 static const char *offsetSource =
73 "   int offset = tidY*get_image_width(input) + tidX;\n";
74 
determine_validation_error(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float xAddressOffset,float yAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod=0)75 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
76                                                 T *resultPtr, T * expected, float error,
77                                 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
78 {
79     int actualX, actualY;
80     int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod );
81     bool clampingErr = false, clamped = false, otherClampingBug = false;
82     int clampedX, clampedY, ignoreMe;
83 
84     clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
85 
86     if( found )
87     {
88         // Is it a clamping bug?
89         if( clamped && clampedX == actualX && clampedY == actualY )
90         {
91             if( (--numClamped) == 0 )
92             {
93                 log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
94                 if (imageInfo->format->image_channel_order == CL_DEPTH)
95                 {
96                     if( printAsFloat )
97                     {
98                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
99                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
100                     }
101                     else
102                     {
103                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
104                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
105                     }
106                 }
107                 else
108                 {
109                     if( printAsFloat )
110                     {
111                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
112                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
113                                 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
114                     }
115                     else
116                     {
117                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
118                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
119                                 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
120                     }
121                 }
122                 return 1;
123             }
124             clampingErr = true;
125             otherClampingBug = true;
126         }
127     }
128     if( clamped && !otherClampingBug )
129     {
130         // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
131         if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
132         {
133             if( (--numClamped) == 0 )
134             {
135                 log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
136                 if (imageInfo->format->image_channel_order == CL_DEPTH)
137                 {
138                     if( printAsFloat )
139                     {
140                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
141                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
142                     }
143                     else
144                     {
145                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
146                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
147                     }
148                 }
149                 else
150                 {
151                     if( printAsFloat )
152                     {
153                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
154                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
155                                 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
156                     }
157                     else
158                     {
159                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
160                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
161                                 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
162                     }
163                 }
164                 return 1;
165             }
166             clampingErr = true;
167         }
168     }
169     if( !clampingErr )
170     {
171         if (imageInfo->format->image_channel_order == CL_DEPTH)
172         {
173             if( printAsFloat )
174             {
175               log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
176                         (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
177             }
178             else
179             {
180               log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
181                         (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
182             }
183         }
184         else
185         {
186             if( printAsFloat )
187             {
188                 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g), error of %g\n",
189                           (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
190                           (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
191             }
192             else
193             {
194                 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
195                           (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
196                                     (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
197             }
198         }
199         log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch );
200         if( clamped )
201         {
202             log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
203         }
204         if( printAsFloat && gExtraValidateInfo)
205         {
206             log_error( "Nearby values:\n" );
207             log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
208             for( int yOff = -2; yOff <= 1; yOff++ )
209             {
210                 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
211                 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
212                 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
213                 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
214                 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
215                 if (imageInfo->format->image_channel_order == CL_DEPTH)
216                 {
217                     log_error( "%d\t(%g)",clampedY + yOff, top[0] );
218                     log_error( " (%g)", real[0] );
219                     log_error( " (%g)",bot[0] );
220                     log_error( " (%g)\n",bot2[0] );
221                 }
222                 else
223                 {
224                     log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
225                     log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
226                     log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
227                     log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
228                 }
229             }
230 
231             if( clampedY < 1 )
232             {
233                 log_error( "Nearby values:\n" );
234                 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
235                 for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ )
236                 {
237                     float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
238                     read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
239                     read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
240                     read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
241                     read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
242                     if (imageInfo->format->image_channel_order == CL_DEPTH)
243                     {
244                         log_error( "%d\t(%g)",clampedY + yOff, top[0] );
245                         log_error( " (%g)", real[0] );
246                         log_error( " (%g)",bot[0] );
247                         log_error( " (%g)\n",bot2[0] );
248                     }
249                     else
250                     {
251                         log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
252                         log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
253                         log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
254                         log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
255                     }
256                 }
257             }
258         }
259 
260         if( imageSampler->filter_mode != CL_FILTER_LINEAR )
261         {
262             if( found )
263                 log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
264             else
265                 log_error( "\tValue not actually found in image\n" );
266         }
267         log_error( "\n" );
268 
269         numClamped = -1; // We force the clamped counter to never work
270         if( ( --numTries ) == 0 )
271         {
272             return 1;
273         }
274     }
275     return 0;
276 }
277 
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d)278 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
279 {
280     size_t i = 0;
281     if( gDisableOffsets )
282     {
283         for( size_t y = 0; y < imageInfo->height; y++ )
284         {
285             for( size_t x = 0; x < imageInfo->width; x++, i++ )
286             {
287                 xOffsets[ i ] = (float) (xfract + (double) x);
288                 yOffsets[ i ] = (float) (yfract + (double) y);
289             }
290         }
291     }
292     else
293     {
294         for( size_t y = 0; y < imageInfo->height; y++ )
295         {
296             for( size_t x = 0; x < imageInfo->width; x++, i++ )
297             {
298                 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
299                 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
300             }
301         }
302     }
303 
304     if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
305     {
306         i = 0;
307         for( size_t y = 0; y < imageInfo->height; y++ )
308         {
309             for( size_t x = 0; x < imageInfo->width; x++, i++ )
310             {
311                 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
312                 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0);
313             }
314         }
315     }
316 
317     if( normalized_coords )
318     {
319         i = 0;
320         for( size_t y = 0; y < imageInfo->height; y++ )
321         {
322             for( size_t x = 0; x < imageInfo->width; x++, i++ )
323             {
324                 xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
325                 yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
326             }
327         }
328     }
329 }
330 
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d,size_t lod)331 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
332 {
333     size_t i = 0;
334     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
335 
336     if( gTestMipmaps )
337     {
338         width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
339         height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
340     }
341     if( gDisableOffsets )
342     {
343         for( size_t y = 0; y < height_lod; y++ )
344         {
345             for( size_t x = 0; x < width_lod; x++, i++ )
346             {
347                 xOffsets[ i ] = (float) (xfract + (float) x);
348                 yOffsets[ i ] = (float) (yfract + (float) y);
349             }
350         }
351     }
352     else
353     {
354         for( size_t y = 0; y < height_lod; y++ )
355         {
356             for( size_t x = 0; x < width_lod; x++, i++ )
357             {
358                 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
359                 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
360             }
361         }
362     }
363 
364     if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
365     {
366         i = 0;
367         for( size_t y = 0; y < height_lod; y++ )
368         {
369             for( size_t x = 0; x < width_lod; x++, i++ )
370             {
371                 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
372                 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0);
373             }
374         }
375     }
376 
377     if( normalized_coords )
378     {
379         i = 0;
380         for( size_t y = 0; y < height_lod; y++ )
381         {
382             for( size_t x = 0; x < width_lod; x++, i++ )
383             {
384                 xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod);
385                 yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod);
386             }
387         }
388     }
389 }
390 
validate_image_2D_depth_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)391 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
392                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
393 {
394     // Validate results element by element
395     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
396     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
397     /*
398      * FLOAT output type
399      */
400     if( outputType == kFloat )
401     {
402         // Validate float results
403         float *resultPtr = (float *)(char *)resultValues;
404         float expected[4], error=0.0f;
405         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
406         for( size_t y = 0, j = 0; y < height_lod; y++ )
407         {
408             for( size_t x = 0; x < width_lod; x++, j++ )
409             {
410                 // Step 1: go through and see if the results verify for the pixel
411                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
412                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
413                 int checkOnlyOnePixel = 0;
414                 int found_pixel = 0;
415                 float offset = NORM_OFFSET;
416                 if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
417 #if defined( __APPLE__ )
418                     // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
419                     || gDeviceType != CL_DEVICE_TYPE_GPU
420 #endif
421                     )
422                     offset = 0.0f;          // Loop only once
423 
424                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
425                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
426 
427                         // Try sampling the pixel, without flushing denormals.
428                         int containsDenormals = 0;
429                         FloatPixel maxPixel;
430                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
431                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
432                                                                     imageSampler, expected, 0, &containsDenormals );
433 
434                         float err1 = ABS_ERROR(resultPtr[0], expected[0]);
435                         // Clamp to the minimum absolute error for the format
436                         if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
437                         float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
438 
439                         // Check if the result matches.
440                         if( ! (err1 <= maxErr1) )
441                         {
442                             //try flushing the denormals, if there is a failure.
443                             if( containsDenormals )
444                             {
445                                 // If implementation decide to flush subnormals to zero,
446                                 // max error needs to be adjusted
447                                 maxErr1 += 4 * FLT_MIN;
448 
449                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
450                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
451                                                                              imageSampler, expected, 0, NULL );
452 
453                                 err1 = ABS_ERROR(resultPtr[0], expected[0]);
454                             }
455                         }
456 
457                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
458                         found_pixel = (err1 <= maxErr1);
459                     }//norm_offset_x
460                 }//norm_offset_y
461 
462 
463                 // Step 2: If we did not find a match, then print out debugging info.
464                 if (!found_pixel) {
465                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
466                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
467                     checkOnlyOnePixel = 0;
468                     int shouldReturn = 0;
469                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
470                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
471 
472                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
473                             // E.g., test one pixel.
474                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
475                                 norm_offset_x = 0.0f;
476                                 norm_offset_y = 0.0f;
477                                 checkOnlyOnePixel = 1;
478                             }
479 
480                             int containsDenormals = 0;
481                             FloatPixel maxPixel;
482                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
483                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
484                                                                                     imageSampler, expected, 0, &containsDenormals );
485 
486                             float err1 = ABS_ERROR(resultPtr[0], expected[0]);
487                             float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
488 
489 
490                             if( ! (err1 <= maxErr1) )
491                             {
492                                 //try flushing the denormals, if there is a failure.
493                                 if( containsDenormals )
494                                 {
495                                     maxErr1 += 4 * FLT_MIN;
496 
497                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
498                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
499                                                                                  imageSampler, expected, 0, NULL );
500 
501                                     err1 = ABS_ERROR(resultPtr[0], expected[0]);
502                                 }
503                             }
504                             if( ! (err1 <= maxErr1) )
505                             {
506                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
507 
508                                 float tempOut[4];
509                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
510                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
511 
512                                 log_error( "Step by step:\n" );
513                                 FloatPixel temp;
514                                 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
515                                                                                xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
516                                                                                imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
517                                 log_error( "\tulps: %2.2f  (max allowed: %2.2f)\n\n",
518                                                     Ulp_Error( resultPtr[0], expected[0] ),
519                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
520 
521                             } else {
522                                 log_error("Test error: we should have detected this passing above.\n");
523                             }
524 
525                         }//norm_offset_x
526                     }//norm_offset_y
527                     if( shouldReturn )
528                         return 1;
529                 } // if (!found_pixel)
530 
531                 resultPtr += 1;
532             }
533         }
534     }
535     else
536     {
537         log_error("Test error: Not supported format.\n");
538         return 1;
539     }
540     return 0;
541 }
542 
validate_image_2D_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)543 int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
544                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
545 {
546     // Validate results element by element
547     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
548     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
549     /*
550      * FLOAT output type
551      */
552     if( outputType == kFloat )
553     {
554         // Validate float results
555         float *resultPtr = (float *)(char *)resultValues;
556         float expected[4], error=0.0f;
557         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
558         for( size_t y = 0, j = 0; y < height_lod; y++ )
559         {
560             for( size_t x = 0; x < width_lod; x++, j++ )
561             {
562                 // Step 1: go through and see if the results verify for the pixel
563                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
564                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
565                 int checkOnlyOnePixel = 0;
566                 int found_pixel = 0;
567                 float offset = NORM_OFFSET;
568                 if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
569 #if defined( __APPLE__ )
570                     // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
571                     || gDeviceType != CL_DEVICE_TYPE_GPU
572 #endif
573                     )
574                     offset = 0.0f;          // Loop only once
575 
576                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
577                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
578 
579 
580                         // Try sampling the pixel, without flushing denormals.
581                         int containsDenormals = 0;
582                         FloatPixel maxPixel;
583                         if ( gTestMipmaps )
584                             maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
585                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
586                                                                         imageSampler, expected, 0, &containsDenormals, lod );
587                         else
588                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
589                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
590                                                                         imageSampler, expected, 0, &containsDenormals );
591 
592                         float err1 = ABS_ERROR(resultPtr[0], expected[0]);
593                         float err2 = ABS_ERROR(resultPtr[1], expected[1]);
594                         float err3 = ABS_ERROR(resultPtr[2], expected[2]);
595                         float err4 = ABS_ERROR(resultPtr[3], expected[3]);
596                         // Clamp to the minimum absolute error for the format
597                         if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
598                         if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
599                         if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
600                         if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
601                         float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
602                         float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
603                         float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
604                         float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
605 
606                         // Check if the result matches.
607                         if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
608                            ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
609                         {
610                             //try flushing the denormals, if there is a failure.
611                             if( containsDenormals )
612                             {
613                                // If implementation decide to flush subnormals to zero,
614                                // max error needs to be adjusted
615                                 maxErr1 += 4 * FLT_MIN;
616                                 maxErr2 += 4 * FLT_MIN;
617                                 maxErr3 += 4 * FLT_MIN;
618                                 maxErr4 += 4 * FLT_MIN;
619 
620                                 if(gTestMipmaps)
621                                     maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
622                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
623                                                                                  imageSampler, expected, 0, NULL,lod );
624                                 else
625                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
626                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
627                                                                                  imageSampler, expected, 0, NULL );
628 
629                                 err1 = ABS_ERROR(resultPtr[0], expected[0]);
630                                 err2 = ABS_ERROR(resultPtr[1], expected[1]);
631                                 err3 = ABS_ERROR(resultPtr[2], expected[2]);
632                                 err4 = ABS_ERROR(resultPtr[3], expected[3]);
633                             }
634                         }
635 
636                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
637                         found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);
638                     }//norm_offset_x
639                 }//norm_offset_y
640 
641 
642                 // Step 2: If we did not find a match, then print out debugging info.
643                 if (!found_pixel) {
644                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
645                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
646                     checkOnlyOnePixel = 0;
647                     int shouldReturn = 0;
648                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
649                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
650 
651                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
652                             // E.g., test one pixel.
653                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
654                                 norm_offset_x = 0.0f;
655                                 norm_offset_y = 0.0f;
656                                 checkOnlyOnePixel = 1;
657                             }
658 
659                             int containsDenormals = 0;
660                             FloatPixel maxPixel;
661                             if(gTestMipmaps)
662                                 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
663                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
664                                                                                         imageSampler, expected, 0, &containsDenormals, lod );
665                             else
666                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
667                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
668                                                                                         imageSampler, expected, 0, &containsDenormals );
669 
670                             float err1 = ABS_ERROR(resultPtr[0], expected[0]);
671                             float err2 = ABS_ERROR(resultPtr[1], expected[1]);
672                             float err3 = ABS_ERROR(resultPtr[2], expected[2]);
673                             float err4 = ABS_ERROR(resultPtr[3], expected[3]);
674                             float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
675                             float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
676                             float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
677                             float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
678 
679 
680                             if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
681                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
682                             {
683                                 //try flushing the denormals, if there is a failure.
684                                 if( containsDenormals )
685                                 {
686                                     maxErr1 += 4 * FLT_MIN;
687                                     maxErr2 += 4 * FLT_MIN;
688                                     maxErr3 += 4 * FLT_MIN;
689                                     maxErr4 += 4 * FLT_MIN;
690 
691                                     if(gTestMipmaps)
692                                         maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
693                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
694                                                                                      imageSampler, expected, 0, NULL, lod );
695                                     else
696                                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
697                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
698                                                                                      imageSampler, expected, 0, NULL );
699 
700                                     err1 = ABS_ERROR(resultPtr[0], expected[0]);
701                                     err2 = ABS_ERROR(resultPtr[1], expected[1]);
702                                     err3 = ABS_ERROR(resultPtr[2], expected[2]);
703                                     err4 = ABS_ERROR(resultPtr[3], expected[3]);
704                                 }
705                             }
706                             if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
707                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
708                             {
709                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
710 
711                                 float tempOut[4];
712                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
713                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
714 
715                                 log_error( "Step by step:\n" );
716                                 FloatPixel temp;
717                                 if( gTestMipmaps )
718                                      temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
719                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
720                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
721                                  else
722                                      temp = sample_image_pixel_float_offset( imageValues, imageInfo,
723                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
724                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
725                                 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
726                                                     Ulp_Error( resultPtr[0], expected[0] ),
727                                                     Ulp_Error( resultPtr[1], expected[1] ),
728                                                     Ulp_Error( resultPtr[2], expected[2] ),
729                                                     Ulp_Error( resultPtr[3], expected[3] ),
730                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
731 
732                             } else {
733                                 log_error("Test error: we should have detected this passing above.\n");
734                             }
735 
736                         }//norm_offset_x
737                     }//norm_offset_y
738                     if( shouldReturn )
739                         return 1;
740                 } // if (!found_pixel)
741 
742                 resultPtr += 4;
743             }
744         }
745     }
746     /*
747      * UINT output type
748      */
749     else if( outputType == kUInt )
750     {
751         // Validate unsigned integer results
752         unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
753         unsigned int expected[4];
754         float error;
755         for( size_t y = 0, j = 0; y < height_lod ; y++ )
756         {
757             for( size_t x = 0; x < width_lod ; x++, j++ )
758             {
759                 // Step 1: go through and see if the results verify for the pixel
760                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
761                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
762                 int checkOnlyOnePixel = 0;
763                 int found_pixel = 0;
764                 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
765                     for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
766 
767                         // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
768                         // E.g., test one pixel.
769                         if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
770                             norm_offset_x = 0.0f;
771                             norm_offset_y = 0.0f;
772                             checkOnlyOnePixel = 1;
773                         }
774 
775                         if ( gTestMipmaps )
776                             sample_image_pixel_offset<unsigned int>( (char*)imagePtr, imageInfo,
777                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
778                                                                                              imageSampler, expected, lod );
779                         else
780                             sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
781                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
782                                                                                              imageSampler, expected);
783 
784 
785                         error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
786                                        errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
787 
788                         if (error <= MAX_ERR)
789                             found_pixel = 1;
790                     }//norm_offset_x
791                 }//norm_offset_y
792 
793                 // Step 2: If we did not find a match, then print out debugging info.
794                 if (!found_pixel) {
795                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
796                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
797                     checkOnlyOnePixel = 0;
798                     int shouldReturn = 0;
799                     for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
800                         for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
801 
802                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
803                             // E.g., test one pixel.
804                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
805                                 norm_offset_x = 0.0f;
806                                 norm_offset_y = 0.0f;
807                                 checkOnlyOnePixel = 1;
808                             }
809 
810                             if( gTestMipmaps )
811                                 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
812                                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
813                                                                                                  imageSampler, expected, lod );
814                             else
815                                 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
816                                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
817                                                                                                  imageSampler, expected);
818 
819 
820                             error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
821                                            errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
822 
823                             if( error > MAX_ERR )
824                             {
825                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
826 
827                                 shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
828                                                                                          expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
829                             } else {
830                                 log_error("Test error: we should have detected this passing above.\n");
831                             }
832                         }//norm_offset_x
833                     }//norm_offset_y
834                     if( shouldReturn )
835                         return 1;
836                 } // if (!found_pixel)
837 
838                 resultPtr += 4;
839             }
840         }
841     }
842     /*
843      * INT output type
844      */
845     else
846     {
847         // Validate integer results
848         int *resultPtr = (int *)(char *)resultValues;
849         int expected[4];
850         float error;
851         for( size_t y = 0, j = 0; y < height_lod ; y++ )
852         {
853             for( size_t x = 0; x < width_lod; x++, j++ )
854             {
855                 // Step 1: go through and see if the results verify for the pixel
856                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
857                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
858                 int checkOnlyOnePixel = 0;
859                 int found_pixel = 0;
860                 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
861                     for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
862 
863                         // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
864                         // E.g., test one pixel.
865                         if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
866                             norm_offset_x = 0.0f;
867                             norm_offset_y = 0.0f;
868                             checkOnlyOnePixel = 1;
869                         }
870 
871                         if ( gTestMipmaps )
872                             sample_image_pixel_offset<int>( imagePtr, imageInfo,
873                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
874                                                             imageSampler, expected , lod);
875                         else
876                             sample_image_pixel_offset<int>( imageValues, imageInfo,
877                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
878                                                             imageSampler, expected );
879 
880 
881                         error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
882                                        errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
883 
884                         if (error <= MAX_ERR)
885                             found_pixel = 1;
886                     }//norm_offset_x
887                 }//norm_offset_y
888 
889                 // Step 2: If we did not find a match, then print out debugging info.
890                 if (!found_pixel) {
891                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
892                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
893                     checkOnlyOnePixel = 0;
894                     int shouldReturn = 0;
895                     for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
896                         for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
897 
898                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
899                             // E.g., test one pixel.
900                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
901                                 norm_offset_x = 0.0f;
902                                 norm_offset_y = 0.0f;
903                                 checkOnlyOnePixel = 1;
904                             }
905 
906                             if ( gTestMipmaps )
907                                 sample_image_pixel_offset<int>( imageValues, imageInfo,
908                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
909                                                                 imageSampler, expected, lod );
910                             else
911                                 sample_image_pixel_offset<int>( imageValues, imageInfo,
912                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
913                                                                 imageSampler, expected );
914 
915 
916                             error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
917                                            errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
918 
919                             if( error > MAX_ERR )
920                             {
921                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
922 
923                                 shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
924                                                                                 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
925                             } else {
926                                 log_error("Test error: we should have detected this passing above.\n");
927                             }
928                         }//norm_offset_x
929                     }//norm_offset_y
930                     if( shouldReturn )
931                         return 1;
932                 } // if (!found_pixel)
933 
934                 resultPtr += 4;
935             }
936         }
937     }
938     return 0;
939 }
940 
validate_image_2D_sRGB_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)941 int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
942                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
943 {
944     // Validate results element by element
945     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
946     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
947     /*
948      * FLOAT output type
949      */
950     if( outputType == kFloat )
951     {
952         // Validate float results
953         float *resultPtr = (float *)(char *)resultValues;
954         float expected[4], error=0.0f;
955         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
956         for( size_t y = 0, j = 0; y < height_lod; y++ )
957         {
958             for( size_t x = 0; x < width_lod; x++, j++ )
959             {
960                 // Step 1: go through and see if the results verify for the pixel
961                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
962                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
963                 int checkOnlyOnePixel = 0;
964                 int found_pixel = 0;
965                 float offset = NORM_OFFSET;
966                 if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
967 #if defined( __APPLE__ )
968                     // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
969                     || gDeviceType != CL_DEVICE_TYPE_GPU
970 #endif
971                     )
972                     offset = 0.0f;          // Loop only once
973 
974                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
975                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
976 
977 
978                         // Try sampling the pixel, without flushing denormals.
979                         int containsDenormals = 0;
980                         FloatPixel maxPixel;
981                         if ( gTestMipmaps )
982                             maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
983                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
984                                                                         imageSampler, expected, 0, &containsDenormals, lod );
985                         else
986                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
987                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
988                                                                         imageSampler, expected, 0, &containsDenormals );
989                         float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
990                                                sRGBmap(expected[0]));
991                         float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
992                                                sRGBmap(expected[1]));
993                         float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
994                                                sRGBmap(expected[2]));
995                         float err4 = ABS_ERROR(resultPtr[3], expected[3]);
996                         float maxErr = 0.5;
997 
998                         // Check if the result matches.
999                         if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1000                            ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1001                         {
1002                             //try flushing the denormals, if there is a failure.
1003                             if( containsDenormals )
1004                             {
1005                                 // If implementation decide to flush subnormals to zero,
1006                                 // max error needs to be adjusted
1007                                 maxErr += 4 * FLT_MIN;
1008 
1009                                 if(gTestMipmaps)
1010                                     maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1011                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1012                                                                                  imageSampler, expected, 0, NULL,lod );
1013                                 else
1014                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1015                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1016                                                                                  imageSampler, expected, 0, NULL );
1017 
1018                                 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1019                                                  sRGBmap(expected[0]));
1020                                 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1021                                                  sRGBmap(expected[1]));
1022                                 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1023                                                  sRGBmap(expected[2]));
1024                                 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1025                             }
1026                         }
1027 
1028                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
1029                         found_pixel = (err1 <= maxErr) && (err2 <= maxErr)  && (err3 <= maxErr) && (err4 <= maxErr);
1030                     }//norm_offset_x
1031                 }//norm_offset_y
1032 
1033 
1034                 // Step 2: If we did not find a match, then print out debugging info.
1035                 if (!found_pixel) {
1036                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1037                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1038                     checkOnlyOnePixel = 0;
1039                     int shouldReturn = 0;
1040                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1041                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1042 
1043                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1044                             // E.g., test one pixel.
1045                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
1046                                 norm_offset_x = 0.0f;
1047                                 norm_offset_y = 0.0f;
1048                                 checkOnlyOnePixel = 1;
1049                             }
1050 
1051                             int containsDenormals = 0;
1052                             FloatPixel maxPixel;
1053                             if(gTestMipmaps)
1054                                 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1055                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1056                                                                                         imageSampler, expected, 0, &containsDenormals, lod );
1057                             else
1058                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1059                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1060                                                                                         imageSampler, expected, 0, &containsDenormals );
1061 
1062                             float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1063                                                    sRGBmap(expected[0]));
1064                             float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1065                                                    sRGBmap(expected[1]));
1066                             float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1067                                                    sRGBmap(expected[2]));
1068                             float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1069                             float maxErr = 0.6;
1070 
1071                             if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1072                                ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1073                             {
1074                                 //try flushing the denormals, if there is a failure.
1075                                 if( containsDenormals )
1076                                 {
1077                                     // If implementation decide to flush subnormals to zero,
1078                                     // max error needs to be adjusted
1079                                     maxErr += 4 * FLT_MIN;
1080                                     if(gTestMipmaps)
1081                                         maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1082                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1083                                                                                      imageSampler, expected, 0, NULL, lod );
1084                                     else
1085                                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1086                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1087                                                                                      imageSampler, expected, 0, NULL );
1088 
1089                                     err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1090                                                      sRGBmap(expected[0]));
1091                                     err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1092                                                      sRGBmap(expected[1]));
1093                                     err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1094                                                      sRGBmap(expected[2]));
1095                                     err4 = ABS_ERROR(resultPtr[3], expected[3]);
1096                                 }
1097                             }
1098                             if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1099                                ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1100                             {
1101                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
1102 
1103                                 float tempOut[4];
1104                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1105                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
1106 
1107                                 log_error( "Step by step:\n" );
1108                                 FloatPixel temp;
1109                                 if( gTestMipmaps )
1110                                      temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1111                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1112                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
1113                                  else
1114                                      temp = sample_image_pixel_float_offset( imageValues, imageInfo,
1115                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1116                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
1117                                 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
1118                                                     Ulp_Error( resultPtr[0], expected[0] ),
1119                                                     Ulp_Error( resultPtr[1], expected[1] ),
1120                                                     Ulp_Error( resultPtr[2], expected[2] ),
1121                                                     Ulp_Error( resultPtr[3], expected[3] ),
1122                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1123 
1124                             } else {
1125                                 log_error("Test error: we should have detected this passing above.\n");
1126                             }
1127 
1128                         }//norm_offset_x
1129                     }//norm_offset_y
1130                     if( shouldReturn )
1131                         return 1;
1132                 } // if (!found_pixel)
1133 
1134                 resultPtr += 4;
1135             }
1136         }
1137     }
1138     else {
1139         log_error("Test error: NOT SUPPORTED.\n");
1140     }
1141     return 0;
1142 }
1143 
validate_float_write_results(float * expected,float * actual,image_descriptor * imageInfo)1144 bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo )
1145 {
1146     bool pass = true;
1147     // Compare floats
1148     if( memcmp( expected, actual, sizeof( cl_float ) * get_format_channel_count( imageInfo->format ) ) != 0 )
1149     {
1150         // 8.3.3 Fix up cases where we have NaNs or flushed denorms; "all other values must be preserved"
1151         for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
1152         {
1153             if ( isnan( expected[j] ) && isnan( actual[j] ) )
1154                 continue;
1155             if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
1156                 continue;
1157             if (expected[j] != actual[j])
1158             {
1159                 pass = false;
1160                 break;
1161             }
1162         }
1163     }
1164     return pass;
1165 }
1166 
validate_half_write_results(cl_half * expected,cl_half * actual,image_descriptor * imageInfo)1167 bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo )
1168 {
1169     bool pass = true;
1170     // Compare half floats
1171     if (memcmp(expected, actual, sizeof( cl_half ) * get_format_channel_count(imageInfo->format)) != 0) {
1172 
1173         // 8.3.2 Fix up cases where we have NaNs or generated half denormals
1174         for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) {
1175             if ( is_half_nan( expected[j] ) && is_half_nan( actual[j] ) )
1176                 continue;
1177             if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
1178                 continue;
1179             if (expected[j] != actual[j])
1180             {
1181                 pass = false;
1182                 break;
1183             }
1184         }
1185     }
1186     return pass;
1187 }
1188 
test_read_image_2D(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)1189 int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel kernel,
1190                         image_descriptor *imageInfo, image_sampler_data *imageSampler,
1191                        bool useFloatCoords, ExplicitType outputType, MTdata d )
1192 {
1193     int error;
1194     static int initHalf = 0;
1195     cl_mem imageBuffer;
1196     cl_mem_flags    image_read_write_flags = CL_MEM_READ_ONLY;
1197     size_t threads[2];
1198 
1199     clMemWrapper xOffsets, yOffsets, results;
1200     clSamplerWrapper actualSampler;
1201     BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
1202 
1203     // The DataBuffer template class really does use delete[], not free -- IRO
1204     BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1205     BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1206 
1207     if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
1208         if( DetectFloatToHalfRoundingMode(queue) )
1209             return 1;
1210 
1211     // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
1212     BufferOwningPtr<char> imageValues;
1213     generate_random_image_data( imageInfo, imageValues, d );
1214 
1215     if( gDebugTrace )
1216     {
1217         log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
1218         if( gTestMipmaps )
1219         {
1220             log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels );
1221         }
1222     }
1223 
1224     // Construct testing sources
1225     clProtectedImage protImage;
1226     clMemWrapper unprotImage;
1227     cl_mem image;
1228 
1229     if(gtestTypesToRun & kReadTests)
1230     {
1231         image_read_write_flags = CL_MEM_READ_ONLY;
1232     }
1233     else
1234     {
1235         image_read_write_flags = CL_MEM_READ_WRITE;
1236     }
1237 
1238     if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
1239     {
1240         if (gTestImage2DFromBuffer)
1241         {
1242             generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1243             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1244                                           imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
1245             test_error( error, "Unable to create buffer" );
1246             unprotImage = create_image_2d_buffer( context,
1247                                           image_read_write_flags,
1248                                           imageInfo->format,
1249                                           imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1250                                           imageBuffer, &error );
1251 
1252         }
1253         else
1254         {
1255             // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
1256             // Do not use protected images for max image size test since it rounds the row size to a page size
1257             if (gTestMaxImages) {
1258                 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1259                 unprotImage = create_image_2d( context,
1260                                         image_read_write_flags | CL_MEM_USE_HOST_PTR,
1261                                         imageInfo->format,
1262                                         imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1263                                         maxImageUseHostPtrBackingStore, &error );
1264             }
1265             else
1266             {
1267                 error = protImage.Create( context,
1268                                         image_read_write_flags,
1269                                         imageInfo->format, imageInfo->width, imageInfo->height );
1270             }
1271         }
1272 
1273         if( error != CL_SUCCESS )
1274         {
1275             if (gTestImage2DFromBuffer) {
1276                 clReleaseMemObject(imageBuffer);
1277                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1278                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1279                     return 0;
1280                 }
1281             }
1282 
1283             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1284             return error;
1285         }
1286 
1287         if (gTestMaxImages || gTestImage2DFromBuffer)
1288             image = (cl_mem)unprotImage;
1289         else
1290             image = (cl_mem)protImage;
1291     }
1292     else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
1293     {
1294         if (gTestImage2DFromBuffer)
1295         {
1296             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1297                                          imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1298             test_error( error, "Unable to create buffer" );
1299             unprotImage = create_image_2d_buffer( context,
1300                                                  image_read_write_flags,
1301                                                  imageInfo->format,
1302                                                  imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1303                                                  imageBuffer, &error );
1304 
1305         }
1306         else
1307         {
1308             // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
1309             unprotImage = create_image_2d( context,
1310                                       image_read_write_flags | CL_MEM_COPY_HOST_PTR,
1311                                       imageInfo->format,
1312                                       imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1313                                       imageValues, &error );
1314         }
1315         if( error != CL_SUCCESS )
1316         {
1317             if (gTestImage2DFromBuffer) {
1318                 clReleaseMemObject(imageBuffer);
1319                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1320                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1321                     return 0;
1322                 }
1323             }
1324 
1325             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1326             return error;
1327         }
1328         image = unprotImage;
1329     }
1330     else // Either CL_MEM_ALLOC_HOST_PTR or none
1331     {
1332         if( gTestMipmaps )
1333         {
1334             cl_image_desc image_desc = {0};
1335             image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
1336             image_desc.image_width = imageInfo->width;
1337             image_desc.image_height = imageInfo->height;
1338             image_desc.num_mip_levels = imageInfo->num_mip_levels;
1339             unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
1340         }
1341         else if (gTestImage2DFromBuffer)
1342         {
1343             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse,
1344                                          imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1345             test_error( error, "Unable to create buffer" );
1346             unprotImage = create_image_2d_buffer( context,
1347                                                  image_read_write_flags,
1348                                                  imageInfo->format,
1349                                                  imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1350                                                  imageBuffer, &error );
1351 
1352         }
1353         else
1354         {
1355             // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
1356             // it works just as if no flag is specified, so we just do the same thing either way
1357             unprotImage = create_image_2d( context,
1358                                       image_read_write_flags | gMemFlagsToUse,
1359                                       imageInfo->format,
1360                                       imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1361                                       imageValues, &error );
1362         }
1363         if( error != CL_SUCCESS )
1364         {
1365             if (gTestImage2DFromBuffer) {
1366                 clReleaseMemObject(imageBuffer);
1367                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1368                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1369                     return 0;
1370                 }
1371             }
1372 
1373             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1374             return error;
1375         }
1376         image = unprotImage;
1377     }
1378 
1379     if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
1380     {
1381         if( gDebugTrace )
1382             log_info( " - Writing image...\n" );
1383 
1384         size_t origin[ 3 ] = { 0, 0, 0 };
1385         size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
1386 
1387         if(!gTestMipmaps)
1388         {
1389             error = clEnqueueWriteImage(queue, image, CL_TRUE,
1390                                         origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
1391                                        imageValues, 0, NULL, NULL);
1392             if (error != CL_SUCCESS)
1393             {
1394                 log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
1395                 return error;
1396             }
1397         }
1398         else
1399         {
1400             size_t tmpNextLevelOffset = 0;
1401             for(size_t level = 0; level < imageInfo->num_mip_levels; level++)
1402             {
1403                 origin[2] = level;
1404                 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1405                                             origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0,
1406                                             (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL);
1407                 tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format);
1408                 region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1;
1409                 region[1] = (region[1] >> 1) ? (region[1] >> 1) : 1;
1410             }
1411         }
1412     }
1413 
1414     if( gDebugTrace )
1415         log_info( " - Creating kernel arguments...\n" );
1416 
1417     xOffsets =
1418         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1419                        sizeof(cl_float) * imageInfo->width * imageInfo->height,
1420                        xOffsetValues, &error);
1421     test_error( error, "Unable to create x offset buffer" );
1422     yOffsets =
1423         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1424                        sizeof(cl_float) * imageInfo->width * imageInfo->height,
1425                        yOffsetValues, &error);
1426     test_error( error, "Unable to create y offset buffer" );
1427     results = clCreateBuffer(context, CL_MEM_READ_WRITE,
1428                              get_explicit_type_size(outputType) * 4
1429                                  * imageInfo->width * imageInfo->height,
1430                              NULL, &error);
1431     test_error( error, "Unable to create result buffer" );
1432 
1433     // Create sampler to use
1434     actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
1435     test_error(error, "Unable to create image sampler");
1436 
1437     // Set arguments
1438     int idx = 0;
1439     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
1440     test_error( error, "Unable to set kernel arguments" );
1441     if( !gUseKernelSamplers )
1442     {
1443         error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
1444         test_error( error, "Unable to set kernel arguments" );
1445     }
1446     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
1447     test_error( error, "Unable to set kernel arguments" );
1448     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
1449     test_error( error, "Unable to set kernel arguments" );
1450     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
1451     test_error( error, "Unable to set kernel arguments" );
1452 
1453     // A cast of troublesome offsets. The first one has to be zero.
1454     const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
1455     int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
1456     int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
1457     int loopCount = 2 * float_offset_count;
1458     if( ! useFloatCoords )
1459         loopCount = 1;
1460     if (gTestMaxImages) {
1461         loopCount = 1;
1462       log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
1463     }
1464 
1465     if(gtestTypesToRun & kReadWriteTests)
1466     {
1467         loopCount = 1;
1468     }
1469 
1470     // Get the maximum absolute error for this format
1471     double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
1472     if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
1473 
1474     if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
1475         initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
1476         if (initHalf) {
1477             log_info("Half rounding mode successfully detected.\n");
1478         }
1479     }
1480 
1481     size_t nextLevelOffset = 0;
1482     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
1483     for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
1484     {
1485         size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4;
1486         BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
1487         float lod_float = (float)lod;
1488         char *imagePtr = (char *)imageValues + nextLevelOffset;
1489         if( gTestMipmaps )
1490         {
1491             if(gDebugTrace)
1492                 log_info("\t- Working at mip level %d\n", lod);
1493             error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
1494         }
1495 
1496         // Validate results element by element
1497         for( int q = 0; q < loopCount; q++ )
1498         {
1499             float offset = float_offsets[ q % float_offset_count ];
1500 
1501             // Init the coordinates
1502             InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues,
1503                                 q>=float_offset_count ? -offset: offset,
1504                                 q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod );
1505 
1506             error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
1507             test_error( error, "Unable to write x offsets" );
1508             error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
1509             test_error( error, "Unable to write y offsets" );
1510 
1511             // Get results
1512             memset( resultValues, 0xff, resultValuesSize );
1513             clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
1514 
1515             // Run the kernel
1516             threads[0] = (size_t)width_lod;
1517             threads[1] = (size_t)height_lod;
1518             error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
1519             test_error( error, "Unable to run kernel" );
1520 
1521             if( gDebugTrace )
1522                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) );
1523 
1524             error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check
1525             test_error( error, "Unable to read results from kernel" );
1526             if( gDebugTrace )
1527                 log_info( "    results read\n" );
1528 
1529             int retCode;
1530             switch (imageInfo->format->image_channel_order) {
1531             case CL_DEPTH:
1532                 retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1533                 break;
1534             case CL_sRGB:
1535             case CL_sRGBx:
1536             case CL_sRGBA:
1537             case CL_sBGRA:
1538                 retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1539                 break;
1540             default:
1541                 retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1542             }
1543             if (retCode)
1544                 return retCode;
1545         }
1546         end:
1547         if ( gTestMipmaps )
1548         {
1549             nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
1550             width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1551             height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1552         }
1553     }
1554 
1555     if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
1556 
1557     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1558 }
1559 
test_read_image_set_2D(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1560 int test_read_image_set_2D(cl_device_id device, cl_context context,
1561                            cl_command_queue queue,
1562                            const cl_image_format *format,
1563                            image_sampler_data *imageSampler, bool floatCoords,
1564                            ExplicitType outputType)
1565 {
1566     char programSrc[10240];
1567     const char *ptr;
1568     const char *readFormat;
1569     clProgramWrapper program;
1570     clKernelWrapper kernel;
1571     const char *KernelSourcePattern = NULL;
1572 
1573     if (gTestImage2DFromBuffer)
1574     {
1575         if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
1576         {
1577             switch (format->image_channel_data_type)
1578             {
1579                 case CL_UNORM_INT8:
1580                 case CL_UNORM_INT16:
1581                 case CL_SNORM_INT8:
1582                 case CL_SNORM_INT16:
1583                 case CL_HALF_FLOAT:
1584                 case CL_FLOAT:
1585                 case CL_SIGNED_INT8:
1586                 case CL_SIGNED_INT16:
1587                 case CL_SIGNED_INT32:
1588                 case CL_UNSIGNED_INT8:
1589                 case CL_UNSIGNED_INT16:
1590                 case CL_UNSIGNED_INT32:
1591                     log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
1592                              GetChannelTypeName( format->image_channel_data_type ));
1593                     return 0;
1594                 default:
1595                     break;
1596             }
1597         }
1598     }
1599 
1600 
1601     RandomSeed seed( gRandomSeed );
1602     int error;
1603 
1604     // Get our operating params
1605     size_t maxWidth, maxHeight;
1606     cl_ulong maxAllocSize, memSize;
1607     image_descriptor imageInfo = { 0x0 };
1608     size_t pixelSize;
1609 
1610     imageInfo.format = format;
1611     imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
1612     imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
1613     pixelSize = get_pixel_size( imageInfo.format );
1614 
1615     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1616     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1617     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1618     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1619     test_error( error, "Unable to get max image 2D size from device" );
1620 
1621     if (memSize > (cl_ulong)SIZE_MAX) {
1622       memSize = (cl_ulong)SIZE_MAX;
1623     }
1624 
1625     // Determine types
1626     if( outputType == kInt )
1627         readFormat = "i";
1628     else if( outputType == kUInt )
1629         readFormat = "ui";
1630     else // kFloat
1631         readFormat = "f";
1632 
1633     // Construct the source
1634     const char *samplerArg = samplerKernelArg;
1635     char samplerVar[ 1024 ] = "";
1636     if( gUseKernelSamplers )
1637     {
1638         get_sampler_kernel_code( imageSampler, samplerVar );
1639         samplerArg = "";
1640     }
1641 
1642     if(gtestTypesToRun & kReadTests)
1643     {
1644         KernelSourcePattern = read2DKernelSourcePattern;
1645     }
1646     else
1647     {
1648         KernelSourcePattern = read_write2DKernelSourcePattern;
1649     }
1650 
1651 
1652     sprintf( programSrc, KernelSourcePattern,
1653             (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
1654             samplerArg, get_explicit_type_name( outputType ),
1655             (format->image_channel_order == CL_DEPTH) ? "" : "4",
1656             gTestMipmaps?", float lod":" ",
1657             samplerVar,
1658             gTestMipmaps? lodOffsetSource : offsetSource,
1659             floatCoords ? floatKernelSource : intCoordKernelSource,
1660             readFormat,
1661             gTestMipmaps?", lod":" ");
1662 
1663     ptr = programSrc;
1664     error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
1665                                         "sample_kernel");
1666     test_error( error, "Unable to create testing kernel" );
1667 
1668     if( gTestSmallImages )
1669     {
1670         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1671         {
1672             imageInfo.rowPitch = imageInfo.width * pixelSize;
1673             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1674             {
1675                 if( gTestMipmaps )
1676                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1677 
1678                 if( gDebugTrace )
1679                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
1680 
1681                 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1682                 if( retCode )
1683                     return retCode;
1684             }
1685         }
1686     }
1687     else if( gTestMaxImages )
1688     {
1689         // Try a specific set of maximum sizes
1690         size_t numbeOfSizes;
1691         size_t sizes[100][3];
1692 
1693         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
1694 
1695         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1696         {
1697             imageInfo.width = sizes[ idx ][ 0 ];
1698             imageInfo.height = sizes[ idx ][ 1 ];
1699             imageInfo.rowPitch = imageInfo.width * pixelSize;
1700             log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
1701 
1702             if( gTestMipmaps )
1703                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1704 
1705             if( gDebugTrace )
1706                 log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
1707             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1708             if( retCode )
1709                 return retCode;
1710         }
1711     }
1712     else if( gTestRounding )
1713     {
1714         uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
1715         typeRange /= pixelSize / get_format_type_size( imageInfo.format );
1716         imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
1717         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1718         while( imageInfo.height >= maxHeight / 2 )
1719         {
1720             imageInfo.width <<= 1;
1721             imageInfo.height >>= 1;
1722         }
1723 
1724         while( imageInfo.width >= maxWidth / 2 )
1725             imageInfo.width >>= 1;
1726         imageInfo.rowPitch = imageInfo.width * pixelSize;
1727 
1728         gRoundingStartValue = 0;
1729         do
1730         {
1731             if( gDebugTrace )
1732                 log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
1733             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1734             if( retCode )
1735                 return retCode;
1736 
1737             gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
1738 
1739         } while( gRoundingStartValue < typeRange );
1740     }
1741     else
1742     {
1743         cl_uint imagePitchAlign = 0;
1744         if (gTestImage2DFromBuffer)
1745         {
1746 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
1747             error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
1748 #endif
1749             if (!imagePitchAlign || error) {
1750               test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
1751               imagePitchAlign = 1;
1752             }
1753         }
1754 
1755         int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1756         int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
1757 
1758         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1759         {
1760             cl_ulong size;
1761             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1762             // image, the result array, plus offset arrays, will fit in the global ram space
1763             do
1764             {
1765                 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1766                 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
1767 
1768                 imageInfo.rowPitch = imageInfo.width * pixelSize;
1769                 if( gTestMipmaps )
1770                 {
1771                     imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1772                     size = 4 * compute_mipmapped_image_size(imageInfo);
1773                 }
1774                 else
1775                 {
1776                     if( gEnablePitch )
1777                     {
1778                         size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1779                         imageInfo.rowPitch += extraWidth * pixelSize;
1780                     }
1781 
1782                 // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
1783                     if (gTestImage2DFromBuffer)
1784                     {
1785                         size_t pitch = imagePitchAlign * pixelSize;
1786                         imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
1787                     }
1788 
1789                     size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
1790                 }
1791             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
1792 
1793             if( gDebugTrace )
1794                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
1795             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1796             if( retCode )
1797                 return retCode;
1798         }
1799     }
1800 
1801     return 0;
1802 }
1803