1 //
2 // Copyright (c) 2017, 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18
19 #if defined( __APPLE__ )
20 #include <signal.h>
21 #include <sys/signal.h>
22 #include <setjmp.h>
23 #endif
24
25 extern bool gTestImage2DFromBuffer;
26
27 // Utility function to clamp down image sizes for certain tests to avoid
28 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)29 static size_t reduceImageSizeRange(size_t maxDimSize) {
30 size_t DimSize = maxDimSize/32;
31 if (DimSize < (size_t) 16)
32 return 16;
33 else if (DimSize > (size_t) 256)
34 return 256;
35 else
36 return DimSize;
37 }
38
39 const char *read2DKernelSourcePattern =
40 "__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
41 "{\n"
42 "%s"
43 " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
44 "%s"
45 "%s"
46 " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
47 "}";
48
49 const char *read_write2DKernelSourcePattern =
50 "__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
51 "{\n"
52 "%s"
53 " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
54 "%s"
55 "%s"
56 " results[offset] = read_image%s( input, coords %s);\n"
57 "}";
58
59 const char *intCoordKernelSource =
60 " int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
61
62 const char *floatKernelSource =
63 " float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
64
65 static const char *samplerKernelArg = " sampler_t imageSampler,";
66
67 static const char *lodOffsetSource =
68 " unsigned int lod_int = (unsigned int) lod;\n"
69 " int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n"
70 " int offset = tidY*width_lod + tidX;\n";
71
72 static const char *offsetSource =
73 " int offset = tidY*get_image_width(input) + tidX;\n";
74
determine_validation_error(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float xAddressOffset,float yAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod=0)75 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
76 T *resultPtr, T * expected, float error,
77 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
78 {
79 int actualX, actualY;
80 int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod );
81 bool clampingErr = false, clamped = false, otherClampingBug = false;
82 int clampedX, clampedY, ignoreMe;
83
84 clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
85
86 if( found )
87 {
88 // Is it a clamping bug?
89 if( clamped && clampedX == actualX && clampedY == actualY )
90 {
91 if( (--numClamped) == 0 )
92 {
93 log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
94 if (imageInfo->format->image_channel_order == CL_DEPTH)
95 {
96 if( printAsFloat )
97 {
98 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
99 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
100 }
101 else
102 {
103 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
104 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
105 }
106 }
107 else
108 {
109 if( printAsFloat )
110 {
111 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n",
112 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
113 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
114 }
115 else
116 {
117 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
118 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
119 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
120 }
121 }
122 return 1;
123 }
124 clampingErr = true;
125 otherClampingBug = true;
126 }
127 }
128 if( clamped && !otherClampingBug )
129 {
130 // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
131 if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
132 {
133 if( (--numClamped) == 0 )
134 {
135 log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
136 if (imageInfo->format->image_channel_order == CL_DEPTH)
137 {
138 if( printAsFloat )
139 {
140 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
141 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
142 }
143 else
144 {
145 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
146 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
147 }
148 }
149 else
150 {
151 if( printAsFloat )
152 {
153 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n",
154 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
155 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
156 }
157 else
158 {
159 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
160 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
161 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
162 }
163 }
164 return 1;
165 }
166 clampingErr = true;
167 }
168 }
169 if( !clampingErr )
170 {
171 if (imageInfo->format->image_channel_order == CL_DEPTH)
172 {
173 if( printAsFloat )
174 {
175 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
176 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
177 }
178 else
179 {
180 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
181 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
182 }
183 }
184 else
185 {
186 if( printAsFloat )
187 {
188 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n",
189 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
190 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
191 }
192 else
193 {
194 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
195 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
196 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
197 }
198 }
199 log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch );
200 if( clamped )
201 {
202 log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
203 }
204 if( printAsFloat && gExtraValidateInfo)
205 {
206 log_error( "Nearby values:\n" );
207 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
208 for( int yOff = -2; yOff <= 1; yOff++ )
209 {
210 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
211 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
212 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
213 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
214 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
215 if (imageInfo->format->image_channel_order == CL_DEPTH)
216 {
217 log_error( "%d\t(%g)",clampedY + yOff, top[0] );
218 log_error( " (%g)", real[0] );
219 log_error( " (%g)",bot[0] );
220 log_error( " (%g)\n",bot2[0] );
221 }
222 else
223 {
224 log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
225 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
226 log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
227 log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
228 }
229 }
230
231 if( clampedY < 1 )
232 {
233 log_error( "Nearby values:\n" );
234 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
235 for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ )
236 {
237 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
238 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
239 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
240 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
241 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
242 if (imageInfo->format->image_channel_order == CL_DEPTH)
243 {
244 log_error( "%d\t(%g)",clampedY + yOff, top[0] );
245 log_error( " (%g)", real[0] );
246 log_error( " (%g)",bot[0] );
247 log_error( " (%g)\n",bot2[0] );
248 }
249 else
250 {
251 log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
252 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
253 log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
254 log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
255 }
256 }
257 }
258 }
259
260 if( imageSampler->filter_mode != CL_FILTER_LINEAR )
261 {
262 if( found )
263 log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
264 else
265 log_error( "\tValue not actually found in image\n" );
266 }
267 log_error( "\n" );
268
269 numClamped = -1; // We force the clamped counter to never work
270 if( ( --numTries ) == 0 )
271 {
272 return 1;
273 }
274 }
275 return 0;
276 }
277
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d)278 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
279 {
280 size_t i = 0;
281 if( gDisableOffsets )
282 {
283 for( size_t y = 0; y < imageInfo->height; y++ )
284 {
285 for( size_t x = 0; x < imageInfo->width; x++, i++ )
286 {
287 xOffsets[ i ] = (float) (xfract + (double) x);
288 yOffsets[ i ] = (float) (yfract + (double) y);
289 }
290 }
291 }
292 else
293 {
294 for( size_t y = 0; y < imageInfo->height; y++ )
295 {
296 for( size_t x = 0; x < imageInfo->width; x++, i++ )
297 {
298 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
299 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
300 }
301 }
302 }
303
304 if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
305 {
306 i = 0;
307 for( size_t y = 0; y < imageInfo->height; y++ )
308 {
309 for( size_t x = 0; x < imageInfo->width; x++, i++ )
310 {
311 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
312 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0);
313 }
314 }
315 }
316
317 if( normalized_coords )
318 {
319 i = 0;
320 for( size_t y = 0; y < imageInfo->height; y++ )
321 {
322 for( size_t x = 0; x < imageInfo->width; x++, i++ )
323 {
324 xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
325 yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
326 }
327 }
328 }
329 }
330
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d,size_t lod)331 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
332 {
333 size_t i = 0;
334 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
335
336 if( gTestMipmaps )
337 {
338 width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
339 height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
340 }
341 if( gDisableOffsets )
342 {
343 for( size_t y = 0; y < height_lod; y++ )
344 {
345 for( size_t x = 0; x < width_lod; x++, i++ )
346 {
347 xOffsets[ i ] = (float) (xfract + (float) x);
348 yOffsets[ i ] = (float) (yfract + (float) y);
349 }
350 }
351 }
352 else
353 {
354 for( size_t y = 0; y < height_lod; y++ )
355 {
356 for( size_t x = 0; x < width_lod; x++, i++ )
357 {
358 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
359 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
360 }
361 }
362 }
363
364 if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
365 {
366 i = 0;
367 for( size_t y = 0; y < height_lod; y++ )
368 {
369 for( size_t x = 0; x < width_lod; x++, i++ )
370 {
371 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
372 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0);
373 }
374 }
375 }
376
377 if( normalized_coords )
378 {
379 i = 0;
380 for( size_t y = 0; y < height_lod; y++ )
381 {
382 for( size_t x = 0; x < width_lod; x++, i++ )
383 {
384 xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod);
385 yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod);
386 }
387 }
388 }
389 }
390
validate_image_2D_depth_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)391 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
392 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
393 {
394 // Validate results element by element
395 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
396 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
397 /*
398 * FLOAT output type
399 */
400 if( outputType == kFloat )
401 {
402 // Validate float results
403 float *resultPtr = (float *)(char *)resultValues;
404 float expected[4], error=0.0f;
405 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
406 for( size_t y = 0, j = 0; y < height_lod; y++ )
407 {
408 for( size_t x = 0; x < width_lod; x++, j++ )
409 {
410 // Step 1: go through and see if the results verify for the pixel
411 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
412 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
413 int checkOnlyOnePixel = 0;
414 int found_pixel = 0;
415 float offset = NORM_OFFSET;
416 if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
417 #if defined( __APPLE__ )
418 // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
419 || gDeviceType != CL_DEVICE_TYPE_GPU
420 #endif
421 )
422 offset = 0.0f; // Loop only once
423
424 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
425 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
426
427 // Try sampling the pixel, without flushing denormals.
428 int containsDenormals = 0;
429 FloatPixel maxPixel;
430 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
431 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
432 imageSampler, expected, 0, &containsDenormals );
433
434 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
435 // Clamp to the minimum absolute error for the format
436 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
437 float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
438
439 // Check if the result matches.
440 if( ! (err1 <= maxErr1) )
441 {
442 //try flushing the denormals, if there is a failure.
443 if( containsDenormals )
444 {
445 // If implementation decide to flush subnormals to zero,
446 // max error needs to be adjusted
447 maxErr1 += 4 * FLT_MIN;
448
449 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
450 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
451 imageSampler, expected, 0, NULL );
452
453 err1 = ABS_ERROR(resultPtr[0], expected[0]);
454 }
455 }
456
457 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
458 found_pixel = (err1 <= maxErr1);
459 }//norm_offset_x
460 }//norm_offset_y
461
462
463 // Step 2: If we did not find a match, then print out debugging info.
464 if (!found_pixel) {
465 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
466 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
467 checkOnlyOnePixel = 0;
468 int shouldReturn = 0;
469 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
470 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
471
472 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
473 // E.g., test one pixel.
474 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
475 norm_offset_x = 0.0f;
476 norm_offset_y = 0.0f;
477 checkOnlyOnePixel = 1;
478 }
479
480 int containsDenormals = 0;
481 FloatPixel maxPixel;
482 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
483 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
484 imageSampler, expected, 0, &containsDenormals );
485
486 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
487 float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
488
489
490 if( ! (err1 <= maxErr1) )
491 {
492 //try flushing the denormals, if there is a failure.
493 if( containsDenormals )
494 {
495 maxErr1 += 4 * FLT_MIN;
496
497 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
498 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
499 imageSampler, expected, 0, NULL );
500
501 err1 = ABS_ERROR(resultPtr[0], expected[0]);
502 }
503 }
504 if( ! (err1 <= maxErr1) )
505 {
506 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
507
508 float tempOut[4];
509 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
510 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
511
512 log_error( "Step by step:\n" );
513 FloatPixel temp;
514 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
515 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
516 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
517 log_error( "\tulps: %2.2f (max allowed: %2.2f)\n\n",
518 Ulp_Error( resultPtr[0], expected[0] ),
519 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
520
521 } else {
522 log_error("Test error: we should have detected this passing above.\n");
523 }
524
525 }//norm_offset_x
526 }//norm_offset_y
527 if( shouldReturn )
528 return 1;
529 } // if (!found_pixel)
530
531 resultPtr += 1;
532 }
533 }
534 }
535 else
536 {
537 log_error("Test error: Not supported format.\n");
538 return 1;
539 }
540 return 0;
541 }
542
validate_image_2D_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)543 int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
544 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
545 {
546 // Validate results element by element
547 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
548 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
549 /*
550 * FLOAT output type
551 */
552 if( outputType == kFloat )
553 {
554 // Validate float results
555 float *resultPtr = (float *)(char *)resultValues;
556 float expected[4], error=0.0f;
557 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
558 for( size_t y = 0, j = 0; y < height_lod; y++ )
559 {
560 for( size_t x = 0; x < width_lod; x++, j++ )
561 {
562 // Step 1: go through and see if the results verify for the pixel
563 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
564 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
565 int checkOnlyOnePixel = 0;
566 int found_pixel = 0;
567 float offset = NORM_OFFSET;
568 if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
569 #if defined( __APPLE__ )
570 // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
571 || gDeviceType != CL_DEVICE_TYPE_GPU
572 #endif
573 )
574 offset = 0.0f; // Loop only once
575
576 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
577 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
578
579
580 // Try sampling the pixel, without flushing denormals.
581 int containsDenormals = 0;
582 FloatPixel maxPixel;
583 if ( gTestMipmaps )
584 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
585 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
586 imageSampler, expected, 0, &containsDenormals, lod );
587 else
588 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
589 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
590 imageSampler, expected, 0, &containsDenormals );
591
592 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
593 float err2 = ABS_ERROR(resultPtr[1], expected[1]);
594 float err3 = ABS_ERROR(resultPtr[2], expected[2]);
595 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
596 // Clamp to the minimum absolute error for the format
597 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
598 if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
599 if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
600 if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
601 float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
602 float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
603 float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
604 float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
605
606 // Check if the result matches.
607 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
608 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
609 {
610 //try flushing the denormals, if there is a failure.
611 if( containsDenormals )
612 {
613 // If implementation decide to flush subnormals to zero,
614 // max error needs to be adjusted
615 maxErr1 += 4 * FLT_MIN;
616 maxErr2 += 4 * FLT_MIN;
617 maxErr3 += 4 * FLT_MIN;
618 maxErr4 += 4 * FLT_MIN;
619
620 if(gTestMipmaps)
621 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
622 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
623 imageSampler, expected, 0, NULL,lod );
624 else
625 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
626 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
627 imageSampler, expected, 0, NULL );
628
629 err1 = ABS_ERROR(resultPtr[0], expected[0]);
630 err2 = ABS_ERROR(resultPtr[1], expected[1]);
631 err3 = ABS_ERROR(resultPtr[2], expected[2]);
632 err4 = ABS_ERROR(resultPtr[3], expected[3]);
633 }
634 }
635
636 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
637 found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4);
638 }//norm_offset_x
639 }//norm_offset_y
640
641
642 // Step 2: If we did not find a match, then print out debugging info.
643 if (!found_pixel) {
644 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
645 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
646 checkOnlyOnePixel = 0;
647 int shouldReturn = 0;
648 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
649 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
650
651 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
652 // E.g., test one pixel.
653 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
654 norm_offset_x = 0.0f;
655 norm_offset_y = 0.0f;
656 checkOnlyOnePixel = 1;
657 }
658
659 int containsDenormals = 0;
660 FloatPixel maxPixel;
661 if(gTestMipmaps)
662 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
663 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
664 imageSampler, expected, 0, &containsDenormals, lod );
665 else
666 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
667 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
668 imageSampler, expected, 0, &containsDenormals );
669
670 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
671 float err2 = ABS_ERROR(resultPtr[1], expected[1]);
672 float err3 = ABS_ERROR(resultPtr[2], expected[2]);
673 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
674 float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
675 float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
676 float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
677 float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
678
679
680 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
681 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
682 {
683 //try flushing the denormals, if there is a failure.
684 if( containsDenormals )
685 {
686 maxErr1 += 4 * FLT_MIN;
687 maxErr2 += 4 * FLT_MIN;
688 maxErr3 += 4 * FLT_MIN;
689 maxErr4 += 4 * FLT_MIN;
690
691 if(gTestMipmaps)
692 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
693 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
694 imageSampler, expected, 0, NULL, lod );
695 else
696 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
697 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
698 imageSampler, expected, 0, NULL );
699
700 err1 = ABS_ERROR(resultPtr[0], expected[0]);
701 err2 = ABS_ERROR(resultPtr[1], expected[1]);
702 err3 = ABS_ERROR(resultPtr[2], expected[2]);
703 err4 = ABS_ERROR(resultPtr[3], expected[3]);
704 }
705 }
706 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
707 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
708 {
709 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
710
711 float tempOut[4];
712 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
713 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
714
715 log_error( "Step by step:\n" );
716 FloatPixel temp;
717 if( gTestMipmaps )
718 temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
719 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
720 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
721 else
722 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
723 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
724 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
725 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
726 Ulp_Error( resultPtr[0], expected[0] ),
727 Ulp_Error( resultPtr[1], expected[1] ),
728 Ulp_Error( resultPtr[2], expected[2] ),
729 Ulp_Error( resultPtr[3], expected[3] ),
730 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
731
732 } else {
733 log_error("Test error: we should have detected this passing above.\n");
734 }
735
736 }//norm_offset_x
737 }//norm_offset_y
738 if( shouldReturn )
739 return 1;
740 } // if (!found_pixel)
741
742 resultPtr += 4;
743 }
744 }
745 }
746 /*
747 * UINT output type
748 */
749 else if( outputType == kUInt )
750 {
751 // Validate unsigned integer results
752 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
753 unsigned int expected[4];
754 float error;
755 for( size_t y = 0, j = 0; y < height_lod ; y++ )
756 {
757 for( size_t x = 0; x < width_lod ; x++, j++ )
758 {
759 // Step 1: go through and see if the results verify for the pixel
760 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
761 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
762 int checkOnlyOnePixel = 0;
763 int found_pixel = 0;
764 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
765 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
766
767 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
768 // E.g., test one pixel.
769 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
770 norm_offset_x = 0.0f;
771 norm_offset_y = 0.0f;
772 checkOnlyOnePixel = 1;
773 }
774
775 if ( gTestMipmaps )
776 sample_image_pixel_offset<unsigned int>( (char*)imagePtr, imageInfo,
777 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
778 imageSampler, expected, lod );
779 else
780 sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
781 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
782 imageSampler, expected);
783
784
785 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
786 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
787
788 if (error <= MAX_ERR)
789 found_pixel = 1;
790 }//norm_offset_x
791 }//norm_offset_y
792
793 // Step 2: If we did not find a match, then print out debugging info.
794 if (!found_pixel) {
795 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
796 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
797 checkOnlyOnePixel = 0;
798 int shouldReturn = 0;
799 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
800 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
801
802 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
803 // E.g., test one pixel.
804 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
805 norm_offset_x = 0.0f;
806 norm_offset_y = 0.0f;
807 checkOnlyOnePixel = 1;
808 }
809
810 if( gTestMipmaps )
811 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
812 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
813 imageSampler, expected, lod );
814 else
815 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
816 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
817 imageSampler, expected);
818
819
820 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
821 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
822
823 if( error > MAX_ERR )
824 {
825 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
826
827 shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
828 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
829 } else {
830 log_error("Test error: we should have detected this passing above.\n");
831 }
832 }//norm_offset_x
833 }//norm_offset_y
834 if( shouldReturn )
835 return 1;
836 } // if (!found_pixel)
837
838 resultPtr += 4;
839 }
840 }
841 }
842 /*
843 * INT output type
844 */
845 else
846 {
847 // Validate integer results
848 int *resultPtr = (int *)(char *)resultValues;
849 int expected[4];
850 float error;
851 for( size_t y = 0, j = 0; y < height_lod ; y++ )
852 {
853 for( size_t x = 0; x < width_lod; x++, j++ )
854 {
855 // Step 1: go through and see if the results verify for the pixel
856 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
857 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
858 int checkOnlyOnePixel = 0;
859 int found_pixel = 0;
860 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
861 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
862
863 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
864 // E.g., test one pixel.
865 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
866 norm_offset_x = 0.0f;
867 norm_offset_y = 0.0f;
868 checkOnlyOnePixel = 1;
869 }
870
871 if ( gTestMipmaps )
872 sample_image_pixel_offset<int>( imagePtr, imageInfo,
873 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
874 imageSampler, expected , lod);
875 else
876 sample_image_pixel_offset<int>( imageValues, imageInfo,
877 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
878 imageSampler, expected );
879
880
881 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
882 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
883
884 if (error <= MAX_ERR)
885 found_pixel = 1;
886 }//norm_offset_x
887 }//norm_offset_y
888
889 // Step 2: If we did not find a match, then print out debugging info.
890 if (!found_pixel) {
891 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
892 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
893 checkOnlyOnePixel = 0;
894 int shouldReturn = 0;
895 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
896 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
897
898 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
899 // E.g., test one pixel.
900 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
901 norm_offset_x = 0.0f;
902 norm_offset_y = 0.0f;
903 checkOnlyOnePixel = 1;
904 }
905
906 if ( gTestMipmaps )
907 sample_image_pixel_offset<int>( imageValues, imageInfo,
908 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
909 imageSampler, expected, lod );
910 else
911 sample_image_pixel_offset<int>( imageValues, imageInfo,
912 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
913 imageSampler, expected );
914
915
916 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
917 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
918
919 if( error > MAX_ERR )
920 {
921 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
922
923 shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
924 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
925 } else {
926 log_error("Test error: we should have detected this passing above.\n");
927 }
928 }//norm_offset_x
929 }//norm_offset_y
930 if( shouldReturn )
931 return 1;
932 } // if (!found_pixel)
933
934 resultPtr += 4;
935 }
936 }
937 }
938 return 0;
939 }
940
validate_image_2D_sRGB_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)941 int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
942 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
943 {
944 // Validate results element by element
945 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
946 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
947 /*
948 * FLOAT output type
949 */
950 if( outputType == kFloat )
951 {
952 // Validate float results
953 float *resultPtr = (float *)(char *)resultValues;
954 float expected[4], error=0.0f;
955 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
956 for( size_t y = 0, j = 0; y < height_lod; y++ )
957 {
958 for( size_t x = 0; x < width_lod; x++, j++ )
959 {
960 // Step 1: go through and see if the results verify for the pixel
961 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
962 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
963 int checkOnlyOnePixel = 0;
964 int found_pixel = 0;
965 float offset = NORM_OFFSET;
966 if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
967 #if defined( __APPLE__ )
968 // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
969 || gDeviceType != CL_DEVICE_TYPE_GPU
970 #endif
971 )
972 offset = 0.0f; // Loop only once
973
974 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
975 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
976
977
978 // Try sampling the pixel, without flushing denormals.
979 int containsDenormals = 0;
980 FloatPixel maxPixel;
981 if ( gTestMipmaps )
982 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
983 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
984 imageSampler, expected, 0, &containsDenormals, lod );
985 else
986 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
987 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
988 imageSampler, expected, 0, &containsDenormals );
989 float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
990 sRGBmap(expected[0]));
991 float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
992 sRGBmap(expected[1]));
993 float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
994 sRGBmap(expected[2]));
995 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
996 float maxErr = 0.5;
997
998 // Check if the result matches.
999 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1000 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1001 {
1002 //try flushing the denormals, if there is a failure.
1003 if( containsDenormals )
1004 {
1005 // If implementation decide to flush subnormals to zero,
1006 // max error needs to be adjusted
1007 maxErr += 4 * FLT_MIN;
1008
1009 if(gTestMipmaps)
1010 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1011 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1012 imageSampler, expected, 0, NULL,lod );
1013 else
1014 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1015 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1016 imageSampler, expected, 0, NULL );
1017
1018 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1019 sRGBmap(expected[0]));
1020 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1021 sRGBmap(expected[1]));
1022 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1023 sRGBmap(expected[2]));
1024 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1025 }
1026 }
1027
1028 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
1029 found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr);
1030 }//norm_offset_x
1031 }//norm_offset_y
1032
1033
1034 // Step 2: If we did not find a match, then print out debugging info.
1035 if (!found_pixel) {
1036 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1037 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1038 checkOnlyOnePixel = 0;
1039 int shouldReturn = 0;
1040 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1041 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1042
1043 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1044 // E.g., test one pixel.
1045 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
1046 norm_offset_x = 0.0f;
1047 norm_offset_y = 0.0f;
1048 checkOnlyOnePixel = 1;
1049 }
1050
1051 int containsDenormals = 0;
1052 FloatPixel maxPixel;
1053 if(gTestMipmaps)
1054 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1055 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1056 imageSampler, expected, 0, &containsDenormals, lod );
1057 else
1058 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1059 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1060 imageSampler, expected, 0, &containsDenormals );
1061
1062 float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1063 sRGBmap(expected[0]));
1064 float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1065 sRGBmap(expected[1]));
1066 float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1067 sRGBmap(expected[2]));
1068 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1069 float maxErr = 0.6;
1070
1071 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1072 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1073 {
1074 //try flushing the denormals, if there is a failure.
1075 if( containsDenormals )
1076 {
1077 // If implementation decide to flush subnormals to zero,
1078 // max error needs to be adjusted
1079 maxErr += 4 * FLT_MIN;
1080 if(gTestMipmaps)
1081 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1082 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1083 imageSampler, expected, 0, NULL, lod );
1084 else
1085 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1086 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1087 imageSampler, expected, 0, NULL );
1088
1089 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1090 sRGBmap(expected[0]));
1091 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1092 sRGBmap(expected[1]));
1093 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1094 sRGBmap(expected[2]));
1095 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1096 }
1097 }
1098 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1099 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1100 {
1101 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
1102
1103 float tempOut[4];
1104 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1105 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
1106
1107 log_error( "Step by step:\n" );
1108 FloatPixel temp;
1109 if( gTestMipmaps )
1110 temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1111 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1112 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
1113 else
1114 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
1115 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1116 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
1117 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
1118 Ulp_Error( resultPtr[0], expected[0] ),
1119 Ulp_Error( resultPtr[1], expected[1] ),
1120 Ulp_Error( resultPtr[2], expected[2] ),
1121 Ulp_Error( resultPtr[3], expected[3] ),
1122 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1123
1124 } else {
1125 log_error("Test error: we should have detected this passing above.\n");
1126 }
1127
1128 }//norm_offset_x
1129 }//norm_offset_y
1130 if( shouldReturn )
1131 return 1;
1132 } // if (!found_pixel)
1133
1134 resultPtr += 4;
1135 }
1136 }
1137 }
1138 else {
1139 log_error("Test error: NOT SUPPORTED.\n");
1140 }
1141 return 0;
1142 }
1143
validate_float_write_results(float * expected,float * actual,image_descriptor * imageInfo)1144 bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo )
1145 {
1146 bool pass = true;
1147 // Compare floats
1148 if( memcmp( expected, actual, sizeof( cl_float ) * get_format_channel_count( imageInfo->format ) ) != 0 )
1149 {
1150 // 8.3.3 Fix up cases where we have NaNs or flushed denorms; "all other values must be preserved"
1151 for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
1152 {
1153 if ( isnan( expected[j] ) && isnan( actual[j] ) )
1154 continue;
1155 if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
1156 continue;
1157 if (expected[j] != actual[j])
1158 {
1159 pass = false;
1160 break;
1161 }
1162 }
1163 }
1164 return pass;
1165 }
1166
validate_half_write_results(cl_half * expected,cl_half * actual,image_descriptor * imageInfo)1167 bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo )
1168 {
1169 bool pass = true;
1170 // Compare half floats
1171 if (memcmp(expected, actual, sizeof( cl_half ) * get_format_channel_count(imageInfo->format)) != 0) {
1172
1173 // 8.3.2 Fix up cases where we have NaNs or generated half denormals
1174 for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) {
1175 if ( is_half_nan( expected[j] ) && is_half_nan( actual[j] ) )
1176 continue;
1177 if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
1178 continue;
1179 if (expected[j] != actual[j])
1180 {
1181 pass = false;
1182 break;
1183 }
1184 }
1185 }
1186 return pass;
1187 }
1188
test_read_image_2D(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)1189 int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel kernel,
1190 image_descriptor *imageInfo, image_sampler_data *imageSampler,
1191 bool useFloatCoords, ExplicitType outputType, MTdata d )
1192 {
1193 int error;
1194 static int initHalf = 0;
1195 cl_mem imageBuffer;
1196 cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
1197 size_t threads[2];
1198
1199 clMemWrapper xOffsets, yOffsets, results;
1200 clSamplerWrapper actualSampler;
1201 BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
1202
1203 // The DataBuffer template class really does use delete[], not free -- IRO
1204 BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1205 BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1206
1207 if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
1208 if( DetectFloatToHalfRoundingMode(queue) )
1209 return 1;
1210
1211 // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
1212 BufferOwningPtr<char> imageValues;
1213 generate_random_image_data( imageInfo, imageValues, d );
1214
1215 if( gDebugTrace )
1216 {
1217 log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
1218 if( gTestMipmaps )
1219 {
1220 log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels );
1221 }
1222 }
1223
1224 // Construct testing sources
1225 clProtectedImage protImage;
1226 clMemWrapper unprotImage;
1227 cl_mem image;
1228
1229 if(gtestTypesToRun & kReadTests)
1230 {
1231 image_read_write_flags = CL_MEM_READ_ONLY;
1232 }
1233 else
1234 {
1235 image_read_write_flags = CL_MEM_READ_WRITE;
1236 }
1237
1238 if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
1239 {
1240 if (gTestImage2DFromBuffer)
1241 {
1242 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1243 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1244 imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
1245 test_error( error, "Unable to create buffer" );
1246 unprotImage = create_image_2d_buffer( context,
1247 image_read_write_flags,
1248 imageInfo->format,
1249 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1250 imageBuffer, &error );
1251
1252 }
1253 else
1254 {
1255 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
1256 // Do not use protected images for max image size test since it rounds the row size to a page size
1257 if (gTestMaxImages) {
1258 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1259 unprotImage = create_image_2d( context,
1260 image_read_write_flags | CL_MEM_USE_HOST_PTR,
1261 imageInfo->format,
1262 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1263 maxImageUseHostPtrBackingStore, &error );
1264 }
1265 else
1266 {
1267 error = protImage.Create( context,
1268 image_read_write_flags,
1269 imageInfo->format, imageInfo->width, imageInfo->height );
1270 }
1271 }
1272
1273 if( error != CL_SUCCESS )
1274 {
1275 if (gTestImage2DFromBuffer) {
1276 clReleaseMemObject(imageBuffer);
1277 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1278 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1279 return 0;
1280 }
1281 }
1282
1283 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1284 return error;
1285 }
1286
1287 if (gTestMaxImages || gTestImage2DFromBuffer)
1288 image = (cl_mem)unprotImage;
1289 else
1290 image = (cl_mem)protImage;
1291 }
1292 else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
1293 {
1294 if (gTestImage2DFromBuffer)
1295 {
1296 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1297 imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1298 test_error( error, "Unable to create buffer" );
1299 unprotImage = create_image_2d_buffer( context,
1300 image_read_write_flags,
1301 imageInfo->format,
1302 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1303 imageBuffer, &error );
1304
1305 }
1306 else
1307 {
1308 // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
1309 unprotImage = create_image_2d( context,
1310 image_read_write_flags | CL_MEM_COPY_HOST_PTR,
1311 imageInfo->format,
1312 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1313 imageValues, &error );
1314 }
1315 if( error != CL_SUCCESS )
1316 {
1317 if (gTestImage2DFromBuffer) {
1318 clReleaseMemObject(imageBuffer);
1319 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1320 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1321 return 0;
1322 }
1323 }
1324
1325 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1326 return error;
1327 }
1328 image = unprotImage;
1329 }
1330 else // Either CL_MEM_ALLOC_HOST_PTR or none
1331 {
1332 if( gTestMipmaps )
1333 {
1334 cl_image_desc image_desc = {0};
1335 image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
1336 image_desc.image_width = imageInfo->width;
1337 image_desc.image_height = imageInfo->height;
1338 image_desc.num_mip_levels = imageInfo->num_mip_levels;
1339 unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
1340 }
1341 else if (gTestImage2DFromBuffer)
1342 {
1343 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse,
1344 imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1345 test_error( error, "Unable to create buffer" );
1346 unprotImage = create_image_2d_buffer( context,
1347 image_read_write_flags,
1348 imageInfo->format,
1349 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1350 imageBuffer, &error );
1351
1352 }
1353 else
1354 {
1355 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
1356 // it works just as if no flag is specified, so we just do the same thing either way
1357 unprotImage = create_image_2d( context,
1358 image_read_write_flags | gMemFlagsToUse,
1359 imageInfo->format,
1360 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1361 imageValues, &error );
1362 }
1363 if( error != CL_SUCCESS )
1364 {
1365 if (gTestImage2DFromBuffer) {
1366 clReleaseMemObject(imageBuffer);
1367 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1368 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1369 return 0;
1370 }
1371 }
1372
1373 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1374 return error;
1375 }
1376 image = unprotImage;
1377 }
1378
1379 if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
1380 {
1381 if( gDebugTrace )
1382 log_info( " - Writing image...\n" );
1383
1384 size_t origin[ 3 ] = { 0, 0, 0 };
1385 size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
1386
1387 if(!gTestMipmaps)
1388 {
1389 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1390 origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
1391 imageValues, 0, NULL, NULL);
1392 if (error != CL_SUCCESS)
1393 {
1394 log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
1395 return error;
1396 }
1397 }
1398 else
1399 {
1400 size_t tmpNextLevelOffset = 0;
1401 for(size_t level = 0; level < imageInfo->num_mip_levels; level++)
1402 {
1403 origin[2] = level;
1404 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1405 origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0,
1406 (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL);
1407 tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format);
1408 region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1;
1409 region[1] = (region[1] >> 1) ? (region[1] >> 1) : 1;
1410 }
1411 }
1412 }
1413
1414 if( gDebugTrace )
1415 log_info( " - Creating kernel arguments...\n" );
1416
1417 xOffsets =
1418 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1419 sizeof(cl_float) * imageInfo->width * imageInfo->height,
1420 xOffsetValues, &error);
1421 test_error( error, "Unable to create x offset buffer" );
1422 yOffsets =
1423 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1424 sizeof(cl_float) * imageInfo->width * imageInfo->height,
1425 yOffsetValues, &error);
1426 test_error( error, "Unable to create y offset buffer" );
1427 results = clCreateBuffer(context, CL_MEM_READ_WRITE,
1428 get_explicit_type_size(outputType) * 4
1429 * imageInfo->width * imageInfo->height,
1430 NULL, &error);
1431 test_error( error, "Unable to create result buffer" );
1432
1433 // Create sampler to use
1434 actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
1435 test_error(error, "Unable to create image sampler");
1436
1437 // Set arguments
1438 int idx = 0;
1439 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
1440 test_error( error, "Unable to set kernel arguments" );
1441 if( !gUseKernelSamplers )
1442 {
1443 error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
1444 test_error( error, "Unable to set kernel arguments" );
1445 }
1446 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
1447 test_error( error, "Unable to set kernel arguments" );
1448 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
1449 test_error( error, "Unable to set kernel arguments" );
1450 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
1451 test_error( error, "Unable to set kernel arguments" );
1452
1453 // A cast of troublesome offsets. The first one has to be zero.
1454 const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
1455 int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
1456 int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
1457 int loopCount = 2 * float_offset_count;
1458 if( ! useFloatCoords )
1459 loopCount = 1;
1460 if (gTestMaxImages) {
1461 loopCount = 1;
1462 log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
1463 }
1464
1465 if(gtestTypesToRun & kReadWriteTests)
1466 {
1467 loopCount = 1;
1468 }
1469
1470 // Get the maximum absolute error for this format
1471 double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
1472 if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
1473
1474 if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
1475 initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
1476 if (initHalf) {
1477 log_info("Half rounding mode successfully detected.\n");
1478 }
1479 }
1480
1481 size_t nextLevelOffset = 0;
1482 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
1483 for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
1484 {
1485 size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4;
1486 BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
1487 float lod_float = (float)lod;
1488 char *imagePtr = (char *)imageValues + nextLevelOffset;
1489 if( gTestMipmaps )
1490 {
1491 if(gDebugTrace)
1492 log_info("\t- Working at mip level %d\n", lod);
1493 error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
1494 }
1495
1496 // Validate results element by element
1497 for( int q = 0; q < loopCount; q++ )
1498 {
1499 float offset = float_offsets[ q % float_offset_count ];
1500
1501 // Init the coordinates
1502 InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues,
1503 q>=float_offset_count ? -offset: offset,
1504 q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod );
1505
1506 error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
1507 test_error( error, "Unable to write x offsets" );
1508 error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
1509 test_error( error, "Unable to write y offsets" );
1510
1511 // Get results
1512 memset( resultValues, 0xff, resultValuesSize );
1513 clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
1514
1515 // Run the kernel
1516 threads[0] = (size_t)width_lod;
1517 threads[1] = (size_t)height_lod;
1518 error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
1519 test_error( error, "Unable to run kernel" );
1520
1521 if( gDebugTrace )
1522 log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) );
1523
1524 error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check
1525 test_error( error, "Unable to read results from kernel" );
1526 if( gDebugTrace )
1527 log_info( " results read\n" );
1528
1529 int retCode;
1530 switch (imageInfo->format->image_channel_order) {
1531 case CL_DEPTH:
1532 retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1533 break;
1534 case CL_sRGB:
1535 case CL_sRGBx:
1536 case CL_sRGBA:
1537 case CL_sBGRA:
1538 retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1539 break;
1540 default:
1541 retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1542 }
1543 if (retCode)
1544 return retCode;
1545 }
1546 end:
1547 if ( gTestMipmaps )
1548 {
1549 nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
1550 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1551 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1552 }
1553 }
1554
1555 if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
1556
1557 return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1558 }
1559
test_read_image_set_2D(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1560 int test_read_image_set_2D(cl_device_id device, cl_context context,
1561 cl_command_queue queue,
1562 const cl_image_format *format,
1563 image_sampler_data *imageSampler, bool floatCoords,
1564 ExplicitType outputType)
1565 {
1566 char programSrc[10240];
1567 const char *ptr;
1568 const char *readFormat;
1569 clProgramWrapper program;
1570 clKernelWrapper kernel;
1571 const char *KernelSourcePattern = NULL;
1572
1573 if (gTestImage2DFromBuffer)
1574 {
1575 if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
1576 {
1577 switch (format->image_channel_data_type)
1578 {
1579 case CL_UNORM_INT8:
1580 case CL_UNORM_INT16:
1581 case CL_SNORM_INT8:
1582 case CL_SNORM_INT16:
1583 case CL_HALF_FLOAT:
1584 case CL_FLOAT:
1585 case CL_SIGNED_INT8:
1586 case CL_SIGNED_INT16:
1587 case CL_SIGNED_INT32:
1588 case CL_UNSIGNED_INT8:
1589 case CL_UNSIGNED_INT16:
1590 case CL_UNSIGNED_INT32:
1591 log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
1592 GetChannelTypeName( format->image_channel_data_type ));
1593 return 0;
1594 default:
1595 break;
1596 }
1597 }
1598 }
1599
1600
1601 RandomSeed seed( gRandomSeed );
1602 int error;
1603
1604 // Get our operating params
1605 size_t maxWidth, maxHeight;
1606 cl_ulong maxAllocSize, memSize;
1607 image_descriptor imageInfo = { 0x0 };
1608 size_t pixelSize;
1609
1610 imageInfo.format = format;
1611 imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
1612 imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
1613 pixelSize = get_pixel_size( imageInfo.format );
1614
1615 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1616 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1617 error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1618 error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1619 test_error( error, "Unable to get max image 2D size from device" );
1620
1621 if (memSize > (cl_ulong)SIZE_MAX) {
1622 memSize = (cl_ulong)SIZE_MAX;
1623 }
1624
1625 // Determine types
1626 if( outputType == kInt )
1627 readFormat = "i";
1628 else if( outputType == kUInt )
1629 readFormat = "ui";
1630 else // kFloat
1631 readFormat = "f";
1632
1633 // Construct the source
1634 const char *samplerArg = samplerKernelArg;
1635 char samplerVar[ 1024 ] = "";
1636 if( gUseKernelSamplers )
1637 {
1638 get_sampler_kernel_code( imageSampler, samplerVar );
1639 samplerArg = "";
1640 }
1641
1642 if(gtestTypesToRun & kReadTests)
1643 {
1644 KernelSourcePattern = read2DKernelSourcePattern;
1645 }
1646 else
1647 {
1648 KernelSourcePattern = read_write2DKernelSourcePattern;
1649 }
1650
1651
1652 sprintf( programSrc, KernelSourcePattern,
1653 (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
1654 samplerArg, get_explicit_type_name( outputType ),
1655 (format->image_channel_order == CL_DEPTH) ? "" : "4",
1656 gTestMipmaps?", float lod":" ",
1657 samplerVar,
1658 gTestMipmaps? lodOffsetSource : offsetSource,
1659 floatCoords ? floatKernelSource : intCoordKernelSource,
1660 readFormat,
1661 gTestMipmaps?", lod":" ");
1662
1663 ptr = programSrc;
1664 error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
1665 "sample_kernel");
1666 test_error( error, "Unable to create testing kernel" );
1667
1668 if( gTestSmallImages )
1669 {
1670 for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1671 {
1672 imageInfo.rowPitch = imageInfo.width * pixelSize;
1673 for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1674 {
1675 if( gTestMipmaps )
1676 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1677
1678 if( gDebugTrace )
1679 log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
1680
1681 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1682 if( retCode )
1683 return retCode;
1684 }
1685 }
1686 }
1687 else if( gTestMaxImages )
1688 {
1689 // Try a specific set of maximum sizes
1690 size_t numbeOfSizes;
1691 size_t sizes[100][3];
1692
1693 get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
1694
1695 for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1696 {
1697 imageInfo.width = sizes[ idx ][ 0 ];
1698 imageInfo.height = sizes[ idx ][ 1 ];
1699 imageInfo.rowPitch = imageInfo.width * pixelSize;
1700 log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
1701
1702 if( gTestMipmaps )
1703 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1704
1705 if( gDebugTrace )
1706 log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
1707 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1708 if( retCode )
1709 return retCode;
1710 }
1711 }
1712 else if( gTestRounding )
1713 {
1714 uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
1715 typeRange /= pixelSize / get_format_type_size( imageInfo.format );
1716 imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
1717 imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1718 while( imageInfo.height >= maxHeight / 2 )
1719 {
1720 imageInfo.width <<= 1;
1721 imageInfo.height >>= 1;
1722 }
1723
1724 while( imageInfo.width >= maxWidth / 2 )
1725 imageInfo.width >>= 1;
1726 imageInfo.rowPitch = imageInfo.width * pixelSize;
1727
1728 gRoundingStartValue = 0;
1729 do
1730 {
1731 if( gDebugTrace )
1732 log_info( " at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
1733 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1734 if( retCode )
1735 return retCode;
1736
1737 gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
1738
1739 } while( gRoundingStartValue < typeRange );
1740 }
1741 else
1742 {
1743 cl_uint imagePitchAlign = 0;
1744 if (gTestImage2DFromBuffer)
1745 {
1746 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
1747 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
1748 #endif
1749 if (!imagePitchAlign || error) {
1750 test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
1751 imagePitchAlign = 1;
1752 }
1753 }
1754
1755 int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1756 int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
1757
1758 for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1759 {
1760 cl_ulong size;
1761 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1762 // image, the result array, plus offset arrays, will fit in the global ram space
1763 do
1764 {
1765 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1766 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
1767
1768 imageInfo.rowPitch = imageInfo.width * pixelSize;
1769 if( gTestMipmaps )
1770 {
1771 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1772 size = 4 * compute_mipmapped_image_size(imageInfo);
1773 }
1774 else
1775 {
1776 if( gEnablePitch )
1777 {
1778 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1779 imageInfo.rowPitch += extraWidth * pixelSize;
1780 }
1781
1782 // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
1783 if (gTestImage2DFromBuffer)
1784 {
1785 size_t pitch = imagePitchAlign * pixelSize;
1786 imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
1787 }
1788
1789 size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
1790 }
1791 } while( size > maxAllocSize || ( size * 3 ) > memSize );
1792
1793 if( gDebugTrace )
1794 log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
1795 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1796 if( retCode )
1797 return retCode;
1798 }
1799 }
1800
1801 return 0;
1802 }
1803