1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "structs.h"
17 
18 
19 #include "defines.h"
20 
21 #define DEBUG_MEM_ALLOC 0
22 
23 /** typedef struct _bufferStruct
24  {
25  void * m_pIn;
26  void * m_pOut;
27 
28  cl_mem m_outBuffer;
29  cl_mem m_inBuffer;
30 
31  size_t m_bufSize;
32  } bufferStruct;
33  */
34 
35 
newClState(cl_device_id device,cl_context context,cl_command_queue queue)36 clState *newClState(cl_device_id device, cl_context context,
37                     cl_command_queue queue)
38 {
39     clState *pResult = (clState *)malloc(sizeof(clState));
40 #if DEBUG_MEM_ALLOC
41     log_info("malloc clState * %x\n", pResult);
42 #endif
43 
44     pResult->m_device = device;
45     pResult->m_context = context;
46     pResult->m_queue = queue;
47 
48     pResult->m_kernel = NULL;
49     pResult->m_program = NULL;
50     return pResult;
51 }
52 
destroyClState(clState * pState)53 clState *destroyClState(clState *pState)
54 {
55     clStateDestroyProgramAndKernel(pState);
56 #if DEBUG_MEM_ALLOC
57     log_info("delete (free) clState * %x\n", pState);
58 #endif
59     free(pState);
60     return NULL;
61 }
62 
63 
clStateMakeProgram(clState * pState,const char * prog,const char * kernelName)64 int clStateMakeProgram(clState *pState, const char *prog,
65                        const char *kernelName)
66 {
67     const char *srcArr[1] = { NULL };
68     srcArr[0] = prog;
69     int err =
70         create_single_kernel_helper(pState->m_context, &(pState->m_program),
71                                     &(pState->m_kernel), 1, srcArr, kernelName);
72 #if DEBUG_MEM_ALLOC
73     log_info("create program and kernel\n");
74 #endif
75     return err;
76 }
77 
runKernel(clState * pState,size_t numThreads)78 int runKernel(clState *pState, size_t numThreads)
79 {
80     int err;
81     pState->m_numThreads = numThreads;
82     err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, 1, NULL,
83                                  &(pState->m_numThreads), NULL, 0, NULL, NULL);
84     if (err != CL_SUCCESS)
85     {
86         log_error("clEnqueueNDRangeKernel returned %d (%x)\n", err, err);
87         return -1;
88     }
89     return 0;
90 }
91 
92 
clStateDestroyProgramAndKernel(clState * pState)93 void clStateDestroyProgramAndKernel(clState *pState)
94 {
95 #if DEBUG_MEM_ALLOC
96     log_info("destroy program and kernel\n");
97 #endif
98     if (pState->m_kernel != NULL)
99     {
100         clReleaseKernel(pState->m_kernel);
101         pState->m_kernel = NULL;
102     }
103     if (pState->m_program != NULL)
104     {
105         clReleaseProgram(pState->m_program);
106         pState->m_program = NULL;
107     }
108 }
109 
newBufferStruct(size_t inSize,size_t outSize,clState * pClState)110 bufferStruct *newBufferStruct(size_t inSize, size_t outSize, clState *pClState)
111 {
112     int error;
113     bufferStruct *pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
114 #if DEBUG_MEM_ALLOC
115     log_info("malloc bufferStruct * %x\n", pResult);
116 #endif
117 
118     pResult->m_bufSizeIn = inSize;
119     pResult->m_bufSizeOut = outSize;
120 
121     pResult->m_pIn = malloc(inSize);
122     pResult->m_pOut = malloc(outSize);
123 #if DEBUG_MEM_ALLOC
124     log_info("malloc m_pIn %x\n", pResult->m_pIn);
125     log_info("malloc m_pOut %x\n", pResult->m_pOut);
126 #endif
127 
128     pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
129                                          inSize, NULL, &error);
130     if (pResult->m_inBuffer == NULL)
131     {
132         vlog_error("clCreateArray failed for input (%d)\n", error);
133         return destroyBufferStruct(pResult, pClState);
134     }
135 #if DEBUG_MEM_ALLOC
136     log_info("clCreateBuffer %x\n", pResult->m_inBuffer);
137 #endif
138 
139     pResult->m_outBuffer = clCreateBuffer(
140         pClState->m_context, CL_MEM_WRITE_ONLY, outSize, NULL, &error);
141     if (pResult->m_outBuffer == NULL)
142     {
143         vlog_error("clCreateArray failed for output (%d)\n", error);
144         return destroyBufferStruct(pResult, pClState);
145     }
146 #if DEBUG_MEM_ALLOC
147     log_info("clCreateBuffer %x\n", pResult->m_outBuffer);
148 #endif
149 
150     pResult->m_bufferUploaded = false;
151 
152     return pResult;
153 }
154 
destroyBufferStruct(bufferStruct * destroyMe,clState * pClState)155 bufferStruct *destroyBufferStruct(bufferStruct *destroyMe, clState *pClState)
156 {
157     if (destroyMe)
158     {
159         if (destroyMe->m_outBuffer != NULL)
160         {
161 #if DEBUG_MEM_ALLOC
162             log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
163 #endif
164             clReleaseMemObject(destroyMe->m_outBuffer);
165             destroyMe->m_outBuffer = NULL;
166         }
167         if (destroyMe->m_inBuffer != NULL)
168         {
169 #if DEBUG_MEM_ALLOC
170             log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
171 #endif
172             clReleaseMemObject(destroyMe->m_inBuffer);
173             destroyMe->m_inBuffer = NULL;
174         }
175         if (destroyMe->m_pIn != NULL)
176         {
177 #if DEBUG_MEM_ALLOC
178             log_info("delete (free) m_pIn %x\n", destroyMe->m_pIn);
179 #endif
180             free(destroyMe->m_pIn);
181             destroyMe->m_pIn = NULL;
182         }
183         if (destroyMe->m_pOut != NULL)
184         {
185 #if DEBUG_MEM_ALLOC
186             log_info("delete (free) m_pOut %x\n", destroyMe->m_pOut);
187 #endif
188             free(destroyMe->m_pOut);
189             destroyMe->m_pOut = NULL;
190         }
191 #if DEBUG_MEM_ALLOC
192         log_info("delete (free) bufferStruct * %x\n", destroyMe);
193 #endif
194         free((void *)destroyMe);
195         destroyMe = NULL;
196     }
197     return destroyMe;
198 }
199 
initContents(bufferStruct * pBufferStruct,clState * pClState,size_t typeSize,size_t countIn,size_t countOut)200 void initContents(bufferStruct *pBufferStruct, clState *pClState,
201                   size_t typeSize, size_t countIn, size_t countOut)
202 {
203     size_t i;
204 
205     uint64_t start = 0;
206 
207     switch (typeSize)
208     {
209         case 1: {
210             uint8_t *ub = (uint8_t *)(pBufferStruct->m_pIn);
211             for (i = 0; i < countIn; ++i)
212             {
213                 ub[i] = (uint8_t)start++;
214             }
215             break;
216         }
217         case 2: {
218             uint16_t *us = (uint16_t *)(pBufferStruct->m_pIn);
219             for (i = 0; i < countIn; ++i)
220             {
221                 us[i] = (uint16_t)start++;
222             }
223             break;
224         }
225         case 4: {
226             if (!g_wimpyMode)
227             {
228                 uint32_t *ui = (uint32_t *)(pBufferStruct->m_pIn);
229                 for (i = 0; i < countIn; ++i)
230                 {
231                     ui[i] = (uint32_t)start++;
232                 }
233             }
234             else
235             {
236                 // The short test doesn't iterate over the entire 32 bit space
237                 // so we alternate between positive and negative values
238                 int32_t *ui = (int32_t *)(pBufferStruct->m_pIn);
239                 int32_t sign = 1;
240                 for (i = 0; i < countIn; ++i, ++start)
241                 {
242                     ui[i] = (int32_t)start * sign;
243                     sign = sign * -1;
244                 }
245             }
246             break;
247         }
248         case 8: {
249             // We don't iterate over the entire space of 64 bit so for the
250             // selects, we want to test positive and negative values
251             int64_t *ll = (int64_t *)(pBufferStruct->m_pIn);
252             int64_t sign = 1;
253             for (i = 0; i < countIn; ++i, ++start)
254             {
255                 ll[i] = start * sign;
256                 sign = sign * -1;
257             }
258             break;
259         }
260         default: {
261             log_error("invalid type size %x\n", (int)typeSize);
262         }
263     }
264     // pBufferStruct->m_bufSizeIn
265     // pBufferStruct->m_bufSizeOut
266 }
267 
pushArgs(bufferStruct * pBufferStruct,clState * pClState)268 int pushArgs(bufferStruct *pBufferStruct, clState *pClState)
269 {
270     int err;
271     if (!pBufferStruct->m_bufferUploaded)
272     {
273         err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer,
274                                    CL_TRUE, 0, pBufferStruct->m_bufSizeIn,
275                                    pBufferStruct->m_pIn, 0, NULL, NULL);
276 #if DEBUG_MEM_ALLOC
277         log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer);
278 #endif
279         if (err != CL_SUCCESS)
280         {
281             log_error("clEnqueueWriteBuffer failed\n");
282             return -1;
283         }
284         pBufferStruct->m_bufferUploaded = true;
285     }
286 
287     err = clSetKernelArg(
288         pClState->m_kernel, 0,
289         sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn,
290         &(pBufferStruct->m_inBuffer));
291 #if DEBUG_MEM_ALLOC
292     // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_inBuffer);
293 #endif
294     if (err != CL_SUCCESS)
295     {
296         log_error("clSetKernelArgs failed, first arg (0)\n");
297         return -1;
298     }
299 
300     err = clSetKernelArg(
301         pClState->m_kernel, 1,
302         sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut,
303         &(pBufferStruct->m_outBuffer));
304     if (err != CL_SUCCESS)
305     {
306         log_error("clSetKernelArgs failed, second arg (1)\n");
307         return -1;
308     }
309 
310 #if DEBUG_MEM_ALLOC
311     // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_outBuffer);
312 #endif
313 
314     return 0;
315 }
316 
retrieveResults(bufferStruct * pBufferStruct,clState * pClState)317 int retrieveResults(bufferStruct *pBufferStruct, clState *pClState)
318 {
319     int err;
320     err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer,
321                               CL_TRUE, 0, pBufferStruct->m_bufSizeOut,
322                               pBufferStruct->m_pOut, 0, NULL, NULL);
323     if (err != CL_SUCCESS)
324     {
325         log_error("clEnqueueReadBuffer failed\n");
326         return -1;
327     }
328     return 0;
329 }
330 
331 // vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
332 // and g_arrVecSizes
checkCorrectnessAlign(bufferStruct * pBufferStruct,clState * pClState,size_t minAlign)333 int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
334                           size_t minAlign)
335 {
336     size_t i;
337     cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
338     for (i = 0; i < pClState->m_numThreads; ++i)
339     {
340         if ((targetArr[i]) % minAlign != (cl_uint)0)
341         {
342             vlog_error("Error %d (of %d).  Expected a multple of %x, got %x\n",
343                        i, pClState->m_numThreads, minAlign, targetArr[i]);
344             return -1;
345         }
346     }
347 
348     /*    log_info("\n");
349      for(i = 0; i < 4; ++i) {
350      log_info("%lx, ", targetArr[i]);
351      }
352      log_info("\n");
353      fflush(stdout); */
354     return 0;
355 }
356 
checkCorrectnessStep(bufferStruct * pBufferStruct,clState * pClState,size_t typeSize,size_t vecWidth)357 int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
358                          size_t typeSize, size_t vecWidth)
359 {
360     size_t i;
361     cl_int targetSize = (cl_int)vecWidth;
362     cl_int *targetArr = (cl_int *)(pBufferStruct->m_pOut);
363     if (targetSize == 3)
364     {
365         targetSize = 4; // hack for 4-aligned vec3 types
366     }
367     for (i = 0; i < pClState->m_numThreads; ++i)
368     {
369         if (targetArr[i] != targetSize)
370         {
371             vlog_error("Error %ld (of %ld).  Expected %d, got %d\n", i,
372                        pClState->m_numThreads, targetSize, targetArr[i]);
373             return -1;
374         }
375     }
376     return 0;
377 }
378 
379 // vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
380 // and g_arrVecSizes
checkPackedCorrectness(bufferStruct * pBufferStruct,clState * pClState,size_t totSize,size_t beforeSize)381 int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
382                            size_t totSize, size_t beforeSize)
383 {
384     size_t i;
385     cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
386     for (i = 0; i < pClState->m_numThreads; ++i)
387     {
388         if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
389         {
390             vlog_error("Error %d (of %d).  Expected %d more than a multple of "
391                        "%d, got %d \n",
392                        i, pClState->m_numThreads, beforeSize, totSize,
393                        targetArr[i]);
394             return -1;
395         }
396     }
397 
398     /*    log_info("\n");
399      for(i = 0; i < 4; ++i) {
400      log_info("%lx, ", targetArr[i]);
401      }
402      log_info("\n");
403      fflush(stdout); */
404     return 0;
405 }
406