//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/conversions.h"

const char *atomic_index_source =
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
"// Counter keeps track of which index in counts we are using.\n"
"// We get that value, increment it, and then set that index in counts to our thread ID.\n"
"// At the end of this we should have all thread IDs in some random location in counts\n"
"// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
"// will be missing some.\n"
"\n"
"__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
"    int tid = get_global_id(0);\n"
"    \n"
"    int counter_to_use = atom_add(counter, 1);\n"
"    counts[counter_to_use] = tid;\n"
"}";

int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper counter, counters;
    size_t numGlobalThreads, numLocalThreads;
    int fail = 0, succeed = 0, err;

    /* Check if atomics are supported. */
    if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
        log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
        return 0;
    }

    //===== add_index test
    // The index test replicates what particles does.
    // It uses one memory location to keep track of the current index and then each thread
    // does an atomic add to it to get its new location. The threads then write to their
    // assigned location. At the end we check to make sure that each thread's ID shows up
    // exactly once in the output.

    numGlobalThreads = 2048;

    if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
        return -1;

    if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
        return -1;

    log_info("Execute global_threads:%d local_threads:%d\n", (int)numGlobalThreads, (int)numLocalThreads);

    // Create the counter that will keep track of where each thread writes.
    counter = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1, NULL, NULL);
    // Create the counters that will hold the results of each thread writing
    // its ID into a (hopefully) unique location.
    counters = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * numGlobalThreads, NULL, NULL);

    // Reset all those locations to -1 to indicate they have not been used.
    cl_int *values = (cl_int*) malloc(sizeof(cl_int) * numGlobalThreads);
    if (values == NULL) {
        log_error("add_index_test FAILED to allocate memory for initial values.\n");
        fail = 1; succeed = -1;
    } else {
        memset(values, -1, sizeof(cl_int) * numGlobalThreads);
        unsigned int i = 0;
        for (i = 0; i < numGlobalThreads; i++)
            values[i] = -1;
    }
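    // What follows is a minimal sketch of the rest of this test, reconstructed
    // from the comments above: write the initial values, zero the counter, run
    // the kernel, and check that every thread ID shows up exactly once in the
    // output. It illustrates the described flow and is not the verbatim test
    // source; the exact messages and error handling are assumptions.
    if (values == NULL)
        return -1;

    err = clEnqueueWriteBuffer(queue, counters, true, 0, sizeof(cl_int) * numGlobalThreads, values, 0, NULL, NULL);
    if (err) {
        log_error("add_index_test FAILED to write counters: %d\n", err);
        return -1;
    }

    // Start the shared counter at zero so the first thread claims index 0.
    cl_int zero = 0;
    err = clEnqueueWriteBuffer(queue, counter, true, 0, sizeof(cl_int), &zero, 0, NULL, NULL);
    if (err) {
        log_error("add_index_test FAILED to write counter: %d\n", err);
        return -1;
    }

    err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
    err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
    if (err) {
        log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
        return -1;
    }

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL);
    if (err) {
        log_error("add_index_test FAILED to execute kernel: %d\n", err);
        return -1;
    }

    err = clEnqueueReadBuffer(queue, counters, true, 0, sizeof(cl_int) * numGlobalThreads, values, 0, NULL, NULL);
    if (err) {
        log_error("add_index_test FAILED to read back counters: %d\n", err);
        return -1;
    }

    // Every ID in 0..numGlobalThreads-1 must appear exactly once; a duplicate
    // or out-of-range value means atom_add handed the same index to two threads.
    char *seen = (char *)calloc(numGlobalThreads, 1);
    if (seen == NULL) {
        free(values);
        return -1;
    }
    unsigned int j;
    for (j = 0; j < numGlobalThreads; j++) {
        cl_int tid = values[j];
        if (tid < 0 || (size_t)tid >= numGlobalThreads || seen[tid]) {
            log_error("add_index_test FAILED: missing or duplicated thread ID %d at index %u.\n", tid, j);
            fail = 1;
            break;
        }
        seen[tid] = 1;
    }
    free(seen);
    free(values);
    if (fail)
        return -1;

    log_info("add_index_test passed.\n");
    return 0;
}

//===== add_index_bin test
// The fragment below belongs to the bin variant of the test: items are
// assigned to bins up front, and each thread uses atom_add on its bin's
// counter to claim a unique slot within that bin. The fragment picks up where
// the host assigns each item to a bin; the kernel source and the host setup
// that create the bin_counters, bins, and bin_assignments buffers, allocate
// l_bin_counts and l_bin_assignments, and seed the random generator d precede
// it. A plausible kernel, inferred from the kernel arguments set below and
// therefore an assumption, would be:
//
//   __kernel void add_index_bin_test(__global int *bin_counters,
//                                    __global int *bins,
//                                    __global int *bin_assignments,
//                                    int max_counts_per_bin) {
//       int tid = get_global_id(0);
//       int bin = bin_assignments[tid];
//       int slot = atom_add(&bin_counters[bin], 1);
//       bins[bin * max_counts_per_bin + slot] = tid;
//   }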
    // Assign each item to a random bin, rejecting any bin that is already full.
    for (i = 0; i < number_of_items; i++) {
        int bin = random_in_range(0, number_of_bins - 1, d);
        while (l_bin_counts[bin] >= max_counts_per_bin) {
            bin = random_in_range(0, number_of_bins - 1, d);
        }
        if (bin >= number_of_bins)
            log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
        if (l_bin_counts[bin] + 1 > max_counts_per_bin)
            log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
        l_bin_counts[bin]++;
        l_bin_assignments[i] = bin;
        // log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
    }

    err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int) * number_of_items, l_bin_assignments, 0, NULL, NULL);
    if (err) {
        log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
        return -1;
    }

    // Set up the kernel.
    err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
    err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
    err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
    err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
    if (err) {
        log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
        fail = 1; succeed = -1;
        return -1;
    }

    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
    if (err) {
        log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
        fail = 1; succeed = -1;
    }

    cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
    if (!final_bin_assignments) {
        log_error("add_index_bin_test FAILED to allocate memory for final_bin_assignments.\n");
        return -1;
    }
    err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int) * number_of_bins * max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
    if (err) {
        log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
        fail = 1; succeed = -1;
    }

    cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int) * number_of_bins);
    if (!final_bin_counts) {
        log_error("add_index_bin_test FAILED to allocate memory for final_bin_counts.\n");
        return -1;
    }
    err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int) * number_of_bins, final_bin_counts, 0, NULL, NULL );
    if (err) {
        log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
        fail = 1; succeed = -1;
    }

    // Verification.
    int errors = 0;
    int current_bin;
    int search;
    // Print out all the contents of the bins.
    // for (current_bin = 0; current_bin < number_of_bins; current_bin++)
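    // A minimal sketch of one way to verify the results, assuming (as the
    // buffer sizes imply) that number_of_items == number_of_bins *
    // max_counts_per_bin so every bin ends up full: each bin counter must
    // equal max_counts_per_bin, and every slot in bins must hold an item that
    // was assigned to that bin. This is an illustration, not the original
    // verification code.
    for (current_bin = 0; current_bin < number_of_bins; current_bin++) {
        if (final_bin_counts[current_bin] != max_counts_per_bin) {
            log_error("add_index_bin_test FAILED: bin %d contains %d entries, expected %d.\n",
                      current_bin, final_bin_counts[current_bin], max_counts_per_bin);
            errors++;
        }
        for (search = 0; search < max_counts_per_bin; search++) {
            cl_int item = final_bin_assignments[current_bin * max_counts_per_bin + search];
            if (item < 0 || item >= number_of_items) {
                log_error("add_index_bin_test FAILED: bin %d slot %d holds invalid item %d.\n",
                          current_bin, search, item);
                errors++;
            } else if (l_bin_assignments[item] != current_bin) {
                log_error("add_index_bin_test FAILED: item %d landed in bin %d but was assigned to bin %d.\n",
                          item, current_bin, l_bin_assignments[item]);
                errors++;
            }
        }
    }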