Home
last modified time | relevance | path

Searched refs:warpSize (Results 1 – 17 of 17) sorted by relevance

/external/llvm-project/clang/lib/Headers/
D__clang_cuda_intrinsics.h26 int __width = warpSize) { \
28 ((warpSize - __width) << 8) | (__Mask)); \
31 int __width = warpSize) { \
33 ((warpSize - __width) << 8) | (__Mask)); \
36 int __width = warpSize) { \
41 int __width = warpSize) { \
56 int __width = warpSize) { \
68 unsigned long __val, __Type __offset, int __width = warpSize) { \
73 unsigned long long __val, __Type __offset, int __width = warpSize) { \
78 int __width = warpSize) { \
[all …]
D__clang_cuda_builtin_vars.h114 __attribute__((device)) const int warpSize = 32; variable
/external/clang/test/SemaCUDA/
Dcuda-builtin-vars.cu35 out[i++] = warpSize; in kernel()
36warpSize = 0; // expected-error {{cannot assign to variable 'warpSize' with const-qualified type '… in kernel()
56 const void *wsptr = &warpSize; in kernel()
/external/llvm-project/clang/test/SemaCUDA/
Dcuda-builtin-vars.cu35 out[i++] = warpSize; in kernel()
36warpSize = 0; // expected-error {{cannot assign to variable 'warpSize' with const-qualified type '… in kernel()
56 const void *wsptr = &warpSize; in kernel()
/external/clang/lib/Headers/
D__clang_cuda_intrinsics.h39 int __width = warpSize) { \
41 ((warpSize - __width) << 8) | (__Mask)); \
44 int __width = warpSize) { \
46 ((warpSize - __width) << 8) | (__Mask)); \
49 int __width = warpSize) { \
54 int __width = warpSize) { \
69 unsigned long long __in, int __offset, int __width = warpSize) { \
74 int __width = warpSize) { \
Dcuda_builtin_vars.h120 __attribute__((device)) const int warpSize = 32; variable
/external/tensorflow/tensorflow/core/util/
Dgpu_kernel_helper_test.cu.cc101 for (int width = warpSize; width > 1; width /= 2) { in GpuShuffleGetSrcLaneTest()
111 for (int src_lane = -warpSize; src_lane <= warpSize; ++src_lane) { in GpuShuffleGetSrcLaneTest()
121 for (unsigned delta = 0; delta <= warpSize; ++delta) { in GpuShuffleGetSrcLaneTest()
128 for (unsigned delta = 0; delta <= warpSize; ++delta) { in GpuShuffleGetSrcLaneTest()
135 for (int lane_lane = warpSize; lane_lane > 0; lane_lane /= 2) { in GpuShuffleGetSrcLaneTest()
Dgpu_kernel_helper.h171 int width = warpSize) {
178 unsigned mask, Eigen::half value, int delta, int width = warpSize) {
185 unsigned mask, Eigen::half value, int delta, int width = warpSize) {
192 unsigned mask, Eigen::half value, int lane_mask, int width = warpSize) {
Dgpu_device_functions.h346 int width = warpSize) {
361 int src_lane, int width = warpSize) {
385 int width = warpSize) {
401 int width = warpSize) {
425 int width = warpSize) {
441 int width = warpSize) {
465 int width = warpSize) {
485 int width = warpSize) {
501 int width = warpSize) {
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/
DTensorReductionCuda.h170 for (int offset = warpSize/2; offset > 0; offset /= 2) { in FullReductionKernel()
171 reducer.reduce(__shfl_down(accum, offset, warpSize), &accum); in FullReductionKernel()
174 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernel()
246 for (int offset = warpSize/2; offset > 0; offset /= 2) { in FullReductionKernelHalfFloat()
247 reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum); in FullReductionKernelHalfFloat()
250 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernelHalfFloat()
428 for (int offset = warpSize/2; offset > 0; offset /= 2) {
432 if ((threadIdx.x & (warpSize - 1)) == 0) {
518 for (int offset = warpSize/2; offset > 0; offset /= 2) {
519 reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1);
[all …]
DTensorConvolution.h858 const int warpSize = 32;
883 const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar));
887 block_size.x = numext::mini(warpSize, maxX);
/external/llvm-project/clang/test/CodeGenCUDA/
Dcuda-builtin-vars.cu25 out[i++] = warpSize; // CHECK: store i32 32, in kernel()
/external/clang/test/CodeGenCUDA/
Dcuda-builtin-vars.cu25 out[i++] = warpSize; // CHECK: store i32 32, in kernel()
/external/eigen/test/
Dcuda_common.h93 std::cout << " warpSize: " << deviceProp.warpSize << "\n"; in ei_test_init_cuda()
/external/tensorflow/tensorflow/core/kernels/
Dbias_op_gpu.cu.cc190 for (int32 delta = warpSize / 2; delta > 0; delta /= 2) { in BiasGradNCHW_SharedAtomics()
/external/eigen/Eigen/src/Core/arch/CUDA/
DHalf.h596 …e__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
/external/tensorflow/tensorflow/core/profiler/internal/gpu/
Dcupti_collector.cc579 device_properties_.warpSize = *warp_size; in GetDeviceCapabilities()