Searched refs:ldg (Results 1 – 25 of 65) sorted by relevance


/external/tensorflow/tensorflow/core/kernels/
compare_and_bitpack_op_gpu.cu.cc:45 const T thresh = ldg(threshold); in CompareAndBitpackKernel()
49 ((((ldg(block) > thresh) << 7)) | (((ldg(block + 1) > thresh) << 6)) | in CompareAndBitpackKernel()
50 (((ldg(block + 2) > thresh) << 5)) | in CompareAndBitpackKernel()
51 (((ldg(block + 3) > thresh) << 4)) | in CompareAndBitpackKernel()
52 (((ldg(block + 4) > thresh) << 3)) | in CompareAndBitpackKernel()
53 (((ldg(block + 5) > thresh) << 2)) | in CompareAndBitpackKernel()
54 (((ldg(block + 6) > thresh) << 1)) | (((ldg(block + 7) > thresh)))); in CompareAndBitpackKernel()
66 const int64 block = ldg(reinterpret_cast<const int64*>(input + 8 * i)); in CompareAndBitpackKernel()
83 const float thresh = ldg(threshold); in CompareAndBitpackKernel()
85 const float4 block0 = ldg(reinterpret_cast<const float4*>(input + 8 * i)); in CompareAndBitpackKernel()
[all …]
softmax_op_gpu.cu.cc:88 max_val[i] = strict_cast<U>(ldg(max_logits + row)); in GenerateNormalizedProb()
99 result[i] = input[i] - max_val[i] - log(ldg(sum_probs + row)); in GenerateNormalizedProb()
101 result[i] = exp(input[i] - max_val[i]) / ldg(sum_probs + row); in GenerateNormalizedProb()
134 max_val[i] = strict_cast<float>(ldg(max_logits + row[i])); in GenerateNormalizedProb()
136 result[i] = input[i] - max_val[i] - log(ldg(sum_probs + row[i])); in GenerateNormalizedProb()
138 result[i] = exp(input[i] - max_val[i]) / ldg(sum_probs + row[i]); in GenerateNormalizedProb()
150 max_val[i] = strict_cast<float>(ldg(max_logits + row[i])); in GenerateNormalizedProb()
152 result[i] = input[i] - max_val[i] - log(ldg(sum_probs + row[i])); in GenerateNormalizedProb()
154 result[i] = exp(input[i] - max_val[i]) / ldg(sum_probs + row[i]); in GenerateNormalizedProb()
172 strict_cast<U>(logits_[gid] - ldg(max_logits_ + gid / num_cols_)); in operator ()()
population_count_op_gpu.cu.cc:39 GPU_1D_KERNEL_LOOP(i, size) { output[i] = __popc(ldg(input + i)); } in PopulationCountKernel()
48 output[i] = __popc(ldg(reinterpret_cast<const uint8*>(input + i))); in PopulationCountKernel()
58 output[i] = __popc(ldg(reinterpret_cast<const uint16*>(input + i))); in PopulationCountKernel()
66 GPU_1D_KERNEL_LOOP(i, size) { output[i] = __popcll(ldg(input + i)); } in PopulationCountKernel()
sparse_tensor_dense_matmul_op_gpu.cu.cc:40 const int i = ldg(a_indices + 2 * a_ix + ((ADJ_A) ? 1 : 0)); in SparseTensorDenseMatMulKernel()
41 const int k = ldg(a_indices + 2 * a_ix + ((ADJ_A) ? 0 : 1)); in SparseTensorDenseMatMulKernel()
53 const T a_value = ldg(a_values + a_ix); in SparseTensorDenseMatMulKernel()
56 const T b_value = ldg(b + ((ADJ_B) ? j * b_cols + k : k * b_cols + j)); in SparseTensorDenseMatMulKernel()
bias_op_gpu.cu.cc:59 output[index] = ldg(input + index) + ldg(bias + bias_offset); in BiasNHWCKernel()
71 output[index] = ldg(input + index) + ldg(bias + bias_offset); in BiasNCHWKernel()
109 GpuAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); in BiasGradNHWC_Naive()
122 GpuAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); in BiasGradNCHW_Naive()
141 GpuAtomicAdd(s_data + bias_offset, AccT(ldg(output_backprop + index))); in BiasGradNHWC_SharedAtomics()
173 T val = ldg(output_backprop + in BiasGradNCHW_SharedAtomics()
depthwise_conv_op_gpu.h:144 sum += static_cast<S>(ldg(input + input_offset)) *
145 static_cast<S>(ldg(filter + filter_offset));
167 sum += static_cast<S>(ldg(input + input_offset)) *
168 static_cast<S>(ldg(filter + filter_offset));
281 tile_ptr[0] = static_cast<S>(ldg(in_ptr));
283 tile_ptr[tile_offset] = static_cast<S>(ldg(tensor_offset + in_ptr));
288 static_cast<S>(ldg(filter_offset + filter));
429 sum += static_cast<S>(ldg(input + input_offset)) *
430 static_cast<S>(ldg(filter + filter_offset));
457 sum += static_cast<S>(ldg(input + input_offset)) *
[all …]
bincount_op_gpu.cu.cc:108 Tidx bin = ldg(in + index); in BincountReduceKernel()
139 Tidx bin = ldg(in + index); in BincountColReduceKernel()
146 T value = (weights_size == 0) ? T(1) : ldg(weights + index); in BincountColReduceKernel()
169 Tidx bin = ldg(in + index); in BincountColReduceSharedKernel()
176 T value = (weights_size == 0) ? T(1) : ldg(weights + index); in BincountColReduceSharedKernel()
in_topk_op_gpu.cu.cc:46 TargetT target_idx = ldg(targets + batch_index); in ComputePredictionMaskKernel()
53 T prediction = ldg(predictions + i); in ComputePredictionMaskKernel()
55 ldg(predictions + batch_index * num_classes + target_idx); in ComputePredictionMaskKernel()
inplace_ops_functor_gpu.cu.cc:39 *p = ldg(q); in DoParallelConcatOpKernel()
96 *p = ldg(q); in DoInplaceOpKernel()
99 *p += ldg(q); in DoInplaceOpKernel()
102 *p -= ldg(q); in DoInplaceOpKernel()
gather_functor_batched_gpu.cu.h:67 Index gather_i = ldg(indices + batch_i * indices_size + indices_i); in GatherOpKernel()
80 out[i] = ldg(params + params_i); in GatherOpKernel()
gather_functor_gpu.cu.h:56 Index gather_i = ldg(indices + indices_i); in GatherOpKernel()
69 out[i] = ldg(params + params_i); in GatherOpKernel()
depthtospace_op_gpu.cu.cc:57 *(output_ptr + out_idx) = ldg(input_ptr + inp_idx); in D2S_NHWC()
96 *(output_ptr + output_idx) = ldg(input_ptr + input_idx); in D2S_NCHW()
134 output_ptr[bY * output_width + bX] = ldg( in D2S_NCHW_LOOP()
spacetodepth_op_gpu.cu.cc:57 *(output_ptr + out_idx) = ldg(input_ptr + inp_idx); in S2D_NHWC()
95 *(output_ptr + output_idx) = ldg(input_ptr + input_idx); in S2D_NCHW()
135 ldg(input_ptr + bY * input_width + bX); in S2D_NCHW_LOOP()
gather_nd_op_gpu.cu.cc:44 const Index index_j = ldg(indices_i + j); in GatherSliceOpKernel()
61 out[i] = (out_of_bounds) ? T(0) : ldg(params + offset + loc_offset); in GatherSliceOpKernel()
multinomial_op_gpu.cu.cc:49 if (ldg(maxima + maxima_idx) == ldg(scores + index)) { in MultinomialKernel()
spacetobatch_functor_gpu.cu.cc:89 ldg(space_tensor_ptr + space_tensor_idx); in S2B()
92 ldg(batch_tensor_ptr + batch_tensor_idx); in S2B()
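
Note on the TensorFlow hits above: in these GPU kernels, ldg() is TensorFlow's thin wrapper that routes read-only loads through the GPU's read-only data cache (CUDA's __ldg intrinsic on compute capability 3.5+, with a plain dereference as fallback). Below is a minimal sketch of that pattern, assuming a simplified wrapper; MyLdg and BiasAddSketch are hypothetical names, not TensorFlow's actual helpers.

#include <cuda_runtime.h>

// Illustrative stand-in for TensorFlow's ldg() helper.
template <typename T>
__device__ inline T MyLdg(const T* ptr) {
#if __CUDA_ARCH__ >= 350
  return __ldg(ptr);  // load through the read-only data cache
#else
  return *ptr;        // older architectures: ordinary global load
#endif
}

// Mirrors the BiasNHWCKernel hit above: operands the kernel only reads go
// through MyLdg, the output is written with a normal store.
template <typename T>
__global__ void BiasAddSketch(const T* input, const T* bias, T* output,
                              int size, int bias_size) {
  for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < size;
       index += blockDim.x * gridDim.x) {
    output[index] = MyLdg(input + index) + MyLdg(bias + index % bias_size);
  }
}
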
/external/tensorflow/tensorflow/core/kernels/sparse/
kernels_gpu.cu.cc:44 return static_cast<int>(ldg(begin_ + idx * stride_)); in operator ()()
141 coo_rows_out[i] = static_cast<int>(ldg(indices + i * stride + offset)); in SparseTensorToCOOMatrixKernel()
142 coo_cols_out[i] = static_cast<int>(ldg(indices + i * stride + offset + 1)); in SparseTensorToCOOMatrixKernel()
173 indices_out[i * 2] = static_cast<int64>(ldg(coo_rows + i)); in COOMatrixToSparseTensorKernel2D()
174 indices_out[i * 2 + 1] = static_cast<int64>(ldg(coo_cols + i)); in COOMatrixToSparseTensorKernel2D()
211 indices_out[i * 3 + 1] = static_cast<int64>(ldg(coo_rows + i)); in COOMatrixToSparseTensorKernel3D()
212 indices_out[i * 3 + 2] = static_cast<int64>(ldg(coo_cols + i)); in COOMatrixToSparseTensorKernel3D()
279 c_values[i] = ldg(a_values + i) * local_batch_values[b]; in CSRSparseMatrixBatchMulVecKernel3D()
356 row_max = Eigen::numext::maxi(row_max, ldg(logits + r_i)); in CalculateRowSoftmax()
360 const T exp_i = Eigen::numext::exp(ldg(logits + r_i) - row_max); in CalculateRowSoftmax()
[all …]
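
The CalculateRowSoftmax hits above (kernels_gpu.cu.cc lines 356/360) use ldg inside the usual three-pass row softmax over one CSR row. A hedged sketch of that pattern follows, with hypothetical names and plain float instead of the templated Eigen helpers the real kernel uses.

// Sketch only: begin/end delimit one CSR row's entries in logits/softmax.
__device__ void RowSoftmaxSketch(int begin, int end,
                                 const float* logits, float* softmax) {
  // Pass 1: row maximum, for numerical stability.
  float row_max = -INFINITY;
  for (int r_i = begin; r_i < end; ++r_i) {
    row_max = fmaxf(row_max, __ldg(logits + r_i));
  }
  // Pass 2: exponentiate and accumulate the normalizer.
  float sum_exp = 0.0f;
  for (int r_i = begin; r_i < end; ++r_i) {
    const float exp_i = expf(__ldg(logits + r_i) - row_max);
    softmax[r_i] = exp_i;
    sum_exp += exp_i;
  }
  // Pass 3: normalize.
  for (int r_i = begin; r_i < end; ++r_i) {
    softmax[r_i] /= sum_exp;
  }
}
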
/external/llvm-project/llvm/test/CodeGen/AArch64/
arm64-mte.ll:173 %1 = tail call i8* @llvm.aarch64.ldg(i8* %0, i8* %0)
176 ; CHECK: ldg x0, [x0]
184 %2 = tail call i8* @llvm.aarch64.ldg(i8* %0, i8* %1)
187 ; CHECK: ldg x0, [x1]
196 %1 = call i8* @llvm.aarch64.ldg(i8* nonnull %0, i8* nonnull %0)
201 ; CHECK: ldg [[T0]], [sp]
211 %2 = call i8* @llvm.aarch64.ldg(i8* nonnull %1, i8* nonnull %0)
216 ; CHECK: ldg x0, [sp]
225 %1 = tail call i8* @llvm.aarch64.ldg(i8* nonnull %0, i8* nonnull %0)
229 ; CHECK: ldg [[T0]], [x0, #16]
[all …]
/external/llvm/test/CodeGen/NVPTX/
ldu-ldg.ll:6 declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
7 declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
27 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
34 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
bug26185-2.ll:3 ; Verify that we correctly emit code for extending ldg/ldu. We do not expose
4 ; extending variants in the backend, but the ldg/ldu selection code may pick
/external/llvm-project/llvm/test/CodeGen/NVPTX/
ldu-ldg.ll:6 declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
7 declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
27 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
34 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
bug26185-2.ll:3 ; Verify that we correctly emit code for extending ldg/ldu. We do not expose
4 ; extending variants in the backend, but the ldg/ldu selection code may pick
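
Both NVPTX groups above exercise the llvm.nvvm.ldg.global.* intrinsics directly at the IR level. From CUDA source, the usual way to reach that path is the __ldg() builtin, which the backend lowers to ld.global.nc on sm_35 and later. A short hedged sketch (the kernel name is illustrative only):

#include <cstdint>
#include <cuda_runtime.h>

__global__ void LdgWidthsSketch(const int8_t* p8, const int32_t* p32,
                                int8_t* out8, int32_t* out32) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  // 8-bit and 32-bit read-only loads, mirroring the i8/i32 intrinsic
  // declarations in ldu-ldg.ll.
  out8[i] = __ldg(p8 + i);
  out32[i] = __ldg(p32 + i);
}
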
/external/llvm-project/llvm/test/MC/AArch64/
armv8.5a-mte-error.s:878 ldg sp, [x0, #0]
879 ldg x0, [x0, x0]
880 ldg x0, [x0, #4096]
881 ldg x0, [x0, #-4112]
882 ldg #1, [x0, #255]
883 ldg x0, [#1, #255]
898 ldg
899 ldg x0
900 ldg x0, [#0]
901 ldg w0, [x1]
[all …]
armv8.5a-mte.s:573 ldg X0, [X1, #0]
574 ldg X2, [sp, #-4096]
575 ldg x3, [x4, #4080]
/external/tensorflow/tensorflow/core/kernels/image/
resize_nearest_neighbor_op_gpu.cu.cc:61 top_data[index] = ldg(bottom_data_n + idx); in ResizeNearestNeighborNHWC()
90 top_data[index] = ldg(bottom_data_n + idx); in LegacyResizeNearestNeighborNHWC()
121 GpuAtomicAdd(bottom_diff_n + idx, ldg(top_diff + index)); in ResizeNearestNeighborBackwardNHWC()
150 GpuAtomicAdd(bottom_diff_n + idx, ldg(top_diff + index)); in LegacyResizeNearestNeighborBackwardNHWC()
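
The backward resize hits above combine ldg with GpuAtomicAdd: gradient values are read through the read-only cache and scattered with atomics, since several output pixels can map to the same input pixel. A hedged sketch of that combination, with an illustrative kernel name and an assumed precomputed src_index mapping:

__global__ void NearestNeighborGradSketch(const float* top_diff,
                                          const int* src_index,
                                          float* bottom_diff, int n) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    // top_diff is only read by this kernel, so the read-only cache is safe;
    // collisions on bottom_diff are resolved with atomicAdd.
    atomicAdd(bottom_diff + src_index[i], __ldg(top_diff + i));
  }
}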
