/external/tensorflow/tensorflow/stream_executor/ |
D | stream.h | 61 class DeviceMemory; variable 222 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, 223 const DeviceMemory<float> &offset, 224 const DeviceMemory<float> &estimated_mean, 225 const DeviceMemory<float> &estimated_variance, 228 DeviceMemory<float> *y, DeviceMemory<float> *batch_mean, 229 DeviceMemory<float> *batch_var, DeviceMemory<float> *saved_mean, 230 DeviceMemory<float> *saved_inv_var, bool is_training, 231 std::function<const DeviceMemory<float> &()> var_to_inv_var, 235 const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x, [all …]
|
D | blas.h | 59 class DeviceMemory; variable 179 const DeviceMemory<float> &x, int incx, 180 DeviceMemory<float> *result) = 0; 182 const DeviceMemory<double> &x, int incx, 183 DeviceMemory<double> *result) = 0; 185 const DeviceMemory<std::complex<float>> &x, int incx, 186 DeviceMemory<float> *result) = 0; 188 const DeviceMemory<std::complex<double>> &x, int incx, 189 DeviceMemory<double> *result) = 0; 193 const DeviceMemory<float> &x, int incx, [all …]
|
D | dnn.h | 920 Stream* stream, const DeviceMemory<float>& x, 921 const DeviceMemory<float>& scale, const DeviceMemory<float>& offset, 922 const DeviceMemory<float>& estimated_mean, 923 const DeviceMemory<float>& estimated_variance, 926 DeviceMemory<float>* y, DeviceMemory<float>* batch_mean, 927 DeviceMemory<float>* batch_var, DeviceMemory<float>* reserve_space_1, 928 DeviceMemory<float>* reserve_space_2, bool is_training, 929 std::function<const DeviceMemory<float>&()> var_to_inv_var, 937 Stream* stream, const DeviceMemory<Eigen::half>& x, 938 const DeviceMemory<float>& scale, const DeviceMemory<float>& offset, [all …]
|
D | stream.cc | 315 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, in ThenBatchNormalizationForward() 316 const DeviceMemory<float> &offset, in ThenBatchNormalizationForward() 317 const DeviceMemory<float> &estimated_mean, in ThenBatchNormalizationForward() 318 const DeviceMemory<float> &estimated_variance, in ThenBatchNormalizationForward() 321 DeviceMemory<float> *y, DeviceMemory<float> *batch_mean, in ThenBatchNormalizationForward() 322 DeviceMemory<float> *batch_var, DeviceMemory<float> *saved_mean, in ThenBatchNormalizationForward() 323 DeviceMemory<float> *saved_inv_var, bool is_training, in ThenBatchNormalizationForward() 324 std::function<const DeviceMemory<float> &()> var_to_inv_var, in ThenBatchNormalizationForward() 343 const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x, in ThenBatchNormalizationBackward() 344 const DeviceMemory<float> &scale, const DeviceMemory<float> &mean, in ThenBatchNormalizationBackward() [all …]
|
D | fft.h | 56 class DeviceMemory; variable 182 const DeviceMemory<std::complex<float>> &input, 183 DeviceMemory<std::complex<float>> *output) = 0; 185 const DeviceMemory<std::complex<double>> &input, 186 DeviceMemory<std::complex<double>> *output) = 0; 190 const DeviceMemory<float> &input, 191 DeviceMemory<std::complex<float>> *output) = 0; 193 const DeviceMemory<double> &input, 194 DeviceMemory<std::complex<double>> *output) = 0; 198 const DeviceMemory<std::complex<float>> &input, [all …]
|
D | device_memory.h | 110 class DeviceMemory final : public DeviceMemoryBase { 113 DeviceMemory() : DeviceMemoryBase(nullptr, 0) {} in DeviceMemory() function 114 DeviceMemory(std::nullptr_t) : DeviceMemory() {} in DeviceMemory() function 118 explicit DeviceMemory(const DeviceMemoryBase &other) in DeviceMemory() function 132 static DeviceMemory<ElemT> MakeFromByteSize(void *opaque, uint64 bytes) { in MakeFromByteSize() 133 return DeviceMemory<ElemT>(opaque, bytes); in MakeFromByteSize() 154 DeviceMemory(void *opaque, uint64 size) : DeviceMemoryBase(opaque, size) {} in DeviceMemory() function 223 const DeviceMemory<ElemT> &cref() const { return wrapped_; } in cref() 228 DeviceMemory<ElemT> *ptr() { return &wrapped_; } in ptr() 229 const DeviceMemory<ElemT> *ptr() const { return &wrapped_; } in ptr() [all …]
|
D | stream_executor_pimpl.h | 113 DeviceMemory<T> AllocateArray(uint64 element_count); 124 DeviceMemory<T> AllocateScalar() { in AllocateScalar() 137 DeviceMemory<T> AllocateZeroed(); 154 DeviceMemory<T> AllocateSubBuffer(DeviceMemory<T> *parent, 160 ScopedDeviceMemory<T> AllocateOwnedSubBuffer(DeviceMemory<T> *parent, in AllocateOwnedSubBuffer() 174 port::StatusOr<DeviceMemory<T>> GetSymbol(const string &symbol_name); 263 port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &device_src, in SynchronousMemcpyD2H() 668 inline DeviceMemory<T> StreamExecutor::AllocateArray(uint64 element_count) { in AllocateArray() 671 return DeviceMemory<T>::MakeFromByteSize(opaque, bytes); in AllocateArray() 675 inline port::StatusOr<DeviceMemory<T>> StreamExecutor::GetSymbol( in GetSymbol() [all …]
|
D | rng.h | 30 class DeviceMemory; variable 55 DeviceMemory<float> *v) = 0; 57 DeviceMemory<double> *v) = 0; 59 DeviceMemory<std::complex<float>> *v) = 0; 61 DeviceMemory<std::complex<double>> *v) = 0; 66 DeviceMemory<float> *v) { in DoPopulateRandGaussian() 72 double stddev, DeviceMemory<double> *v) { in DoPopulateRandGaussian()
|
D | temporary_device_memory.h | 113 DeviceMemory<T>* mutable_device_memory() { in mutable_device_memory() 115 return reinterpret_cast<DeviceMemory<T>*>( in mutable_device_memory() 120 const DeviceMemory<T>& device_memory() const { in device_memory() 122 return reinterpret_cast<const DeviceMemory<T>&>( in device_memory()
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_dnn.h | 67 const DeviceMemory<Eigen::half>& input_data, 69 const DeviceMemory<Eigen::half>& input_h_data, 71 const DeviceMemory<Eigen::half>& input_c_data, 72 const DeviceMemory<Eigen::half>& params, 74 DeviceMemory<Eigen::half>* output_data, 76 DeviceMemory<Eigen::half>* output_h_data, 78 DeviceMemory<Eigen::half>* output_c_data, bool is_training, 84 const DeviceMemory<float>& input_data, 86 const DeviceMemory<float>& input_h_data, 88 const DeviceMemory<float>& input_c_data, [all …]
|
D | cuda_blas.cc | 642 const DeviceMemory<float> &x, int incx, in DoBlasAsum() 643 DeviceMemory<float> *result) { in DoBlasAsum() 650 const DeviceMemory<double> &x, int incx, in DoBlasAsum() 651 DeviceMemory<double> *result) { in DoBlasAsum() 658 const DeviceMemory<std::complex<float>> &x, int incx, in DoBlasAsum() 659 DeviceMemory<float> *result) { in DoBlasAsum() 666 const DeviceMemory<std::complex<double>> &x, int incx, in DoBlasAsum() 667 DeviceMemory<double> *result) { in DoBlasAsum() 674 const DeviceMemory<float> &x, int incx, in DoBlasAxpy() 675 DeviceMemory<float> *y, int incy) { in DoBlasAxpy() [all …]
|
D | cuda_dnn.cc | 990 DeviceMemory<uint8> state_memory; in CudnnDropoutDescriptor() 1426 const DeviceMemory<T>& input_data, in ExtractAndCheckRnnForward() 1428 const DeviceMemory<T>& input_h_data, in ExtractAndCheckRnnForward() 1430 const DeviceMemory<T>& input_c_data, const DeviceMemory<T>& params, in ExtractAndCheckRnnForward() 1432 const DeviceMemory<T>& output_data, in ExtractAndCheckRnnForward() 1434 const DeviceMemory<T>& output_h_data, in ExtractAndCheckRnnForward() 1436 const DeviceMemory<T>& output_c_data, RnnModelDims* model_dims) { in ExtractAndCheckRnnForward() 1504 DeviceMemory<uint8>* workspace) { in CreateRnnWorkspace() 1525 *workspace = DeviceMemory<uint8>(); in CreateRnnWorkspace() 1536 const DeviceMemory<T>& input_data, in DoRnnForwardImpl() [all …]
|
D | cuda_blas.h | 114 const port::ArraySlice<DeviceMemory<T> *> &a_array, int lda, 115 const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, T beta, 116 const port::ArraySlice<DeviceMemory<T> *> &c_array, int ldc, 128 uint64 n, uint64 k, const CompT &alpha, const DeviceMemory<InT> &a, 129 int lda, const DeviceMemory<InT> &b, int ldb, const CompT &beta, 130 DeviceMemory<OutT> *c, int ldc, blas::ComputationType computation_type, 138 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a, 139 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta, 140 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result); 146 const DeviceMemory<T> &a, int lda, [all …]
|
D | cuda_rng.h | 32 class DeviceMemory; variable 58 bool DoPopulateRandUniform(Stream *stream, DeviceMemory<float> *v) override; 59 bool DoPopulateRandUniform(Stream *stream, DeviceMemory<double> *v) override; 61 DeviceMemory<std::complex<float>> *v) override; 63 DeviceMemory<std::complex<double>> *v) override; 65 DeviceMemory<float> *v) override; 67 DeviceMemory<double> *v) override; 75 bool DoPopulateRandUniformInternal(Stream *stream, DeviceMemory<T> *v); 78 DeviceMemory<ElemT> *v, FuncT func);
|
D | cuda_rng.cc | 153 DeviceMemory<T> *v) { in DoPopulateRandUniformInternal() 190 bool CUDARng::DoPopulateRandUniform(Stream *stream, DeviceMemory<float> *v) { in DoPopulateRandUniform() 194 bool CUDARng::DoPopulateRandUniform(Stream *stream, DeviceMemory<double> *v) { in DoPopulateRandUniform() 199 DeviceMemory<std::complex<float>> *v) { in DoPopulateRandUniform() 204 DeviceMemory<std::complex<double>> *v) { in DoPopulateRandUniform() 211 DeviceMemory<ElemT> *v, in DoPopulateRandGaussianInternal() 233 DeviceMemory<float> *v) { in DoPopulateRandGaussian() 239 DeviceMemory<double> *v) { in DoPopulateRandGaussian()
|
D | cuda_fft.h | 90 DeviceMemory<uint8> scratch_; 122 const DeviceMemory<InputT> &input, 123 DeviceMemory<OutputT> *output); 129 const DeviceMemory<InputT> &input, 130 DeviceMemory<OutputT> *output);
|
D | cuda_fft.cc | 465 const DeviceMemory<InputT> &input, in DoFftInternal() 466 DeviceMemory<OutputT> *output) { in DoFftInternal() 492 const DeviceMemory<InputT> &input, in DoFftWithDirectionInternal() 493 DeviceMemory<OutputT> *output) { in DoFftWithDirectionInternal() 520 const DeviceMemory<std::complex<__type>> &input, \ 521 DeviceMemory<std::complex<__type>> *output) { \ 526 const DeviceMemory<__type> &input, \ 527 DeviceMemory<std::complex<__type>> *output) { \ 532 const DeviceMemory<std::complex<__type>> &input, \ 533 DeviceMemory<__type> *output) { \
|
D | cuda_helpers.h | 34 class DeviceMemory; variable 42 const T *CUDAMemory(const DeviceMemory<T> &mem) { in CUDAMemory() 49 T *CUDAMemoryMutable(DeviceMemory<T> *mem) { in CUDAMemoryMutable()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | cudnn_batchnorm_thunk.cc | 109 se::DeviceMemory<float> output(buffer_allocations.GetDeviceAddress(output_)); in ExecuteOnStream() 111 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(operand_)), in ExecuteOnStream() 112 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)), in ExecuteOnStream() 113 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(offset_)), in ExecuteOnStream() 114 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(mean_)), in ExecuteOnStream() 115 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(variance_)), in ExecuteOnStream() 171 se::DeviceMemory<float> output_data( in ExecuteOnStream() 173 se::DeviceMemory<float> output_mean( in ExecuteOnStream() 175 se::DeviceMemory<float> output_inv_stddev( in ExecuteOnStream() 178 se::DeviceMemory<float> null_device_ptr(nullptr); in ExecuteOnStream() [all …]
|
D | cudnn_convolution_runner.cc | 27 using se::DeviceMemory; 52 se::port::StatusOr<DeviceMemory<uint8>> AllocateBytes( in AllocateBytes() 65 return se::DeviceMemory<uint8>(scratch_); in AllocateBytes() 76 const Shape& output_shape, DeviceMemory<T> input_buf, in RunCudnnConvolution() 77 DeviceMemory<T> filter_buf, DeviceMemory<T> output_buf, in RunCudnnConvolution() 249 se::DeviceMemory<float>(input_buf), se::DeviceMemory<float>(filter_buf), in RunCudnnConvolution() 250 se::DeviceMemory<float>(output_buf), scratch_allocator, window, dnums, in RunCudnnConvolution() 254 se::DeviceMemory<Eigen::half>(input_buf), in RunCudnnConvolution() 255 se::DeviceMemory<Eigen::half>(filter_buf), in RunCudnnConvolution() 256 se::DeviceMemory<Eigen::half>(output_buf), in RunCudnnConvolution()
|
D | fft_thunk.cc | 52 se::port::StatusOr<se::DeviceMemory<uint8>> FftScratchAllocator::AllocateBytes( in AllocateBytes() 74 return se::DeviceMemory<uint8>(allocated_buffer); in AllocateBytes() 175 se::DeviceMemory<complex64> input_data( in ExecuteOnStream() 177 se::DeviceMemory<complex64> output_data( in ExecuteOnStream() 184 se::DeviceMemory<complex64> input_data( in ExecuteOnStream() 186 se::DeviceMemory<complex64> output_data( in ExecuteOnStream() 200 se::DeviceMemory<float> input_data( in ExecuteOnStream() 202 se::DeviceMemory<complex64> output_data( in ExecuteOnStream() 209 se::DeviceMemory<complex64> input_data( in ExecuteOnStream() 211 se::DeviceMemory<float> output_data( in ExecuteOnStream()
|
D | gemm_thunk.cc | 55 se::DeviceMemory<Element> lhs_data(lhs_matrix.data); in DoGemm() 56 se::DeviceMemory<Element> rhs_data(rhs_matrix.data); in DoGemm() 57 se::DeviceMemory<Element> output_data(output_matrix.data); in DoGemm() 98 se::DeviceMemory<Element> lhs_data(lhs_matrix.data); in DoGemmWithAlgorithm() 99 se::DeviceMemory<Element> rhs_data(rhs_matrix.data); in DoGemmWithAlgorithm() 100 se::DeviceMemory<Element> output_data(output_matrix.data); in DoGemmWithAlgorithm()
|
D | convolution_thunk.h | 76 perftools::gputools::DeviceMemory<float> input_data, 78 perftools::gputools::DeviceMemory<float> filter_data, 80 perftools::gputools::DeviceMemory<float> output_data,
|
/external/tensorflow/tensorflow/core/common_runtime/gpu/ |
D | gpu_debug_allocator_test.cc | 46 gpu::DeviceMemory<int64> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}}; in TEST() 71 gpu::DeviceMemory<int64> gpu_array_ptr{ in TEST() 76 gpu::DeviceMemory<int64> gpu_hdr_ptr{ in TEST() 104 gpu::DeviceMemory<int64> gpu_array_ptr{ in TEST() 110 gpu::DeviceMemory<int64> gpu_ftr_ptr{ in TEST() 134 gpu::DeviceMemory<float> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}}; in TEST() 177 gpu::DeviceMemory<float> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}}; in TEST()
|
/external/tensorflow/tensorflow/contrib/cudnn_rnn/kernels/ |
D | cudnn_rnn_ops.cc | 104 using perftools::gputools::DeviceMemory; 178 const DeviceMemory<T> AsDeviceMemory(const Tensor* tensor) { in AsDeviceMemory() 179 return DeviceMemory<T>::MakeFromByteSize( in AsDeviceMemory() 185 DeviceMemory<T> AsDeviceMemory(Tensor* tensor) { in AsDeviceMemory() 186 return DeviceMemory<T>::MakeFromByteSize( in AsDeviceMemory() 192 DeviceMemory<U> CastDeviceMemory(Tensor* tensor) { in CastDeviceMemory() 193 return DeviceMemory<U>::MakeFromByteSize( in CastDeviceMemory() 240 StatusOr<DeviceMemory<uint8>> AllocateBytes( in AllocateBytes() 252 return StatusOr<DeviceMemory<uint8>>( in AllocateBytes() 276 StatusOr<DeviceMemory<uint8>> AllocateBytes( in AllocateBytes() [all …]
|