/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | launch_dimensions.cc | 37 static int64 ThreadsPerBlockLimit(GpuDeviceInfo gpu_device_info) { in ThreadsPerBlockLimit() argument 38 int64 threads_per_block = gpu_device_info.threads_per_block_limit; in ThreadsPerBlockLimit() 47 threads_per_block = gpu_device_info.threads_per_warp; in ThreadsPerBlockLimit() 58 GpuDeviceInfo gpu_device_info, in CalculateLaunchDimensions() argument 79 int64 threads_per_block = ThreadsPerBlockLimit(gpu_device_info); in CalculateLaunchDimensions() 96 gpu_device_info.core_count * in CalculateLaunchDimensions() 97 (gpu_device_info.threads_per_core_limit / capped_threads_per_block); in CalculateLaunchDimensions()
|
D | buffer_comparator.cc | 608 GpuDeviceInfo gpu_device_info; in DeviceCompare() local 609 gpu_device_info.threads_per_block_limit = in DeviceCompare() 611 gpu_device_info.threads_per_warp = in DeviceCompare() 613 gpu_device_info.shared_memory_per_block = in DeviceCompare() 615 gpu_device_info.threads_per_core_limit = in DeviceCompare() 617 gpu_device_info.core_count = executor->GetDeviceDescription().core_count(); in DeviceCompare() 619 CalculateLaunchDimensions(buffer_shape, gpu_device_info); in DeviceCompare()
|
D | ir_emitter_context.h | 42 std::string platform_name, GpuDeviceInfo gpu_device_info, in IrEmitterContext() argument 49 gpu_device_info_(gpu_device_info), in IrEmitterContext() 64 GpuDeviceInfo gpu_device_info() const { return gpu_device_info_; } in gpu_device_info() function
|
D | gpu_compiler.cc | 551 const std::string& platform_name, GpuDeviceInfo gpu_device_info, in CompileModuleToLlvmIrImpl() argument 597 hlo_module, buffer_assignment->get(), platform_name, gpu_device_info, in CompileModuleToLlvmIrImpl() 867 GpuDeviceInfo gpu_device_info = GetGpuDeviceInfo(stream_exec); in RunBackend() local 906 stream_exec->platform()->Name(), gpu_device_info, cuda_compute_capability, in RunBackend() 964 GpuDeviceInfo gpu_device_info; in GetGpuDeviceInfo() local 965 gpu_device_info.threads_per_block_limit = in GetGpuDeviceInfo() 967 gpu_device_info.threads_per_warp = in GetGpuDeviceInfo() 969 gpu_device_info.shared_memory_per_block = in GetGpuDeviceInfo() 971 gpu_device_info.threads_per_core_limit = in GetGpuDeviceInfo() 973 gpu_device_info.core_count = stream_exec->GetDeviceDescription().core_count(); in GetGpuDeviceInfo() [all …]
|
D | launch_dimensions.h | 69 GpuDeviceInfo gpu_device_info,
|
D | gpu_compiler.h | 156 const std::string& platform_name, GpuDeviceInfo gpu_device_info,
|
D | ir_emitter_unnested.cc | 925 input_shape, ir_emitter_context_->gpu_device_info(), unroll_factor); in EmitPadToStaticFromMlir() 1048 input_shape, ir_emitter_context_->gpu_device_info(), unroll_factor); in EmitSliceToDynamicFromMlir() 1842 element_shape, ir_emitter_context_->gpu_device_info(), unroll_factor, in EmitLoopFusionFromMlir() 1919 element_shape, ir_emitter_context_->gpu_device_info(), in HandleFusion() 2043 update_shape, ir_emitter_context_->gpu_device_info()); in HandleFusion() 2239 source_shape, ir_emitter_context_->gpu_device_info()); in EmitSelectAndScatterFromMlir() 2723 desc.updates_shape, ir_emitter_context_->gpu_device_info()); in EmitScatter() 2932 standard_iteration_shape, ir_emitter_context_->gpu_device_info()); in EmitSortFromMlir() 2964 ir_emitter_context_->gpu_device_info().threads_per_block_limit || in EmitSortFromMlir() 2966 ir_emitter_context_->gpu_device_info().shared_memory_per_block; in EmitSortFromMlir() [all …]
|
D | BUILD | 110 ":gpu_device_info", 230 name = "gpu_device_info", 231 hdrs = ["gpu_device_info.h"], 679 ":gpu_device_info", 1246 ":gpu_device_info",
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/ |
D | hlo_to_llvm_ir.cc | 46 xla::gpu::GpuDeviceInfo gpu_device_info{}; in CompileAndPrintLlvmIr() local 47 gpu_device_info.threads_per_block_limit = 1024; in CompileAndPrintLlvmIr() 48 gpu_device_info.threads_per_warp = 32; in CompileAndPrintLlvmIr() 49 gpu_device_info.shared_memory_per_block = 49152; in CompileAndPrintLlvmIr() 50 gpu_device_info.core_count = 80; in CompileAndPrintLlvmIr() 51 gpu_device_info.threads_per_core_limit = 2048; in CompileAndPrintLlvmIr() 63 /*platform_name=*/"CUDA", gpu_device_info, in CompileAndPrintLlvmIr()
|
D | mlir_gpu_test_base.cc | 54 GpuDeviceInfo gpu_device_info = GetGpuDeviceInfo(stream_exec); in CompileMlirModule() local 69 backend_->platform()->Name(), gpu_device_info, cuda_compute_capability, in CompileMlirModule()
|
D | BUILD | 564 "//tensorflow/compiler/xla/service/gpu:gpu_device_info",
|
/external/tensorflow/tensorflow/compiler/jit/ |
D | xla_device.cc | 365 auto gpu_device_info = absl::make_unique<GpuDeviceInfo>(); in GetDeviceContextLocked() local 366 gpu_device_info->stream = stream_.get(); in GetDeviceContextLocked() 367 gpu_device_info->default_context = device_context_; in GetDeviceContextLocked() 368 set_tensorflow_gpu_device_info(gpu_device_info.get()); in GetDeviceContextLocked() 369 gpu_device_info_ = std::move(gpu_device_info); in GetDeviceContextLocked()
|
/external/tensorflow/tensorflow/core/common_runtime/ |
D | ring_alg.cc | 255 const DeviceBase::GpuDeviceInfo* gpu_device_info = in TensorDebugString() local 257 if (gpu_device_info) { in TensorDebugString() 259 Status st = gpu_device_info->default_context->CopyDeviceTensorToCPUSync( in TensorDebugString()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | functional_ops.cc | 404 const DeviceBase::GpuDeviceInfo* gpu_device_info = in CondResultToBool() local 408 if (!is_hostmem_dtype && gpu_device_info && in CondResultToBool()
|