Home
last modified time | relevance | path

Searched refs:gpu_device_info (Results 1 – 14 of 14) sorted by relevance

/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dlaunch_dimensions.cc37 static int64 ThreadsPerBlockLimit(GpuDeviceInfo gpu_device_info) { in ThreadsPerBlockLimit() argument
38 int64 threads_per_block = gpu_device_info.threads_per_block_limit; in ThreadsPerBlockLimit()
47 threads_per_block = gpu_device_info.threads_per_warp; in ThreadsPerBlockLimit()
58 GpuDeviceInfo gpu_device_info, in CalculateLaunchDimensions() argument
79 int64 threads_per_block = ThreadsPerBlockLimit(gpu_device_info); in CalculateLaunchDimensions()
96 gpu_device_info.core_count * in CalculateLaunchDimensions()
97 (gpu_device_info.threads_per_core_limit / capped_threads_per_block); in CalculateLaunchDimensions()
Dbuffer_comparator.cc608 GpuDeviceInfo gpu_device_info; in DeviceCompare() local
609 gpu_device_info.threads_per_block_limit = in DeviceCompare()
611 gpu_device_info.threads_per_warp = in DeviceCompare()
613 gpu_device_info.shared_memory_per_block = in DeviceCompare()
615 gpu_device_info.threads_per_core_limit = in DeviceCompare()
617 gpu_device_info.core_count = executor->GetDeviceDescription().core_count(); in DeviceCompare()
619 CalculateLaunchDimensions(buffer_shape, gpu_device_info); in DeviceCompare()
Dir_emitter_context.h42 std::string platform_name, GpuDeviceInfo gpu_device_info, in IrEmitterContext() argument
49 gpu_device_info_(gpu_device_info), in IrEmitterContext()
64 GpuDeviceInfo gpu_device_info() const { return gpu_device_info_; } in gpu_device_info() function
Dgpu_compiler.cc551 const std::string& platform_name, GpuDeviceInfo gpu_device_info, in CompileModuleToLlvmIrImpl() argument
597 hlo_module, buffer_assignment->get(), platform_name, gpu_device_info, in CompileModuleToLlvmIrImpl()
867 GpuDeviceInfo gpu_device_info = GetGpuDeviceInfo(stream_exec); in RunBackend() local
906 stream_exec->platform()->Name(), gpu_device_info, cuda_compute_capability, in RunBackend()
964 GpuDeviceInfo gpu_device_info; in GetGpuDeviceInfo() local
965 gpu_device_info.threads_per_block_limit = in GetGpuDeviceInfo()
967 gpu_device_info.threads_per_warp = in GetGpuDeviceInfo()
969 gpu_device_info.shared_memory_per_block = in GetGpuDeviceInfo()
971 gpu_device_info.threads_per_core_limit = in GetGpuDeviceInfo()
973 gpu_device_info.core_count = stream_exec->GetDeviceDescription().core_count(); in GetGpuDeviceInfo()
[all …]
Dlaunch_dimensions.h69 GpuDeviceInfo gpu_device_info,
Dgpu_compiler.h156 const std::string& platform_name, GpuDeviceInfo gpu_device_info,
Dir_emitter_unnested.cc925 input_shape, ir_emitter_context_->gpu_device_info(), unroll_factor); in EmitPadToStaticFromMlir()
1048 input_shape, ir_emitter_context_->gpu_device_info(), unroll_factor); in EmitSliceToDynamicFromMlir()
1842 element_shape, ir_emitter_context_->gpu_device_info(), unroll_factor, in EmitLoopFusionFromMlir()
1919 element_shape, ir_emitter_context_->gpu_device_info(), in HandleFusion()
2043 update_shape, ir_emitter_context_->gpu_device_info()); in HandleFusion()
2239 source_shape, ir_emitter_context_->gpu_device_info()); in EmitSelectAndScatterFromMlir()
2723 desc.updates_shape, ir_emitter_context_->gpu_device_info()); in EmitScatter()
2932 standard_iteration_shape, ir_emitter_context_->gpu_device_info()); in EmitSortFromMlir()
2964 ir_emitter_context_->gpu_device_info().threads_per_block_limit || in EmitSortFromMlir()
2966 ir_emitter_context_->gpu_device_info().shared_memory_per_block; in EmitSortFromMlir()
[all …]
DBUILD110 ":gpu_device_info",
230 name = "gpu_device_info",
231 hdrs = ["gpu_device_info.h"],
679 ":gpu_device_info",
1246 ":gpu_device_info",
/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/
Dhlo_to_llvm_ir.cc46 xla::gpu::GpuDeviceInfo gpu_device_info{}; in CompileAndPrintLlvmIr() local
47 gpu_device_info.threads_per_block_limit = 1024; in CompileAndPrintLlvmIr()
48 gpu_device_info.threads_per_warp = 32; in CompileAndPrintLlvmIr()
49 gpu_device_info.shared_memory_per_block = 49152; in CompileAndPrintLlvmIr()
50 gpu_device_info.core_count = 80; in CompileAndPrintLlvmIr()
51 gpu_device_info.threads_per_core_limit = 2048; in CompileAndPrintLlvmIr()
63 /*platform_name=*/"CUDA", gpu_device_info, in CompileAndPrintLlvmIr()
Dmlir_gpu_test_base.cc54 GpuDeviceInfo gpu_device_info = GetGpuDeviceInfo(stream_exec); in CompileMlirModule() local
69 backend_->platform()->Name(), gpu_device_info, cuda_compute_capability, in CompileMlirModule()
DBUILD564 "//tensorflow/compiler/xla/service/gpu:gpu_device_info",
/external/tensorflow/tensorflow/compiler/jit/
Dxla_device.cc365 auto gpu_device_info = absl::make_unique<GpuDeviceInfo>(); in GetDeviceContextLocked() local
366 gpu_device_info->stream = stream_.get(); in GetDeviceContextLocked()
367 gpu_device_info->default_context = device_context_; in GetDeviceContextLocked()
368 set_tensorflow_gpu_device_info(gpu_device_info.get()); in GetDeviceContextLocked()
369 gpu_device_info_ = std::move(gpu_device_info); in GetDeviceContextLocked()
/external/tensorflow/tensorflow/core/common_runtime/
Dring_alg.cc255 const DeviceBase::GpuDeviceInfo* gpu_device_info = in TensorDebugString() local
257 if (gpu_device_info) { in TensorDebugString()
259 Status st = gpu_device_info->default_context->CopyDeviceTensorToCPUSync( in TensorDebugString()
/external/tensorflow/tensorflow/core/kernels/
Dfunctional_ops.cc404 const DeviceBase::GpuDeviceInfo* gpu_device_info = in CondResultToBool() local
408 if (!is_hostmem_dtype && gpu_device_info && in CondResultToBool()