/external/tensorflow/tensorflow/compiler/tf2xla/ |
D | sharding_util.cc | 49 Status CoreOutOfRangeError(int core, int num_cores_per_replica) { in CoreOutOfRangeError() argument 52 "; num_cores_per_replica=", num_cores_per_replica); in CoreOutOfRangeError() 57 const string& device_name, int num_cores_per_replica, in ParseShardingFromDevice() argument 77 if (core < 0 || core >= num_cores_per_replica) { in ParseShardingFromDevice() 78 return CoreOutOfRangeError(core, num_cores_per_replica); in ParseShardingFromDevice() 89 const NodeDef& node_def, int num_cores_per_replica, bool add_metadata) { in ParseShardingFromDevice() argument 94 device_name, num_cores_per_replica, sharding, in ParseShardingFromDevice() 101 const Node& node, int num_cores_per_replica, bool add_metadata) { in ParseShardingFromDevice() argument 109 device_name, num_cores_per_replica, sharding, in ParseShardingFromDevice() 116 const Edge& edge, int num_cores_per_replica, bool add_metadata) { in ParseShardingFromEdgeSource() argument [all …]
|
D | sharding_util.h | 37 const string& device_name, int num_cores_per_replica, 42 const Node& node, int num_cores_per_replica, bool add_metadata); 45 const NodeDef& node_def, int num_cores_per_replica, bool add_metadata); 48 const Edge& edge, int num_cores_per_replica, bool add_metadata);
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/transforms/ |
D | tpu_rewrite_pass.cc | 303 tf_device::ClusterFuncOp op, int num_replicas, int num_cores_per_replica, in SetMetadataProtoFromClusterFuncOp() argument 307 metadata->set_num_cores_per_replica(num_cores_per_replica); in SetMetadataProtoFromClusterFuncOp() 351 int num_cores_per_replica, llvm::StringRef compilation_device, in BuildCompileOp() argument 357 cluster_func, num_replicas, num_cores_per_replica, in BuildCompileOp() 402 llvm::SmallVector<Type, 8>(num_cores_per_replica, program_type), in BuildCompileOp() 419 const int num_cores_per_replica = tpu_devices.front().size(); in AssignDevicesToReplicate() local 422 for (int core = 0; core < num_cores_per_replica; ++core) { in AssignDevicesToReplicate() 435 if (num_cores_per_replica == 1) { in AssignDevicesToReplicate() 474 const int num_cores_per_replica = tpu_devices.front().size(); in BuildParallelExecuteOp() local 483 num_cores_per_replica); in BuildParallelExecuteOp() [all …]
|
D | tpu_reorder_replicate_and_partitioned_inputs.cc | 51 size_t num_cores_per_replica = first_partitioned_input.getNumOperands(); in ReorderReplicateAndPartitionedInputs() local 64 if (partitioned_input.getNumOperands() != num_cores_per_replica) in ReorderReplicateAndPartitionedInputs() 66 << "expects " << num_cores_per_replica << " operands but found " in ReorderReplicateAndPartitionedInputs() 79 operands_per_replica_per_core.resize(num_cores_per_replica); in ReorderReplicateAndPartitionedInputs()
|
D | tpu_cluster_formation.cc | 343 int num_cores_per_replica) { in ReplicateCluster() argument 363 if (pi->getNumOperands() != num_cores_per_replica) in ReplicateCluster() 365 << "requires " << num_cores_per_replica in ReplicateCluster() 568 int num_cores_per_replica = 1; in FormClustersInBlock() local 574 num_cores_per_replica = num_cores_per_replica_attr.getInt(); in FormClustersInBlock() 578 num_cores_per_replica))) in FormClustersInBlock()
|
/external/tensorflow/tensorflow/compiler/xrt/kernels/ |
D | tpu_compile_ops.cc | 93 int num_cores_per_replica) { in CompilationCacheKey() argument 98 metadata.set_num_cores_per_replica(num_cores_per_replica); in CompilationCacheKey() 179 int num_cores_per_replica = in Compute() local 180 config.num_cores_per_replica() ? config.num_cores_per_replica() : 1; in Compute() 183 computation_proto, mesh_state, num_replicas, num_cores_per_replica); in Compute() 212 Tensor program_shape_output(DT_STRING, TensorShape({num_cores_per_replica})); in Compute() 213 for (int64 i = 0; i < num_cores_per_replica; ++i) { in Compute()
|
D | xrt_compile_ops.cc | 56 int num_cores_per_replica, xla::DeviceAssignment* device_assignment) { in GenerateXlaDeviceAssignment() argument 57 if (num_cores_per_replica != in GenerateXlaDeviceAssignment() 62 num_cores_per_replica, " computation_devices=", in GenerateXlaDeviceAssignment() 127 int num_cores_per_replica = std::max<int>(config.num_cores_per_replica(), 1); in Compile() local 128 TF_RET_CHECK(num_cores_per_replica == 1); in Compile() 171 num_cores_per_replica); in Compile() 174 num_cores_per_replica, &device_assignment)); in Compile()
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/tests/ |
D | tpu_extract_head_tail_outside_compilation.mlir | 23 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_… 44 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_… 67 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_… 84 // CHECK-DAG: num_cores_per_replica = 1 94 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_… 118 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_… 144 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_… 166 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_… 179 // CHECK-DAG: num_cores_per_replica = 1 195 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_… [all …]
|
D | outside_compiled_to_host_launch.mlir | 13 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32> 28 // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = "" 34 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> () 51 // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = "" 59 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> () 83 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32> 118 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32>
|
D | mark_ops_for_outside_compilation.mlir | 13 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<i32> 27 …}) {allow_soft_placement = false, num_cores_per_replica = 1, topology = "", device_assignment = … 45 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = [… 59 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = [… 77 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = [… 91 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = [… 102 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = [… 116 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = [… 134 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = [… 151 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = [… [all …]
|
D | tpu_rewrite.mlir | 8 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_… 23 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_… 38 // expected-error@+1 {{requires attribute 'num_cores_per_replica'}} 53 // expected-error@+1 {{requires attribute 'num_cores_per_replica'}} 54 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = "", step… 69 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, paddi… 84 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_… 99 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_… 114 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_… 129 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_… [all …]
|
D | tpu_extract_outside_compilation.mlir | 14 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32> 32 // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = "" 38 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> () 55 // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = "" 63 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> () 84 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> () 109 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32> 134 …}) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> (tensor<?xf32>, te… 164 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32> 190 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32> [all …]
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/utils/ |
D | tpu_rewrite_device_util.cc | 188 int num_replicas, int num_cores_per_replica, in GetFullMeshTPUExecutionDeviceAssignment() argument 198 if (num_cores_per_replica != 1) in GetFullMeshTPUExecutionDeviceAssignment() 201 num_cores_per_replica); in GetFullMeshTPUExecutionDeviceAssignment() 347 int num_replicas, int num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment() argument 358 num_replicas * num_cores_per_replica * kTPUTopologyRank; in GetGeneralTPUExecutionDeviceAssignment() 364 kTPUTopologyRank, " (", num_replicas, " * ", num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment() 382 num_cores_per_replica, TPUDeviceAndHost())); in GetGeneralTPUExecutionDeviceAssignment() 383 xla::DeviceAssignment device_assignment(num_replicas, num_cores_per_replica); in GetGeneralTPUExecutionDeviceAssignment() 386 for (int logical_core = 0; logical_core < num_cores_per_replica; in GetGeneralTPUExecutionDeviceAssignment() 450 Devices devices, int num_replicas, int num_cores_per_replica, in GetTPUCompilationAndExecutionDevices() argument [all …]
|
D | xla_sharding_util.cc | 211 const int num_cores_per_replica, in ExtractInputsForLogicalDevices() argument 215 input_list->reserve(num_cores_per_replica); in ExtractInputsForLogicalDevices() 216 for (int i = 0; i < num_cores_per_replica; ++i) in ExtractInputsForLogicalDevices() 251 input_index, tiled_input_size, num_cores_per_replica)); in ExtractInputsForLogicalDevices() 273 if (partitioned_input.inputs().size() != num_cores_per_replica) in ExtractInputsForLogicalDevices() 293 if (tiled_inputs_size != num_cores_per_replica) in ExtractInputsForLogicalDevices() 312 const int num_cores_per_replica, in ParseAndValidateOutputSharding() argument 341 sharding.tile_assignment_devices_size() != num_cores_per_replica) in ParseAndValidateOutputSharding() 346 sharding.tile_assignment_devices_size(), num_cores_per_replica)); in ParseAndValidateOutputSharding() 349 ((sharding.tile_assignment_devices(0) >= num_cores_per_replica) || in ParseAndValidateOutputSharding() [all …]
|
D | xla_sharding_util.h | 40 const int num_cores_per_replica, 47 const int num_cores_per_replica,
|
/external/tensorflow/tensorflow/core/tpu/graph_rewrite/ |
D | distributed_tpu_rewrite_pass.cc | 490 Status ValidateCoreNumber(int64 core, int64 num_cores_per_replica) { in ValidateCoreNumber() argument 491 if (core < 0 || core >= num_cores_per_replica) { in ValidateCoreNumber() 494 num_cores_per_replica, ")"); in ValidateCoreNumber() 1196 const int num_cores_per_replica, in ParseAndValidateSharding() argument 1203 ValidateCoreNumber(core_annotation, num_cores_per_replica)); in ParseAndValidateSharding() 1211 TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); in ParseAndValidateSharding() 1261 ParseInputShardingFromAdjacentNode(const int num_cores_per_replica, in ParseInputShardingFromAdjacentNode() argument 1266 ParseShardingFromDevice(node, num_cores_per_replica, in ParseInputShardingFromAdjacentNode() 1284 num_cores_per_replica, /*add_metadata=*/true)); in ParseInputShardingFromAdjacentNode() 1299 const int num_cores_per_replica, const std::string& arg_node_name, in ParseAndValidateShardingFromNeighbors() argument [all …]
|
D | distributed_tpu_rewrite_pass.h | 268 int num_cores_per_replica, const string& topology_attr, 305 int num_cores_per_replica, const ParameterInfo& params_info, 362 int num_cores_per_replica, const string& compile_device, 430 int num_cores_per_replica, const Node& replicate_node, 522 int* num_replicas, int* num_cores_per_replica, int* num_tasks,
|
/external/tensorflow/tensorflow/core/ops/compat/ops_history_v2/ |
D | TPUReplicateMetadata.pbtxt | 9 name: "num_cores_per_replica" 70 name: "num_cores_per_replica" 138 name: "num_cores_per_replica" 213 name: "num_cores_per_replica"
|
/external/tensorflow/tensorflow/core/ops/compat/ops_history_v1/ |
D | TPUReplicateMetadata.pbtxt | 9 name: "num_cores_per_replica" 70 name: "num_cores_per_replica" 138 name: "num_cores_per_replica"
|
/external/tensorflow/tensorflow/core/tpu/kernels/ |
D | tpu_compile_op_impl.cc | 52 function_name, metadata, metadata.num_cores_per_replica(), in CreateNonMlirImpl() 65 mlir_module, metadata, metadata.num_cores_per_replica(), in CreateMlirImpl()
|
D | tpu_compile_op_support.cc | 207 may_modify_variables->resize(metadata.num_cores_per_replica(), false); in AddVariableUpdatesToCores() 240 for (int64 core = 0; core < metadata.num_cores_per_replica(); in AddVariableUpdatesToCores() 259 for (int64 core = 0; core < metadata.num_cores_per_replica(); ++core) { in AddVariableUpdatesToCores() 324 metadata.num_replicas(), metadata.num_cores_per_replica(), in CreateHloModules() 410 if (num_computations != metadata->num_cores_per_replica()) { in CompileOpMetadataFromContext() 415 num_computations, " vs ", metadata->num_cores_per_replica(), ")"); in CompileOpMetadataFromContext() 431 metadata->num_cores_per_replica()) { in CompileOpMetadataFromContext() 435 metadata->num_cores_per_replica()); in CompileOpMetadataFromContext()
|
D | tpu_program_group.cc | 268 count == compilation_request.metadata().num_cores_per_replica()); in CompileAndBuild() 301 int num_cores_per_replica = in CompileAndBuild() local 302 xrt_computation_proto.config().num_cores_per_replica() in CompileAndBuild() 303 ? xrt_computation_proto.config().num_cores_per_replica() in CompileAndBuild() 305 TF_RET_CHECK(count == 1 || count == num_cores_per_replica); in CompileAndBuild()
|
D | tpu_mesh_state_interface.h | 64 return metadata.num_cores_per_replica() * metadata.num_replicas() == in NeedsStaticDeviceAssignment()
|
/external/tensorflow/tensorflow/core/api_def/base_api/ |
D | api_def_TPUReplicateMetadata.pbtxt | 11 name: "num_cores_per_replica" 37 DEPRECATED. Use num_cores_per_replica instead.
|
/external/tensorflow/tensorflow/core/profiler/convert/ |
D | op_stats_combiner.cc | 98 std::max(src.num_cores_per_replica(), dst->num_cores_per_replica())); in CombineRunEnvironment()
|