Home
last modified time | relevance | path

Searched refs:num_cores_per_replica (Results 1 – 25 of 56) sorted by relevance

123

/external/tensorflow/tensorflow/compiler/tf2xla/
Dsharding_util.cc49 Status CoreOutOfRangeError(int core, int num_cores_per_replica) { in CoreOutOfRangeError() argument
52 "; num_cores_per_replica=", num_cores_per_replica); in CoreOutOfRangeError()
57 const string& device_name, int num_cores_per_replica, in ParseShardingFromDevice() argument
77 if (core < 0 || core >= num_cores_per_replica) { in ParseShardingFromDevice()
78 return CoreOutOfRangeError(core, num_cores_per_replica); in ParseShardingFromDevice()
89 const NodeDef& node_def, int num_cores_per_replica, bool add_metadata) { in ParseShardingFromDevice() argument
94 device_name, num_cores_per_replica, sharding, in ParseShardingFromDevice()
101 const Node& node, int num_cores_per_replica, bool add_metadata) { in ParseShardingFromDevice() argument
109 device_name, num_cores_per_replica, sharding, in ParseShardingFromDevice()
116 const Edge& edge, int num_cores_per_replica, bool add_metadata) { in ParseShardingFromEdgeSource() argument
[all …]
Dsharding_util.h37 const string& device_name, int num_cores_per_replica,
42 const Node& node, int num_cores_per_replica, bool add_metadata);
45 const NodeDef& node_def, int num_cores_per_replica, bool add_metadata);
48 const Edge& edge, int num_cores_per_replica, bool add_metadata);
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/transforms/
Dtpu_rewrite_pass.cc303 tf_device::ClusterFuncOp op, int num_replicas, int num_cores_per_replica, in SetMetadataProtoFromClusterFuncOp() argument
307 metadata->set_num_cores_per_replica(num_cores_per_replica); in SetMetadataProtoFromClusterFuncOp()
351 int num_cores_per_replica, llvm::StringRef compilation_device, in BuildCompileOp() argument
357 cluster_func, num_replicas, num_cores_per_replica, in BuildCompileOp()
402 llvm::SmallVector<Type, 8>(num_cores_per_replica, program_type), in BuildCompileOp()
419 const int num_cores_per_replica = tpu_devices.front().size(); in AssignDevicesToReplicate() local
422 for (int core = 0; core < num_cores_per_replica; ++core) { in AssignDevicesToReplicate()
435 if (num_cores_per_replica == 1) { in AssignDevicesToReplicate()
474 const int num_cores_per_replica = tpu_devices.front().size(); in BuildParallelExecuteOp() local
483 num_cores_per_replica); in BuildParallelExecuteOp()
[all …]
Dtpu_reorder_replicate_and_partitioned_inputs.cc51 size_t num_cores_per_replica = first_partitioned_input.getNumOperands(); in ReorderReplicateAndPartitionedInputs() local
64 if (partitioned_input.getNumOperands() != num_cores_per_replica) in ReorderReplicateAndPartitionedInputs()
66 << "expects " << num_cores_per_replica << " operands but found " in ReorderReplicateAndPartitionedInputs()
79 operands_per_replica_per_core.resize(num_cores_per_replica); in ReorderReplicateAndPartitionedInputs()
Dtpu_cluster_formation.cc343 int num_cores_per_replica) { in ReplicateCluster() argument
363 if (pi->getNumOperands() != num_cores_per_replica) in ReplicateCluster()
365 << "requires " << num_cores_per_replica in ReplicateCluster()
568 int num_cores_per_replica = 1; in FormClustersInBlock() local
574 num_cores_per_replica = num_cores_per_replica_attr.getInt(); in FormClustersInBlock()
578 num_cores_per_replica))) in FormClustersInBlock()
/external/tensorflow/tensorflow/compiler/xrt/kernels/
Dtpu_compile_ops.cc93 int num_cores_per_replica) { in CompilationCacheKey() argument
98 metadata.set_num_cores_per_replica(num_cores_per_replica); in CompilationCacheKey()
179 int num_cores_per_replica = in Compute() local
180 config.num_cores_per_replica() ? config.num_cores_per_replica() : 1; in Compute()
183 computation_proto, mesh_state, num_replicas, num_cores_per_replica); in Compute()
212 Tensor program_shape_output(DT_STRING, TensorShape({num_cores_per_replica})); in Compute()
213 for (int64 i = 0; i < num_cores_per_replica; ++i) { in Compute()
Dxrt_compile_ops.cc56 int num_cores_per_replica, xla::DeviceAssignment* device_assignment) { in GenerateXlaDeviceAssignment() argument
57 if (num_cores_per_replica != in GenerateXlaDeviceAssignment()
62 num_cores_per_replica, " computation_devices=", in GenerateXlaDeviceAssignment()
127 int num_cores_per_replica = std::max<int>(config.num_cores_per_replica(), 1); in Compile() local
128 TF_RET_CHECK(num_cores_per_replica == 1); in Compile()
171 num_cores_per_replica); in Compile()
174 num_cores_per_replica, &device_assignment)); in Compile()
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/tests/
Dtpu_extract_head_tail_outside_compilation.mlir23 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_…
44 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_…
67 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_…
84 // CHECK-DAG: num_cores_per_replica = 1
94 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_…
118 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_…
144 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_…
166 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_…
179 // CHECK-DAG: num_cores_per_replica = 1
195 …}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_…
[all …]
Doutside_compiled_to_host_launch.mlir13 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32>
28 // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = ""
34 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> ()
51 // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = ""
59 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> ()
83 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32>
118 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32>
Dmark_ops_for_outside_compilation.mlir13 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<i32>
27 …}) {allow_soft_placement = false, num_cores_per_replica = 1, topology = "", device_assignment = …
45 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = […
59 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = […
77 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = […
91 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = […
102 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = […
116 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = […
134 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = […
151 …}) {allow_soft_placement = true, num_cores_per_replica = 1, topology = "", device_assignment = […
[all …]
Dtpu_rewrite.mlir8 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_…
23 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_…
38 // expected-error@+1 {{requires attribute 'num_cores_per_replica'}}
53 // expected-error@+1 {{requires attribute 'num_cores_per_replica'}}
54 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = "", step…
69 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, paddi…
84 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_…
99 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_…
114 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_…
129 …cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_…
[all …]
Dtpu_extract_outside_compilation.mlir14 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32>
32 // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = ""
38 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> ()
55 // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = ""
63 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> ()
84 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> ()
109 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32>
134 …}) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> (tensor<?xf32>, te…
164 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32>
190 }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor<?xi32>
[all …]
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/utils/
Dtpu_rewrite_device_util.cc188 int num_replicas, int num_cores_per_replica, in GetFullMeshTPUExecutionDeviceAssignment() argument
198 if (num_cores_per_replica != 1) in GetFullMeshTPUExecutionDeviceAssignment()
201 num_cores_per_replica); in GetFullMeshTPUExecutionDeviceAssignment()
347 int num_replicas, int num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment() argument
358 num_replicas * num_cores_per_replica * kTPUTopologyRank; in GetGeneralTPUExecutionDeviceAssignment()
364 kTPUTopologyRank, " (", num_replicas, " * ", num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment()
382 num_cores_per_replica, TPUDeviceAndHost())); in GetGeneralTPUExecutionDeviceAssignment()
383 xla::DeviceAssignment device_assignment(num_replicas, num_cores_per_replica); in GetGeneralTPUExecutionDeviceAssignment()
386 for (int logical_core = 0; logical_core < num_cores_per_replica; in GetGeneralTPUExecutionDeviceAssignment()
450 Devices devices, int num_replicas, int num_cores_per_replica, in GetTPUCompilationAndExecutionDevices() argument
[all …]
Dxla_sharding_util.cc211 const int num_cores_per_replica, in ExtractInputsForLogicalDevices() argument
215 input_list->reserve(num_cores_per_replica); in ExtractInputsForLogicalDevices()
216 for (int i = 0; i < num_cores_per_replica; ++i) in ExtractInputsForLogicalDevices()
251 input_index, tiled_input_size, num_cores_per_replica)); in ExtractInputsForLogicalDevices()
273 if (partitioned_input.inputs().size() != num_cores_per_replica) in ExtractInputsForLogicalDevices()
293 if (tiled_inputs_size != num_cores_per_replica) in ExtractInputsForLogicalDevices()
312 const int num_cores_per_replica, in ParseAndValidateOutputSharding() argument
341 sharding.tile_assignment_devices_size() != num_cores_per_replica) in ParseAndValidateOutputSharding()
346 sharding.tile_assignment_devices_size(), num_cores_per_replica)); in ParseAndValidateOutputSharding()
349 ((sharding.tile_assignment_devices(0) >= num_cores_per_replica) || in ParseAndValidateOutputSharding()
[all …]
Dxla_sharding_util.h40 const int num_cores_per_replica,
47 const int num_cores_per_replica,
/external/tensorflow/tensorflow/core/tpu/graph_rewrite/
Ddistributed_tpu_rewrite_pass.cc490 Status ValidateCoreNumber(int64 core, int64 num_cores_per_replica) { in ValidateCoreNumber() argument
491 if (core < 0 || core >= num_cores_per_replica) { in ValidateCoreNumber()
494 num_cores_per_replica, ")"); in ValidateCoreNumber()
1196 const int num_cores_per_replica, in ParseAndValidateSharding() argument
1203 ValidateCoreNumber(core_annotation, num_cores_per_replica)); in ParseAndValidateSharding()
1211 TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); in ParseAndValidateSharding()
1261 ParseInputShardingFromAdjacentNode(const int num_cores_per_replica, in ParseInputShardingFromAdjacentNode() argument
1266 ParseShardingFromDevice(node, num_cores_per_replica, in ParseInputShardingFromAdjacentNode()
1284 num_cores_per_replica, /*add_metadata=*/true)); in ParseInputShardingFromAdjacentNode()
1299 const int num_cores_per_replica, const std::string& arg_node_name, in ParseAndValidateShardingFromNeighbors() argument
[all …]
Ddistributed_tpu_rewrite_pass.h268 int num_cores_per_replica, const string& topology_attr,
305 int num_cores_per_replica, const ParameterInfo& params_info,
362 int num_cores_per_replica, const string& compile_device,
430 int num_cores_per_replica, const Node& replicate_node,
522 int* num_replicas, int* num_cores_per_replica, int* num_tasks,
/external/tensorflow/tensorflow/core/ops/compat/ops_history_v2/
DTPUReplicateMetadata.pbtxt9 name: "num_cores_per_replica"
70 name: "num_cores_per_replica"
138 name: "num_cores_per_replica"
213 name: "num_cores_per_replica"
/external/tensorflow/tensorflow/core/ops/compat/ops_history_v1/
DTPUReplicateMetadata.pbtxt9 name: "num_cores_per_replica"
70 name: "num_cores_per_replica"
138 name: "num_cores_per_replica"
/external/tensorflow/tensorflow/core/tpu/kernels/
Dtpu_compile_op_impl.cc52 function_name, metadata, metadata.num_cores_per_replica(), in CreateNonMlirImpl()
65 mlir_module, metadata, metadata.num_cores_per_replica(), in CreateMlirImpl()
Dtpu_compile_op_support.cc207 may_modify_variables->resize(metadata.num_cores_per_replica(), false); in AddVariableUpdatesToCores()
240 for (int64 core = 0; core < metadata.num_cores_per_replica(); in AddVariableUpdatesToCores()
259 for (int64 core = 0; core < metadata.num_cores_per_replica(); ++core) { in AddVariableUpdatesToCores()
324 metadata.num_replicas(), metadata.num_cores_per_replica(), in CreateHloModules()
410 if (num_computations != metadata->num_cores_per_replica()) { in CompileOpMetadataFromContext()
415 num_computations, " vs ", metadata->num_cores_per_replica(), ")"); in CompileOpMetadataFromContext()
431 metadata->num_cores_per_replica()) { in CompileOpMetadataFromContext()
435 metadata->num_cores_per_replica()); in CompileOpMetadataFromContext()
Dtpu_program_group.cc268 count == compilation_request.metadata().num_cores_per_replica()); in CompileAndBuild()
301 int num_cores_per_replica = in CompileAndBuild() local
302 xrt_computation_proto.config().num_cores_per_replica() in CompileAndBuild()
303 ? xrt_computation_proto.config().num_cores_per_replica() in CompileAndBuild()
305 TF_RET_CHECK(count == 1 || count == num_cores_per_replica); in CompileAndBuild()
Dtpu_mesh_state_interface.h64 return metadata.num_cores_per_replica() * metadata.num_replicas() == in NeedsStaticDeviceAssignment()
/external/tensorflow/tensorflow/core/api_def/base_api/
Dapi_def_TPUReplicateMetadata.pbtxt11 name: "num_cores_per_replica"
37 DEPRECATED. Use num_cores_per_replica instead.
/external/tensorflow/tensorflow/core/profiler/convert/
Dop_stats_combiner.cc98 std::max(src.num_cores_per_replica(), dst->num_cores_per_replica())); in CombineRunEnvironment()

123