syntax = "proto3";

package xrt;

import "tensorflow/compiler/tf2xla/host_compute_metadata.proto";
import "tensorflow/compiler/xla/service/hlo.proto";
import "tensorflow/compiler/xla/xla.proto";
import "tensorflow/compiler/xla/xla_data.proto";

// Describes the devices on which each computation of a replicated program
// runs.
message DeviceAssignment {
  // The devices assigned to the replicas of a single computation.
  message ComputationDevice {
    // The position of one device in the device mesh.
    message DeviceMeshCoordinates {
      // The mesh coordinates for the device. Usually (X, Y, Core), in the
      // order in which they are returned in the TopologyProto:
      //   X    = value(0)
      //   Y    = value(1)
      //   Core = value(2)
      repeated int32 value = 1;
    }

    // One entry per replica in the replicated computation.
    repeated DeviceMeshCoordinates replica_devices = 1;
  }

  // One ComputationDevice per computation, i.e. as many entries as there are
  // cores per replica.
  repeated ComputationDevice computation_devices = 1;
}

// Options for an XLA compilation.
message XLAComputationConfig {
  // The number of replicas the computation will be run on. If this is left at
  // the default (0) it is interpreted as 1.
  int32 num_replicas = 1;

  // The number of "model-parallel" cores per replica. If this is left at the
  // default (0) it is interpreted as 1.
  int32 num_cores_per_replica = 2;

  // Optional metadata about host sends and recvs.
  tensorflow.tf2xla.HostComputeMetadata host_compute_metadata = 3;

  // The arg/result shapes for the whole computation.
  xla.ProgramShapeProto program_shape = 4;

  // The arg/result shapes for each core of a model-parallel computation.
  // per_core_program_shape is optional for a single-core computation.
  repeated xla.ProgramShapeProto per_core_program_shape = 5;

  // Describes how replicated computation instances should be assigned to
  // devices. There are num_cores_per_replica computations, and each one will
  // be sent to, and executed on, the set of replica devices described in the
  // DeviceAssignment proto.
  DeviceAssignment device_assignment = 6;

  // The debugging options to be passed to the XLA compilation process.
  xla.DebugOptions debug_options = 7;
}

// Options and XLA computation for a compilation.
message XLAComputation {
  // The options to compile the computation with.
  XLAComputationConfig config = 1;

  // The XLA computation to compile, supplied as an HloSnapshot.
  xla.HloSnapshot hlo_snapshot = 2;
}

// Literal to allocate space for, and transfer to, device memory.
message XLAAllocation {
  // Field number 1 is reserved and must not be reused for a new field.
  reserved 1;

  // The literal for which device memory is allocated and which is transferred
  // to the device.
  xla.LiteralProto value = 2;
}

// Node in a tree describing a tuple constructed from input handles. A node is
// an internal node if tuples is non-empty, in which case input_index and
// release_input_handle are ignored. Otherwise the node is a leaf node. Each
// leaf XLATupleNode holds the index of an input, which corresponds to a handle
// that will be grafted onto the output tuple at that location. If
// release_input_handle is true, that input handle will be released and become
// invalid. Inputs may be repeated, in which case leaves of the output tuple
// will alias. If an input is repeated, release_input_handle must be false for
// every leaf where that input appears.
//
// For example, if input 0 has shape {} and input 1 has shape {2,3} then the
// XLATupleNode with structure {1,{0,1}} corresponds to a tuple with shape
// {{2,3},{{},{2,3}}}.
//
// NOTE(review): this message is recursive. Protobuf parsers limit message
// nesting depth, so very deeply nested trees may fail to parse; confirm that
// callers bound the depth.
message XLATupleNode {
  // Leaf nodes only: the index of the input whose handle is grafted onto the
  // output tuple at this location. Ignored when tuples is non-empty.
  int32 input_index = 1;

  // Leaf nodes only: if true, the input handle is released and becomes
  // invalid. Must be false for every leaf of an input that appears more than
  // once. Ignored when tuples is non-empty.
  bool release_input_handle = 2;

  // The child nodes of an internal node. A node with no children is a leaf.
  repeated XLATupleNode tuples = 3;
}

// Options for an XLA execution.
message XRTExecutionConfig {
  // Local device to run on. This is present because the execute Op may be
  // placed on a device such as CPU or TPU_SYSTEM that logically manages
  // multiple cores.
  int32 device_ordinal = 1;

  // Which model-parallel computation to run from the compiled bundle.
  int32 core_index_in_replica = 2;

  // Optional key to disambiguate between executions. This is only needed if
  // multiple host send/recvs may be outstanding concurrently with executions.
  string execution_instance_key = 3;

  // If non-zero, the RNG seed to reset the core with.
  uint32 rng_seed = 4;

  // If true, release allocation handles on the inputs after running.
  bool release_input_handles = 5;

  // If true, release the handle to the computation after running.
  bool release_compilation_handle = 6;

  // If set to true, and the result shape is a tuple, then instead of returning
  // a single tuple allocation the execution will return a vector of
  // allocations, one for each of the first-level elements of the result tuple.
  bool return_exploded_tuple = 7;
}