syntax = "proto3";

package tensorflow.profiler.op_profile;

// Profile is the top-level data that summarizes a program.
//
// It holds the roots of two profile trees: one broken down by instruction
// category and one broken down by program.
message Profile {
  // Field numbers and names that must never be (re)used in this message.
  // NOTE(review): presumably these belonged to fields that were removed;
  // keeping them reserved prevents reuse with a different meaning.
  reserved 2, 3;
  reserved "by_program_structure", "per_program";

  // Root of a profile broken down by instruction category.
  Node by_category = 1;
  // Root of a profile broken down by program.
  Node by_program = 4;
}

// An entry in the profile tree. (An instruction, or set of instructions).
message Node {
  // Name of this node. Semantics depend on contents.
  string name = 1;
  // Measurements for this node. May be omitted e.g. for fused instructions.
  Metrics metrics = 2;
  // Child nodes. Subjected to pruning; see num_children for the total number
  // of children before pruning.
  repeated Node children = 3;

  // Details about what this node represents.
  oneof contents {
    // Set when this node stands for a category of XLA instructions.
    InstructionCategory category = 4;
    // Set when this node stands for a single XLA instruction.
    XLAInstruction xla = 5;
  }

  // Total number of children before pruning.
  int32 num_children = 6;

  // A category of XLA instructions.
  // name is a descriptive string, like "data formatting".
  message InstructionCategory {}

  // A single XLA instruction.
  // name is the unique instruction id, like "%multiply.5".
  message XLAInstruction {
    // Opcode like %multiply
    string op = 1;
    // Full expression, of the form
    // %multiply = [shape]multiply(operand1, operand2)
    string expression = 2;
    // Typically the TensorFlow operation name.
    string provenance = 3;
    // Category of this instruction.
    // NOTE(review): presumably the same kind of descriptive string that names
    // an InstructionCategory node (e.g. "data formatting") -- confirm.
    string category = 4;
    // Describes the physical memory layout of the instruction's primary input.
    // e.g. for a convolution, this analyzes the image and ignores the kernel.
    LayoutAnalysis layout = 5;

    // Physical layout of a piece of data, described dimension by dimension.
    message LayoutAnalysis {
      // The physical data layout, from most-minor to most-major dimensions.
      repeated Dimension dimensions = 1;

      // One dimension of the physical layout.
      message Dimension {
        // Size of the data in this dimension.
        int32 size = 1;
        // Data must be padded to a multiple of alignment.
        int32 alignment = 2;
        // What the dimension represents, e.g. "spatial".
        string semantics = 3;
      }
    }
  }
}

// Measurements of an operation (or aggregated set of operations).
// Metrics are always "total" rather than "self".
message Metrics {
  // Core-time taken by this operation, as a fraction of all operations.
  double time = 1;

  // Floating point computations performed by this operation, as a fraction of
  // peak core FLOPS * program time. This representation has useful properties:
  //  - it is proportional to the number of floating point operations performed
  //  - utilization is flops/time
  //  - wasted potential flops is proportional to time - flops
  //  - it does not reveal the peak core FLOPS of the hardware
  double flops = 2;

  // The memory bandwidth used to load operands, as a fraction of
  // theoretical memory bandwidth on the specific hardware.
  double memory_bandwidth = 3;

  // Elapsed core-time in picoseconds.
  double raw_time = 11;
  // Total floating-point operations performed.
  double raw_flops = 12;
  // Total bytes accessed (includes both reads and writes).
  double raw_bytes_accessed = 13;
}