syntax = "proto3";

package tensorflow.boosted_trees.trees;

// Enable arena allocation support in the generated C++ classes.
option cc_enable_arenas = true;

// TreeNode describes a node in a tree.
message TreeNode {
  // Payload of the node: either a leaf or one of the split rules below.
  // As with any oneof, at most one member is set at a time.
  oneof node {
    // Leaf payload.
    Leaf leaf = 1;
    // Split on a dense float feature.
    DenseFloatBinarySplit dense_float_binary_split = 2;
    // Split on a sparse float feature; missing features default left.
    SparseFloatBinarySplitDefaultLeft sparse_float_binary_split_default_left =
        3;
    // Split on a sparse float feature; missing features default right.
    SparseFloatBinarySplitDefaultRight sparse_float_binary_split_default_right =
        4;
    // Split on a categorical feature with a single feature Id.
    CategoricalIdBinarySplit categorical_id_binary_split = 5;
    // Split on a categorical feature with a set of feature Ids.
    CategoricalIdSetMembershipBinarySplit
        categorical_id_set_membership_binary_split = 6;
    // Split on a dense float feature in the oblivious case.
    ObliviousDenseFloatBinarySplit oblivious_dense_float_binary_split = 7;
    // Split on a categorical feature with a single Id in the oblivious case.
    ObliviousCategoricalIdBinarySplit oblivious_categorical_id_binary_split = 8;
  }
  // Metadata associated with this node. Declared outside the oneof, so it can
  // be set alongside any payload.
  TreeNodeMetadata node_metadata = 777;
}

// TreeNodeMetadata encodes metadata associated with each node in a tree.
message TreeNodeMetadata {
  // The gain associated with this node.
  float gain = 1;

  // The original leaf node before this node was split.
  Leaf original_leaf = 2;

  // The original layer of leaves before that layer was converted to a split.
  repeated Leaf original_oblivious_leaves = 3;
}

// Leaves can either hold dense or sparse information.
message Leaf {
  // The leaf payload; at most one of the two representations is set.
  oneof leaf {
    // See third_party/tensorflow/contrib/decision_trees/
    // proto/generic_tree_model.proto
    // for a description of how vector and sparse_vector might be used.

    // Dense representation.
    Vector vector = 1;
    // Sparse representation.
    SparseVector sparse_vector = 2;
  }
}

// A list of float values. Used as the dense payload of a Leaf.
message Vector {
  // The values, in order.
  repeated float value = 1;
}

// A list of integer indices plus a list of float values. Used as the sparse
// payload of a Leaf.
// NOTE(review): presumably value[i] is the entry stored at index[i], so both
// lists are expected to have the same length -- confirm against the readers.
message SparseVector {
  // Indices of the stored entries.
  repeated int32 index = 1;
  // Stored values.
  repeated float value = 2;
}

// Split rule for dense float features.
message DenseFloatBinarySplit {
  // Float feature column and split threshold describing
  // the rule feature <= threshold.
  int32 feature_column = 1;
  // If feature column is multivalent, this holds the index of the dimension
  // for the split. Defaults to 0.
  // This field carries number 5 although it is declared second; field numbers
  // identify fields on the wire, so it must not be renumbered.
  int32 dimension_id = 5;
  // Threshold used in the rule feature <= threshold.
  float threshold = 2;

  // Node children indexing into a contiguous
  // vector of nodes starting from the root.
  // NOTE(review): which child is followed when the rule holds is not stated
  // in this file -- confirm against the code that evaluates the split.
  int32 left_id = 3;
  int32 right_id = 4;
}

// Split rule for sparse float features defaulting left for missing features.
message SparseFloatBinarySplitDefaultLeft {
  // The underlying split rule, expressed as a DenseFloatBinarySplit.
  DenseFloatBinarySplit split = 1;
}

// Split rule for sparse float features defaulting right for missing features.
message SparseFloatBinarySplitDefaultRight {
  // The underlying split rule, expressed as a DenseFloatBinarySplit.
  DenseFloatBinarySplit split = 1;
}

// Split rule for categorical features with a single feature Id.
message CategoricalIdBinarySplit {
  // Categorical feature column and Id describing
  // the rule feature == Id.
  int32 feature_column = 1;
  // The Id that the feature is compared against.
  int64 feature_id = 2;

  // Node children indexing into a contiguous
  // vector of nodes starting from the root.
  int32 left_id = 3;
  int32 right_id = 4;
}

// Split rule for categorical features with a set of feature Ids.
message CategoricalIdSetMembershipBinarySplit {
  // Categorical feature column and Ids describing
  // the rule feature ∈ feature_ids.
  int32 feature_column = 1;
  // Sorted list of Ids in the set.
  repeated int64 feature_ids = 2;

  // Node children indexing into a contiguous
  // vector of nodes starting from the root.
  int32 left_id = 3;
  int32 right_id = 4;
}

// Split rule for dense float features in the oblivious case.
message ObliviousDenseFloatBinarySplit {
  // Float feature column and split threshold describing
  // the rule feature <= threshold.
  int32 feature_column = 1;
  // Threshold used in the rule feature <= threshold.
  float threshold = 2;
  // We don't store children ids, because either the next node represents the
  // whole next layer of the tree or starting with the next node we only have
  // leaves.
}

// Split rule for categorical features with a single feature Id in the oblivious
// case.
message ObliviousCategoricalIdBinarySplit {
  // Categorical feature column and Id describing the rule feature == Id.
  int32 feature_column = 1;
  // The Id that the feature is compared against.
  int64 feature_id = 2;
  // We don't store children ids, because either the next node represents the
  // whole next layer of the tree or starting with the next node we only have
  // leaves.
}

// DecisionTreeConfig describes a list of connected nodes.
// Node 0 must be the root and can carry any payload including a leaf
// in the case of representing the bias.
// Note that each node id is implicitly its index in the list of nodes.
message DecisionTreeConfig {
  // The nodes of the tree; the position of a node in this list is its id.
  repeated TreeNode nodes = 1;
}

// DecisionTreeMetadata holds bookkeeping counters and flags for one tree.
message DecisionTreeMetadata {
  // How many times tree weight was updated (due to reweighting of the final
  // ensemble, dropout, shrinkage etc).
  int32 num_tree_weight_updates = 1;

  // Number of layers grown for this tree.
  int32 num_layers_grown = 2;

  // Whether the tree is finalized in that no more layers can be grown.
  bool is_finalized = 3;
}

// GrowingMetadata records how many trees and layers have been attempted and
// which column handlers have been used in splits.
message GrowingMetadata {
  // Number of trees that we have attempted to build. After pruning, these
  // trees might have been removed.
  int64 num_trees_attempted = 1;
  // Number of layers that we have attempted to build. After pruning, these
  // layers might have been removed.
  int64 num_layers_attempted = 2;

  // Sorted list of column handlers that have been used in at least one split
  // so far.
  repeated int64 used_handler_ids = 3;
}

// DecisionTreeEnsembleConfig describes an ensemble of decision trees.
// NOTE(review): tree_weights and tree_metadata presumably hold one entry per
// element of trees, matched by position -- confirm against the code that
// builds the ensemble.
message DecisionTreeEnsembleConfig {
  // The trees in the ensemble.
  repeated DecisionTreeConfig trees = 1;
  // Weights of the trees.
  repeated float tree_weights = 2;
  // Metadata of the trees.
  repeated DecisionTreeMetadata tree_metadata = 3;

  // Metadata that is used during the training.
  GrowingMetadata growing_metadata = 4;
}
173