1syntax = "proto3";
2
3package tensorflow.tensorforest;
4
5import "tensorflow/contrib/decision_trees/proto/generic_tree_model.proto";
6
// Selects the leaf model, i.e. what a leaf returns at inference time and how
// that result is stored in the decision_trees.Leaf protos.
enum LeafModelType {
  // Zero value, so this is what an unset proto3 field of this type reads as.
  MODEL_DENSE_CLASSIFICATION = 0;
  MODEL_SPARSE_CLASSIFICATION = 1;
  MODEL_REGRESSION = 2;
  MODEL_SPARSE_OR_DENSE_CLASSIFICATION = 3;
}
15
// Selects the stats model: the information collected at a node that is
// needed to choose a split there. Stats models operate on a
// SplitCandidate::LeafStat proto.
enum StatsModelType {
  // Zero value, so this is what an unset proto3 field of this type reads as.
  STATS_DENSE_GINI = 0;
  STATS_SPARSE_GINI = 1;
  STATS_LEAST_SQUARES_REGRESSION = 2;
  // Deprecated and no longer supported. The name and number stay declared so
  // that value 3 is never reassigned to a different meaning.
  STATS_SPARSE_THEN_DENSE_GINI = 3 [deprecated = true];
  STATS_FIXED_SIZE_SPARSE_GINI = 4;
}
27
// Selects which set of operations is used on the collection of split
// candidates.
enum SplitCollectionType {
  // Basic collection: the right-hand split stats are inferred from the leaf
  // stats together with each candidate's left-hand stats.
  COLLECTION_BASIC = 0;
  GRAPH_RUNNER_COLLECTION = 1;
}
35
// Pruning strategies define how split candidates are pruned over time. For
// example, SPLIT_PRUNE_HALF prunes the worst half of the splits every
// prune_every_samples, and so on for the other fractional strategies.
//
// Note that prune_every_samples plays against the depth-dependent
// split_after_samples, so the two should be set together.
enum SplitPruningStrategyType {
  SPLIT_PRUNE_NONE = 0;
  SPLIT_PRUNE_HALF = 1;
  SPLIT_PRUNE_QUARTER = 2;
  SPLIT_PRUNE_10_PERCENT = 3;
  // Prunes splits whose Gini impurity is worse than the best split's by more
  // than the Hoeffding bound.
  SPLIT_PRUNE_HOEFFDING = 4;
}
49
// Configuration of split-candidate pruning: which strategy to apply and how
// often to apply it.
message SplitPruningConfig {
  // Pruning interval, expressed as a (possibly depth-dependent) parameter.
  DepthDependentParam prune_every_samples = 1;

  // The pruning strategy to apply.
  SplitPruningStrategyType type = 2;
}
54
// Finish strategies define when slots are considered finished.
//
//  * Basic requires at least split_after_samples, and does not allow a slot
//    to finish until the leaf has received more than one class.
//  * Hoeffding splits early, after min_split_samples, if one split is
//    dominating the rest according to Hoeffding bounds.
//  * Bootstrap does the same, but compares Gini values calculated with
//    sampled smoothed counts.
//
// NOTE(review): value 1 is unassigned in this enum. If it belonged to a
// retired strategy it should not be reused -- confirm before assigning it.
enum SplitFinishStrategyType {
  SPLIT_FINISH_BASIC = 0;
  SPLIT_FINISH_DOMINATE_HOEFFDING = 2;
  SPLIT_FINISH_DOMINATE_BOOTSTRAP = 3;
}
66
// Configuration of the split-finish check: which strategy decides that a
// slot is finished and how often that check runs.
message SplitFinishConfig {
  // How often to check for finish. This is configurable because some finish
  // methods are expensive to perform.
  DepthDependentParam check_every_steps = 1;

  // The finish strategy to apply.
  SplitFinishStrategyType type = 2;
}
73
// A parameter whose value changes linearly with depth and is bounded above
// and below.
// NOTE(review): presumably evaluated as slope * depth + y_intercept, limited
// to the range [min_val, max_val] -- confirm against the consuming code.
message LinearParam {
  float slope = 1;
  float y_intercept = 2;
  // Lower bound on the parameter value.
  float min_val = 3;
  // Upper bound on the parameter value.
  float max_val = 4;
}
81
// A parameter that changes exponentially with depth, following the form
//     f = c + m * b^(k * d)
// where:
//  c: constant bias      (bias)
//  b: base               (base)
//  m: multiplier         (multiplier)
//  k: depth multiplier   (depth_multiplier)
//  d: depth
message ExponentialParam {
  float bias = 1;
  float base = 2;
  float multiplier = 3;
  float depth_multiplier = 4;
}
96
// A step-shaped parameter: it is 'off' while depth is below a threshold and
// 'on' once depth >= that threshold.
message ThresholdParam {
  // Value used once the depth threshold has been reached.
  float on_value = 1;
  // Value used before the depth threshold has been reached.
  float off_value = 2;
  // Depth at which the parameter switches from 'off' to 'on'.
  float threshold = 3;
}
103
// A parameter whose value may vary with node depth. Exactly one of the
// alternatives below is set at a time; setting one clears the others.
message DepthDependentParam {
  // The oneof keeps its existing name because generated accessors are derived
  // from it.
  oneof ParamType {
    // A fixed value.
    float constant_value = 1;
    // A bounded, linearly changing value.
    LinearParam linear = 2;
    // An exponentially changing value.
    ExponentialParam exponential = 3;
    // A value that switches from 'off' to 'on' at a depth threshold.
    ThresholdParam threshold = 4;
  }
}
113
// Top-level parameter set for a TensorForest model. It groups the choice of
// training subsystems, the fixed model dimensions, execution flags,
// depth-dependent tuning parameters, and settings for experimental features.
message TensorForestParams {
  // ---------------- Types that control training subsystems ---------------- //

  // Leaf model: what is returned at inference time and how it is stored.
  LeafModelType leaf_type = 1;
  // Stats model: what is collected in order to choose a split.
  StatsModelType stats_type = 2;
  // Operations used on the collection of split candidates.
  SplitCollectionType collection_type = 3;
  // Pruning strategy and its interval.
  SplitPruningConfig pruning_type = 4;
  // Finish strategy and how often it is checked.
  SplitFinishConfig finish_type = 5;

  // ------------- Parameters that can't change by definition --------------- //

  int32 num_trees = 6;
  int32 max_nodes = 7;
  int32 num_features = 21;

  // Type declared by decision_trees.InequalityTest.
  decision_trees.InequalityTest.Type inequality_test_type = 19;

  // ------------------ Booleans controlling execution ---------------------- //

  bool is_regression = 8;
  bool drop_final_class = 9;
  bool collate_examples = 10;
  bool checkpoint_stats = 11;
  bool use_running_stats_method = 20;
  bool initialize_average_splits = 22;
  bool inference_tree_paths = 23;

  // Number of classes (for classification) or targets (for regression).
  int32 num_outputs = 12;

  // ------------- Parameters that could be depth-dependent ----------------- //

  DepthDependentParam num_splits_to_consider = 13;
  DepthDependentParam split_after_samples = 14;
  DepthDependentParam dominate_fraction = 15;
  DepthDependentParam min_split_samples = 18;

  // ---------------- Parameters for experimental features ------------------ //

  string graph_dir = 16;
  int32 num_select_features = 17;

  // Number of classes to keep track of when using a
  // FixedSizeSparseClassificationGrowStats.
  int32 num_classes_to_track = 24;
}
155