syntax = "proto3";

package tensorflow.tensorforest;

import "tensorflow/contrib/decision_trees/proto/generic_tree_model.proto";

// Leaf models specify what is returned at inference time, and how it is
// stored in the decision_trees.Leaf protos.
enum LeafModelType {
  // Zero value: this is what an unset LeafModelType field reads as in proto3.
  MODEL_DENSE_CLASSIFICATION = 0;
  MODEL_SPARSE_CLASSIFICATION = 1;
  MODEL_REGRESSION = 2;
  MODEL_SPARSE_OR_DENSE_CLASSIFICATION = 3;
}

// Stats models generally specify information that is collected which is
// necessary to choose a split at a node. Specifically, they operate on
// a SplitCandidate::LeafStat proto.
enum StatsModelType {
  // Zero value: this is what an unset StatsModelType field reads as in proto3.
  STATS_DENSE_GINI = 0;
  STATS_SPARSE_GINI = 1;
  STATS_LEAST_SQUARES_REGRESSION = 2;
  // STATS_SPARSE_THEN_DENSE_GINI is deprecated and no longer supported.
  // The number stays assigned so that it is never reused for a new value.
  STATS_SPARSE_THEN_DENSE_GINI = 3 [deprecated = true];
  STATS_FIXED_SIZE_SPARSE_GINI = 4;
}

// Allows selection of operations on the collection of split candidates.
// Basic infers right split stats from the leaf stats and each candidate's
// left stats.
enum SplitCollectionType {
  COLLECTION_BASIC = 0;
  GRAPH_RUNNER_COLLECTION = 1;
}

// Pruning strategies define how candidates are pruned over time.
// SPLIT_PRUNE_HALF prunes the worst half of splits every prune_every_samples,
// etc. Note that prune_every_samples plays against the depth-dependent
// split_after_samples, so they should be set together.
enum SplitPruningStrategyType {
  SPLIT_PRUNE_NONE = 0;
  SPLIT_PRUNE_HALF = 1;
  SPLIT_PRUNE_QUARTER = 2;
  SPLIT_PRUNE_10_PERCENT = 3;
  // SPLIT_PRUNE_HOEFFDING prunes splits whose Gini impurity is worse than
  // the best split's by more than the Hoeffding bound.
  SPLIT_PRUNE_HOEFFDING = 4;
}

// Pairs a pruning strategy with the interval at which it is applied.
message SplitPruningConfig {
  // Pruning interval, in samples; may vary with node depth.
  DepthDependentParam prune_every_samples = 1;
  // Which pruning strategy to apply at each interval.
  SplitPruningStrategyType type = 2;
}

// Finish strategies define when slots are considered finished.
// Basic requires at least split_after_samples, and doesn't allow slots to
// finish until the leaf has received more than one class. Hoeffding splits
// early after min_split_samples if one split is dominating the rest according
// to Hoeffding bounds. Bootstrap does the same but compares Gini values
// calculated with sampled smoothed counts.
enum SplitFinishStrategyType {
  SPLIT_FINISH_BASIC = 0;
  // NOTE(review): number 1 is skipped here -- presumably a removed strategy.
  // If so, add `reserved 1;` so the number cannot be reused. TODO confirm.
  SPLIT_FINISH_DOMINATE_HOEFFDING = 2;
  SPLIT_FINISH_DOMINATE_BOOTSTRAP = 3;
}

// Pairs a finish strategy with the interval at which it is evaluated.
message SplitFinishConfig {
  // Configure how often we check for finish, because some finish methods
  // are expensive to perform.
  DepthDependentParam check_every_steps = 1;
  // Which finish strategy to evaluate at each check.
  SplitFinishStrategyType type = 2;
}

// A parameter that changes linearly with depth, with upper and lower bounds.
message LinearParam {
  // Change in value per unit of depth.
  float slope = 1;
  // Value of the unbounded line at depth 0.
  float y_intercept = 2;
  // Lower bound on the resulting value.
  float min_val = 3;
  // Upper bound on the resulting value.
  float max_val = 4;
}

// A parameter that changes exponentially with the form
//   f = c + m * b^(k*d)
// where:
//   c: constant bias
//   b: base
//   m: multiplier
//   k: depth multiplier
//   d: depth
message ExponentialParam {
  // c in the formula above.
  float bias = 1;
  // b in the formula above.
  float base = 2;
  // m in the formula above.
  float multiplier = 3;
  // k in the formula above.
  float depth_multiplier = 4;
}

// A parameter that is 'off' until depth >= a threshold, then is 'on'.
message ThresholdParam {
  // Value used once depth >= threshold.
  float on_value = 1;
  // Value used while depth < threshold.
  float off_value = 2;
  // Depth at which the parameter switches from off_value to on_value.
  float threshold = 3;
}

// A parameter that may change with node depth.
message DepthDependentParam {
  // Exactly one representation is set; setting one member clears the others.
  oneof ParamType {
    // The same value at every depth.
    float constant_value = 1;
    LinearParam linear = 2;
    ExponentialParam exponential = 3;
    ThresholdParam threshold = 4;
  }
}

// Top-level configuration message. Field numbers are not in declaration
// order; they identify fields on the wire and must not be renumbered.
message TensorForestParams {
  // ------------ Types that control training subsystems ------ //
  LeafModelType leaf_type = 1;
  StatsModelType stats_type = 2;
  SplitCollectionType collection_type = 3;
  SplitPruningConfig pruning_type = 4;
  SplitFinishConfig finish_type = 5;

  // --------- Parameters that can't change by definition --------------- //
  int32 num_trees = 6;
  int32 max_nodes = 7;
  int32 num_features = 21;

  // Inequality test type, as defined by the imported decision_trees package.
  decision_trees.InequalityTest.Type inequality_test_type = 19;

  // Some booleans controlling execution
  bool is_regression = 8;
  bool drop_final_class = 9;
  bool collate_examples = 10;
  bool checkpoint_stats = 11;
  bool use_running_stats_method = 20;
  bool initialize_average_splits = 22;
  bool inference_tree_paths = 23;

  // Number of classes (classification) or targets (regression)
  int32 num_outputs = 12;

  // --------- Parameters that could be depth-dependent --------------- //
  DepthDependentParam num_splits_to_consider = 13;
  DepthDependentParam split_after_samples = 14;
  DepthDependentParam dominate_fraction = 15;
  DepthDependentParam min_split_samples = 18;

  // --------- Parameters for experimental features ---------------------- //
  string graph_dir = 16;
  int32 num_select_features = 17;

  // When using a FixedSizeSparseClassificationGrowStats, keep track of
  // this many classes.
  int32 num_classes_to_track = 24;
}