1 // Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // =============================================================================
15 #ifndef TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_DROPOUT_UTILS_H_
16 #define TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_DROPOUT_UTILS_H_
17 
18 #include <unordered_set>
19 #include <vector>
20 
21 #include "tensorflow/contrib/boosted_trees/proto/learner.pb.h"  // NOLINT
22 #include "tensorflow/core/lib/core/status.h"
23 #include "tensorflow/core/platform/types.h"
24 
25 namespace tensorflow {
26 namespace boosted_trees {
27 namespace utils {
28 
29 // Utils for deciding on what trees to be/were dropped when building a new tree.
30 class DropoutUtils {
31  public:
32   // This method determines what trees should be dropped and returns their
33   // indices and the weights they had when this method ran.
34   // seed: random seed to be used
35   // config: dropout config, that defines the probability of dropout etc
36   // trees_not_to_drop: indices of trees that can't be dropped, for example bias
37   // (0) and the last tree in the batch mode.
38   // number_of_trees_to_consider: how many trees are currently in the ensemble
39   // weights: weights of those trees
40   // Returns sorted vector of indices of trees to be dropped and their original
41   // weights.
42   static tensorflow::Status DropOutTrees(
43       const uint64 seed, const learner::LearningRateDropoutDrivenConfig& config,
44       const std::unordered_set<int32>& trees_not_to_drop,
45       const std::vector<float>& weights, std::vector<int32>* dropped_trees,
46       std::vector<float>* original_weights);
47 
48   // Recalculates the weights of the trees when the new trees are added to
49   // ensemble.
50   // dropped_trees: ids of trees that were dropped when trees to add were built.
51   // dropped_trees_original_weights: the weight dropped trees had during dropout
52   // new_trees_first_index: index of the last tree. If it is already in the
53   // ensemble, its weight and num updates are adjusted. Otherwise, its weight
54   // and num updates are added as new entries to current_weights and
55   // num_updates. num_trees_to_add: how many trees are being added to the
56   // ensemble. Returns current_weights: updated vector of the tree weights.
57   // Weights of dropped trees are updated. Note that the size of returned vector
58   // will be total_num_trees + num_trees_to_add (the last elements are the
59   // weights of the new trees to be added) if new_trees_first_index
60   // >=current_weights.size num_updates: updated vector with increased number of
61   // updates for dropped trees.
62   static void GetTreesWeightsForAddingTrees(
63       const std::vector<int32>& dropped_trees,
64       const std::vector<float>& dropped_trees_original_weights,
65       const int32 new_trees_first_index, const int32 num_trees_to_add,
66       // Current weights and num_updates will be updated as a result of this
67       // func
68       std::vector<float>* current_weights,
69       // How many weight assignments have been done for each tree already.
70       std::vector<int32>* num_updates);
71 };
72 
73 }  // namespace utils
74 }  // namespace boosted_trees
75 }  // namespace tensorflow
76 
77 #endif  // TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_DROPOUT_UTILS_H_
78