// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "tensorflow/contrib/tensor_forest/kernels/tree_utils.h"

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <functional>
#include <limits>
#include <vector>

#include "tensorflow/core/lib/random/philox_random.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {
namespace tensorforest {

using tensorflow::Tensor;

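// Returns the column type of the given dense input feature, as recorded in
// the data spec.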
DataColumnTypes FindDenseFeatureSpec(
    int32 input_feature, const tensorforest::TensorForestDataSpec& spec) {
  return static_cast<DataColumnTypes>(spec.GetDenseFeatureType(input_feature));
}

DataColumnTypes FindSparseFeatureSpec(
    int32 input_feature, const tensorforest::TensorForestDataSpec& spec) {
  // TODO(thomaswc): Binary search here, especially when we start using more
  // than one sparse column.
  int32 size_sum = spec.sparse(0).size();
  int32 column_num = 0;
  // Stop at the last sparse column so an out-of-range input_feature can't
  // index past the end of the spec.
  while (input_feature >= size_sum && column_num + 1 < spec.sparse_size()) {
    ++column_num;
    size_sum += spec.sparse(column_num).size();
  }

  return static_cast<DataColumnTypes>(spec.sparse(column_num).original_type());
}

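// Evaluates score_fn on indices [0, max) and returns the lowest and
// second-lowest scores along with their indices. Outputs are initialized to
// FLT_MAX / -1, so they keep those sentinel values when there are fewer than
// two candidates.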
void GetTwoBest(int max, const std::function<float(int)>& score_fn,
                float* best_score, int* best_index, float* second_best_score,
                int* second_best_index) {
  *best_index = -1;
  *second_best_index = -1;
  *best_score = FLT_MAX;
  *second_best_score = FLT_MAX;
  for (int i = 0; i < max; i++) {
    float score = score_fn(i);
    if (score < *best_score) {
      *second_best_score = *best_score;
      *second_best_index = *best_index;
      *best_score = score;
      *best_index = i;
    } else if (score < *second_best_score) {
      *second_best_score = score;
      *second_best_index = i;
    }
  }
}

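// Returns the sum of the weighted Gini impurities of the left (split) and
// right (total - split) class counts for candidate split i.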
float ClassificationSplitScore(
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& rights, int32 num_classes,
    int i) {
  Eigen::array<Eigen::Index, 1> offsets;
  // Class counts are stored with the total in [0], so the length of each
  // count vector is num_classes + 1.
  offsets[0] = i * (num_classes + 1) + 1;
  Eigen::array<Eigen::Index, 1> extents;
  extents[0] = num_classes;
  return WeightedGiniImpurity(splits.slice(offsets, extents)) +
         WeightedGiniImpurity(rights.slice(offsets, extents));
}

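// Finds the two candidate splits with the lowest classification scores for
// the given accumulator, scoring each split with ClassificationSplitScore.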
void GetTwoBestClassification(const Tensor& total_counts,
                              const Tensor& split_counts, int32 accumulator,
                              float* best_score, int* best_index,
                              float* second_best_score,
                              int* second_best_index) {
  const int32 num_splits = static_cast<int32>(split_counts.shape().dim_size(1));
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  // Ideally, Eigen::Tensor::chip would be best to use here but it results
  // in seg faults, so we have to go with flat views of these tensors. However,
  // it is still pretty efficient because we put off evaluation until the
  // score is actually returned.
  const auto tc =
      total_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();

  // TODO(gilberth): See if we can delay evaluation here by templating the
  // arguments to ClassificationSplitScore.
  const Eigen::Tensor<float, 1, Eigen::RowMajor> splits =
      split_counts.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  Eigen::array<Eigen::Index, 1> bcast;
  bcast[0] = num_splits;
  const Eigen::Tensor<float, 1, Eigen::RowMajor> rights =
      tc.broadcast(bcast) - splits;

  std::function<float(int)> score_fn =
      std::bind(ClassificationSplitScore, splits, rights, num_classes,
                std::placeholders::_1);

  GetTwoBest(num_splits, score_fn, best_score, best_index, second_best_score,
             second_best_index);
}

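// Returns the index of the classification split with the best (lowest) Gini
// score for the given accumulator.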
int32 BestFeatureClassification(const Tensor& total_counts,
                                const Tensor& split_counts, int32 accumulator) {
  float best_score;
  float second_best_score;
  int best_feature_index;
  int second_best_index;
  GetTwoBestClassification(total_counts, split_counts, accumulator, &best_score,
                           &best_feature_index, &second_best_score,
                           &second_best_index);
  return best_feature_index;
}

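// Returns the sum of the left and right weighted variances for candidate
// split i. Sums of outputs and of their squares are stored with the sample
// count in position 0, so the per-output values start at offset 1.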
float RegressionSplitScore(
    const Eigen::Tensor<float, 3, Eigen::RowMajor>& splits_count_accessor,
    const Eigen::Tensor<float, 2, Eigen::RowMajor>& totals_count_accessor,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits_sum,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& splits_square,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_sums,
    const Eigen::Tensor<float, 1, Eigen::RowMajor>& right_squares,
    int32 accumulator, int32 num_regression_dims, int i) {
  Eigen::array<Eigen::Index, 1> offsets = {i * num_regression_dims + 1};
  Eigen::array<Eigen::Index, 1> extents = {num_regression_dims - 1};
  float left_count = splits_count_accessor(accumulator, i, 0);
  float right_count = totals_count_accessor(accumulator, 0) - left_count;

  float score = 0;

  // Guard against divide-by-zero.
  if (left_count > 0) {
    score +=
        WeightedVariance(splits_sum.slice(offsets, extents),
                         splits_square.slice(offsets, extents), left_count);
  }

  if (right_count > 0) {
    score +=
        WeightedVariance(right_sums.slice(offsets, extents),
                         right_squares.slice(offsets, extents), right_count);
  }
  return score;
}

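// Finds the two candidate splits with the lowest regression scores for the
// given accumulator, scoring each split with RegressionSplitScore.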
void GetTwoBestRegression(const Tensor& total_sums, const Tensor& total_squares,
                          const Tensor& split_sums, const Tensor& split_squares,
                          int32 accumulator, float* best_score, int* best_index,
                          float* second_best_score, int* second_best_index) {
  const int32 num_splits = static_cast<int32>(split_sums.shape().dim_size(1));
  const int32 num_regression_dims =
      static_cast<int32>(split_sums.shape().dim_size(2));
  // Ideally, Eigen::Tensor::chip would be best to use here but it results
  // in seg faults, so we have to go with flat views of these tensors. However,
  // it is still pretty efficient because we put off evaluation until the
  // score is actually returned.
  const auto tc_sum =
      total_sums.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  const auto tc_square =
      total_squares.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  const auto splits_sum =
      split_sums.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  const auto splits_square =
      split_squares.Slice(accumulator, accumulator + 1).unaligned_flat<float>();
  // Eigen is infuriating to work with, usually resulting in all kinds of
  // unhelpful compiler errors when trying something that seems sane. This
  // helps us do a simple thing like access the first element (the counts)
  // of these tensors so we can calculate expected value in Variance().
  const auto splits_count_accessor = split_sums.tensor<float, 3>();
  const auto totals_count_accessor = total_sums.tensor<float, 2>();

  Eigen::array<Eigen::Index, 1> bcast;
  bcast[0] = num_splits;
  const auto right_sums = tc_sum.broadcast(bcast) - splits_sum;
  const auto right_squares = tc_square.broadcast(bcast) - splits_square;

  GetTwoBest(num_splits,
             std::bind(RegressionSplitScore, splits_count_accessor,
                       totals_count_accessor, splits_sum, splits_square,
                       right_sums, right_squares, accumulator,
                       num_regression_dims, std::placeholders::_1),
             best_score, best_index, second_best_score, second_best_index);
}

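// Returns the index of the regression split with the best (lowest) variance
// score for the given accumulator.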
int32 BestFeatureRegression(const Tensor& total_sums,
                            const Tensor& total_squares,
                            const Tensor& split_sums,
                            const Tensor& split_squares, int32 accumulator) {
  float best_score;
  float second_best_score;
  int best_feature_index;
  int second_best_index;
  GetTwoBestRegression(total_sums, total_squares, split_sums, split_squares,
                       accumulator, &best_score, &best_feature_index,
                       &second_best_score, &second_best_index);
  return best_feature_index;
}

bool BestSplitDominatesRegression(const Tensor& total_sums,
                                  const Tensor& total_squares,
                                  const Tensor& split_sums,
                                  const Tensor& split_squares,
                                  int32 accumulator) {
  // TODO(thomaswc): Implement this, probably as part of v3.
  return false;
}

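// Draws n samples from the distribution ds over s outcomes and returns
// -sum(counts^2), from which the Gini impurity of the bootstrap sample can
// be recovered (see the comment in the function body).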
int BootstrapGini(int n, int s, const random::DistributionSampler& ds,
                  random::SimplePhilox* rand) {
  std::vector<int> counts(s, 0);
  for (int i = 0; i < n; i++) {
    int j = ds.Sample(rand);
    counts[j] += 1;
  }
  int g = 0;
  for (int j = 0; j < s; j++) {
    g += counts[j] * counts[j];
  }
  // The true Gini impurity is 1 - g / n^2, so for a fixed n, returning -g
  // preserves the ordering of candidates.
  return -g;
}

// Populate *weights with the smoothed per-class frequencies needed to
// initialize a DistributionSampler. Returns the total number of samples
// seen by this accumulator.
int MakeBootstrapWeights(const Tensor& total_counts, const Tensor& split_counts,
                         int32 accumulator, int index,
                         std::vector<float>* weights) {
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  auto tc = total_counts.tensor<float, 2>();
  auto lc = split_counts.tensor<float, 3>();

  int n = tc(accumulator, 0);

  float denom = static_cast<float>(n) + static_cast<float>(num_classes);

  weights->resize(num_classes * 2);
  for (int i = 0; i < num_classes; i++) {
    // Use the Laplace smoothed per-class probabilities when generating the
    // bootstrap samples.
    float left_count = lc(accumulator, index, i + 1);
    (*weights)[i] = (left_count + 1.0) / denom;
    float right_count = tc(accumulator, i + 1) - left_count;
    (*weights)[num_classes + i] = (right_count + 1.0) / denom;
  }

  return n;
}

bool BestSplitDominatesClassificationBootstrap(const Tensor& total_counts,
                                               const Tensor& split_counts,
                                               int32 accumulator,
                                               float dominate_fraction,
                                               random::SimplePhilox* rand) {
  float best_score;
  float second_best_score;
  int best_feature_index;
  int second_best_index;
  GetTwoBestClassification(total_counts, split_counts, accumulator, &best_score,
                           &best_feature_index, &second_best_score,
                           &second_best_index);

  std::vector<float> weights1;
  int n1 = MakeBootstrapWeights(total_counts, split_counts, accumulator,
                                best_feature_index, &weights1);
  random::DistributionSampler ds1(weights1);

  std::vector<float> weights2;
  int n2 = MakeBootstrapWeights(total_counts, split_counts, accumulator,
                                second_best_index, &weights2);
  random::DistributionSampler ds2(weights2);

  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  float p = 1.0 - dominate_fraction;
  if (p <= 0 || p > 1.0) {
    LOG(FATAL) << "Invalid dominate fraction " << dominate_fraction;
  }

  // Take roughly log2(1 / (1 - dominate_fraction)) bootstrap samples.
  int bootstrap_samples = 1;
  while (p < 1.0) {
    bootstrap_samples += 1;
    p = p * 2;
  }

  // BootstrapGini returns -g <= 0, so start the running max below any
  // achievable value.
  int worst_g1 = std::numeric_limits<int>::min();
  for (int i = 0; i < bootstrap_samples; i++) {
    int g1 = BootstrapGini(n1, 2 * num_classes, ds1, rand);
    worst_g1 = std::max(worst_g1, g1);
  }

  // Similarly, 99 is above any achievable value, so it acts as +infinity.
  int best_g2 = 99;
  for (int i = 0; i < bootstrap_samples; i++) {
    int g2 = BootstrapGini(n2, 2 * num_classes, ds2, rand);
    best_g2 = std::min(best_g2, g2);
  }

  return worst_g1 < best_g2;
}

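// Returns true if the best split's score improves on the second best by
// more than a Hoeffding bound, i.e. if
//   second_best_score - best_score > range * sqrt(ln(1 / (1 - f)) / (2n)),
// where f is dominate_fraction and n is the number of samples seen.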
bool BestSplitDominatesClassificationHoeffding(const Tensor& total_counts,
                                               const Tensor& split_counts,
                                               int32 accumulator,
                                               float dominate_fraction) {
  float best_score;
  float second_best_score;
  int best_feature_index;
  int second_best_index;
  VLOG(1) << "BSDC for accumulator " << accumulator;
  GetTwoBestClassification(total_counts, split_counts, accumulator, &best_score,
                           &best_feature_index, &second_best_score,
                           &second_best_index);
  VLOG(1) << "Best score = " << best_score;
  VLOG(1) << "2nd best score = " << second_best_score;

  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;
  const float n = total_counts.Slice(accumulator, accumulator + 1)
                      .unaligned_flat<float>()(0);

  // Each term in the Gini impurity can range from 0 to 0.5 * 0.5; weighted
  // by the n samples, each class contributes at most 0.25 * n to the score.
  float range = 0.25 * static_cast<float>(num_classes) * n;

  float hoeffding_bound =
      range * sqrt(log(1.0 / (1.0 - dominate_fraction)) / (2.0 * n));

  VLOG(1) << "num_classes = " << num_classes;
  VLOG(1) << "n = " << n;
  VLOG(1) << "range = " << range;
  VLOG(1) << "hoeffding_bound = " << hoeffding_bound;
  return (second_best_score - best_score) > hoeffding_bound;
}

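// Returns the sum of the traces of the covariance matrices of the add-one
// smoothed Dirichlet posteriors over the left and right class counts of the
// given split.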
double DirichletCovarianceTrace(const Tensor& total_counts,
                                const Tensor& split_counts, int32 accumulator,
                                int index) {
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  auto tc = total_counts.tensor<float, 2>();
  auto lc = split_counts.tensor<float, 3>();

  double leftc = 0.0;
  double leftc2 = 0.0;
  double rightc = 0.0;
  double rightc2 = 0.0;
  for (int i = 1; i <= num_classes; i++) {
    double l = lc(accumulator, index, i) + 1.0;
    leftc += l;
    leftc2 += l * l;

    double r = tc(accumulator, i) - lc(accumulator, index, i) + 1.0;
    rightc += r;
    rightc2 += r * r;
  }

  double left_trace = (1.0 - leftc2 / (leftc * leftc)) / (leftc + 1.0);
  double right_trace = (1.0 - rightc2 / (rightc * rightc)) / (rightc + 1.0);
  return left_trace + right_trace;
}

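// Populates *mu with smoothed per-class frequencies for the left and right
// children of the given split, in the same layout as MakeBootstrapWeights.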
void getDirichletMean(const Tensor& total_counts, const Tensor& split_counts,
                      int32 accumulator, int index, std::vector<float>* mu) {
  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;

  mu->resize(num_classes * 2);
  auto tc = total_counts.tensor<float, 2>();
  auto lc = split_counts.tensor<float, 3>();

  double total = tc(accumulator, 0);

  for (int i = 0; i < num_classes; i++) {
    double l = lc(accumulator, index, i + 1);
    mu->at(i) = (l + 1.0) / (total + num_classes);

    // Class counts are stored with the total in [0], so the count for class
    // i lives at index i + 1.
    double r = tc(accumulator, i + 1) - l;
    mu->at(i + num_classes) = (r + 1.0) / (total + num_classes);
  }
}

// Given lambda3, returns the squared distance from (mu1, mu2) to the nearest
// point on the surface.
double getDistanceFromLambda3(double lambda3, const std::vector<float>& mu1,
                              const std::vector<float>& mu2) {
  if (fabs(lambda3) == 1.0) {
    return 0.0;
  }

  int n = mu1.size();
  double lambda1 = -2.0 * lambda3 / n;
  double lambda2 = 2.0 * lambda3 / n;
  // From the derivation in getChebyshevEpsilon below,
  // x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
  // y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
  double dist = 0.0;
  for (size_t i = 0; i < mu1.size(); i++) {
    double diff = (lambda1 + 2.0 * mu1[i]) / (2.0 - 2.0 * lambda3) - mu1[i];
    dist += diff * diff;
    diff = (lambda2 + 2.0 * mu2[i]) / (2.0 + 2.0 * lambda3) - mu2[i];
    dist += diff * diff;
  }
  return dist;
}

// Returns the squared distance between (mu1, mu2) and (x, y), where (x, y) is
// the nearest point that lies on the surface defined by
// {x dot 1 = 1, y dot 1 = 1, x dot x - y dot y = 0}.
double getChebyshevEpsilon(const std::vector<float>& mu1,
                           const std::vector<float>& mu2) {
  // Math time!!
  // We are trying to minimize d = |mu1 - x|^2 + |mu2 - y|^2 over the surface.
  // Using Lagrange multipliers, we get
  // partial d / partial x = -2 mu1 + 2 x = lambda_1 1 + 2 lambda_3 x
  // partial d / partial y = -2 mu2 + 2 y = lambda_2 1 - 2 lambda_3 y
  // or
  // x = (lambda_1 1 + 2 mu1) / (2 - 2 lambda_3)
  // y = (lambda_2 1 + 2 mu2) / (2 + 2 lambda_3)
  // which implies
  // 2 - 2 lambda_3 = lambda_1 1 dot 1 + 2 mu1 dot 1
  // 2 + 2 lambda_3 = lambda_2 1 dot 1 + 2 mu2 dot 1
  // |lambda_1 1 + 2 mu1|^2 (2 + 2 lambda_3)^2 =
  //   |lambda_2 1 + 2 mu2|^2 (2 - 2 lambda_3)^2
  // So solving for the lambda's and using the fact that
  // mu1 dot 1 = 1 and mu2 dot 1 = 1,
  // lambda_1 = -2 lambda_3 / (1 dot 1)
  // lambda_2 = 2 lambda_3 / (1 dot 1)
  // and (letting n = 1 dot 1)
  // | - lambda_3 1 + n mu1 |^2 (1 + lambda_3)^2 =
  //   | lambda_3 1 + n mu2 |^2 (1 - lambda_3)^2
  // or
  // (lambda_3^2 n - 2 n lambda_3 + n^2 mu1 dot mu1)(1 + lambda_3)^2 =
  //   (lambda_3^2 n + 2 n lambda_3 + n^2 mu2 dot mu2)(1 - lambda_3)^2
  // or
  // (lambda_3^2 - 2 lambda_3 + n mu1 dot mu1)(1 + 2 lambda_3 + lambda_3^2) =
  //   (lambda_3^2 + 2 lambda_3 + n mu2 dot mu2)(1 - 2 lambda_3 + lambda_3^2)
  // or
  // lambda_3^2 - 2 lambda_3 + n mu1 dot mu1
  //  + 2 lambda_3^3 - 2 lambda_3^2 + 2n lambda_3 mu1 dot mu1
  //  + lambda_3^4 - 2 lambda_3^3 + n lambda_3^2 mu1 dot mu1
  // =
  // lambda_3^2 + 2 lambda_3 + n mu2 dot mu2
  //  - 2 lambda_3^3 - 4 lambda_3^2 - 2n lambda_3 mu2 dot mu2
  //  + lambda_3^4 + 2 lambda_3^3 + n lambda_3^2 mu2 dot mu2
  // or
  // - 2 lambda_3 + n mu1 dot mu1
  //  - 2 lambda_3^2 + 2n lambda_3 mu1 dot mu1
  //  + n lambda_3^2 mu1 dot mu1
  // =
  // + 2 lambda_3 + n mu2 dot mu2
  //  - 4 lambda_3^2 - 2n lambda_3 mu2 dot mu2
  //  + n lambda_3^2 mu2 dot mu2
  // or
  // lambda_3^2 (2 + n mu1 dot mu1 + n mu2 dot mu2)
  //  + lambda_3 (2n mu1 dot mu1 + 2n mu2 dot mu2 - 4)
  //  + n mu1 dot mu1 - n mu2 dot mu2 = 0
  // which can be solved using the quadratic formula.
  int n = mu1.size();
  double len1 = 0.0;
  for (float m : mu1) {
    len1 += m * m;
  }
  double len2 = 0.0;
  for (float m : mu2) {
    len2 += m * m;
  }
  double a = 2 + n * (len1 + len2);
  double b = 2 * n * (len1 + len2) - 4;
  double c = n * (len1 - len2);
  double discrim = b * b - 4 * a * c;
  if (discrim < 0.0) {
    LOG(WARNING) << "Negative discriminant " << discrim;
    return 0.0;
  }

  double sdiscrim = sqrt(discrim);
  // TODO(thomaswc): Analyze whether one of these is always closer.
  double v1 = (-b + sdiscrim) / (2 * a);
  double v2 = (-b - sdiscrim) / (2 * a);
  double dist1 = getDistanceFromLambda3(v1, mu1, mu2);
  double dist2 = getDistanceFromLambda3(v2, mu1, mu2);
  return std::min(dist1, dist2);
}

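// Decides dominance via a Chebyshev-style bound on the Dirichlet posteriors
// of the two best splits: returns true when 1 - trace / epsilon^2 exceeds
// dominate_fraction, where trace sums the posterior covariance traces and
// epsilon measures the separation of the posterior means.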
bool BestSplitDominatesClassificationChebyshev(const Tensor& total_counts,
                                               const Tensor& split_counts,
                                               int32 accumulator,
                                               float dominate_fraction) {
  float best_score;
  float second_best_score;
  int best_feature_index;
  int second_best_index;
  VLOG(1) << "BSDC for accumulator " << accumulator;
  GetTwoBestClassification(total_counts, split_counts, accumulator, &best_score,
                           &best_feature_index, &second_best_score,
                           &second_best_index);
  VLOG(1) << "Best score = " << best_score;
  VLOG(1) << "2nd best score = " << second_best_score;

  const int32 num_classes =
      static_cast<int32>(split_counts.shape().dim_size(2)) - 1;
  const float n = total_counts.Slice(accumulator, accumulator + 1)
                      .unaligned_flat<float>()(0);

  VLOG(1) << "num_classes = " << num_classes;
  VLOG(1) << "n = " << n;
  double trace = DirichletCovarianceTrace(total_counts, split_counts,
                                          accumulator, best_feature_index) +
                 DirichletCovarianceTrace(total_counts, split_counts,
                                          accumulator, second_best_index);

  std::vector<float> mu1;
  getDirichletMean(total_counts, split_counts, accumulator, best_feature_index,
                   &mu1);
  std::vector<float> mu2;
  getDirichletMean(total_counts, split_counts, accumulator, second_best_index,
                   &mu2);
  double epsilon = getChebyshevEpsilon(mu1, mu2);

  if (epsilon == 0.0) {
    return false;
  }

  double dirichlet_bound = 1.0 - trace / (epsilon * epsilon);
  return dirichlet_bound > dominate_fraction;
}

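// Returns a closure over the dense feature matrix; if the input has no
// dense features, returns a stub that logs an error and yields 0.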
GetFeatureFnType GetDenseFunctor(const Tensor& dense) {
  if (dense.shape().dims() == 2) {
    const auto dense_features = dense.matrix<float>();
    // Here we capture by value, which shouldn't incur a copy of the data
    // because of the underlying use of Eigen::TensorMap.
    return [dense_features](int32 i, int32 feature) {
      return dense_features(i, feature);
    };
  } else {
    return [](int32 i, int32 feature) {
      LOG(ERROR) << "trying to access nonexistent dense features.";
      return 0;
    };
  }
}

GetFeatureFnType GetSparseFunctor(const Tensor& sparse_indices,
                                  const Tensor& sparse_values) {
  if (sparse_indices.shape().dims() == 2) {
    const auto indices = sparse_indices.matrix<int64>();
    const auto values = sparse_values.vec<float>();
    // Here we capture by value, which shouldn't incur a copy of the data
    // because of the underlying use of Eigen::TensorMap.
    return [indices, values](int32 i, int32 feature) {
      return tensorforest::FindSparseValue(indices, values, i, feature);
    };
  } else {
    return [](int32 i, int32 feature) {
      LOG(ERROR) << "trying to access nonexistent sparse features.";
      return 0;
    };
  }
}

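// Routes a feature index to the dense or sparse getter (sparse features are
// numbered after all dense features) and tests the value against the bias.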
bool DecideNode(const GetFeatureFnType& get_dense,
                const GetFeatureFnType& get_sparse, int32 i, int32 feature,
                float bias, const tensorforest::TensorForestDataSpec& spec) {
  if (feature < spec.dense_features_size()) {
    return Decide(get_dense(i, feature), bias,
                  FindDenseFeatureSpec(feature, spec));
  } else {
    const int32 sparse_feature = feature - spec.dense_features_size();
    return Decide(get_sparse(i, sparse_feature), bias,
                  FindSparseFeatureSpec(sparse_feature, spec));
  }
}

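// Returns which way a value goes at a split: float features compare against
// the bias threshold, categorical features test equality with it. For
// example, Decide(3.0f, 2.5f, kDataFloat) is true and
// Decide(1.0f, 2.5f, kDataFloat) is false.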
bool Decide(float value, float bias, DataColumnTypes type) {
  switch (type) {
    case kDataFloat:
      return value >= bias;

    case kDataCategorical:
      // We arbitrarily define categorical equality as going left.
      return value == bias;

    default:
      LOG(ERROR) << "Got unknown column type: " << type;
      return false;
  }
}

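// Computes per-output means into *mean. If the leaf has fewer than
// valid_leaf_threshold samples, the parent's counts are blended in with a
// weight of at most 1 so the effective sum reaches the threshold.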
void GetParentWeightedMean(float leaf_sum, const float* leaf_data,
                           float parent_sum, const float* parent_data,
                           float valid_leaf_threshold, int num_outputs,
                           std::vector<float>* mean) {
  float parent_weight = 0.0;
  // Require parent_sum > 0 so the weight computation below can't divide by
  // zero.
  if (leaf_sum < valid_leaf_threshold && parent_sum > 0) {
    VLOG(1) << "not enough samples at leaf, including parent counts. "
            << "child sum = " << leaf_sum;
    // Weight the parent's counts just enough so that the new sum is
    // valid_leaf_threshold, but never give any counts a weight of
    // more than 1.
    parent_weight =
        std::min(1.0f, (valid_leaf_threshold - leaf_sum) / parent_sum);
    leaf_sum += parent_weight * parent_sum;
    VLOG(1) << "Sum w/ parent included = " << leaf_sum;
  }

  for (int c = 0; c < num_outputs; c++) {
    float w = leaf_data[c];
    if (parent_weight > 0.0) {
      w += parent_weight * parent_data[c];
    }
    (*mean)[c] = w / leaf_sum;
  }
}

}  // namespace tensorforest
}  // namespace tensorflow