1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // See docs in ../ops/math_ops.cc. 17 18 #define EIGEN_USE_THREADS 19 20 #include "tensorflow/core/kernels/bincount_op.h" 21 #include "tensorflow/core/framework/op_kernel.h" 22 #include "tensorflow/core/framework/register_types.h" 23 #include "tensorflow/core/framework/types.h" 24 #include "tensorflow/core/lib/core/threadpool.h" 25 #include "tensorflow/core/platform/types.h" 26 27 namespace tensorflow { 28 29 using thread::ThreadPool; 30 31 typedef Eigen::ThreadPoolDevice CPUDevice; 32 typedef Eigen::GpuDevice GPUDevice; 33 34 namespace functor { 35 36 template <typename T> 37 struct BincountFunctor<CPUDevice, T> { Computetensorflow::functor::BincountFunctor38 static Status Compute(OpKernelContext* context, 39 const typename TTypes<int32, 1>::ConstTensor& arr, 40 const typename TTypes<T, 1>::ConstTensor& weights, 41 typename TTypes<T, 1>::Tensor& output) { 42 int size = output.size(); 43 44 Tensor all_nonneg_t; 45 TF_RETURN_IF_ERROR(context->allocate_temp( 46 DT_BOOL, TensorShape({}), &all_nonneg_t, AllocatorAttributes())); 47 all_nonneg_t.scalar<bool>().device(context->eigen_cpu_device()) = 48 (arr >= 0).all(); 49 if (!all_nonneg_t.scalar<bool>()()) { 50 return errors::InvalidArgument("Input arr must be non-negative!"); 51 } 52 53 // Allocate partial output bin sums for each worker thread. Worker ids in 54 // ParallelForWithWorkerId range from 0 to NumThreads() inclusive. 55 ThreadPool* thread_pool = 56 context->device()->tensorflow_cpu_worker_threads()->workers; 57 const int64 num_threads = thread_pool->NumThreads() + 1; 58 Tensor partial_bins_t; 59 TF_RETURN_IF_ERROR(context->allocate_temp(DataTypeToEnum<T>::value, 60 TensorShape({num_threads, size}), 61 &partial_bins_t)); 62 auto partial_bins = partial_bins_t.matrix<T>(); 63 partial_bins.setZero(); 64 thread_pool->ParallelForWithWorkerId( 65 arr.size(), 8 /* cost */, 66 [&](int64 start_ind, int64 limit_ind, int64 worker_id) { 67 for (int64 i = start_ind; i < limit_ind; i++) { 68 int32 value = arr(i); 69 if (value < size) { 70 if (weights.size()) { 71 partial_bins(worker_id, value) += weights(i); 72 } else { 73 // Complex numbers don't support "++". 74 partial_bins(worker_id, value) += T(1); 75 } 76 } 77 } 78 }); 79 80 // Sum the partial bins along the 0th axis. 81 Eigen::array<int, 1> reduce_dims({0}); 82 output.device(context->eigen_cpu_device()) = partial_bins.sum(reduce_dims); 83 return Status::OK(); 84 } 85 }; 86 87 } // namespace functor 88 89 template <typename Device, typename T> 90 class BincountOp : public OpKernel { 91 public: BincountOp(OpKernelConstruction * ctx)92 explicit BincountOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} 93 Compute(OpKernelContext * ctx)94 void Compute(OpKernelContext* ctx) override { 95 const Tensor& arr_t = ctx->input(0); 96 const Tensor& size_tensor = ctx->input(1); 97 const Tensor& weights_t = ctx->input(2); 98 99 int32 size = size_tensor.scalar<int32>()(); 100 OP_REQUIRES( 101 ctx, size >= 0, 102 errors::InvalidArgument("size (", size, ") must be non-negative")); 103 104 const auto arr = arr_t.flat<int32>(); 105 const auto weights = weights_t.flat<T>(); 106 Tensor* output_t; 107 OP_REQUIRES_OK(ctx, 108 ctx->allocate_output(0, TensorShape({size}), &output_t)); 109 auto output = output_t->flat<T>(); 110 OP_REQUIRES_OK(ctx, functor::BincountFunctor<Device, T>::Compute( 111 ctx, arr, weights, output)); 112 } 113 }; 114 115 #define REGISTER_KERNELS(type) \ 116 REGISTER_KERNEL_BUILDER( \ 117 Name("Bincount").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 118 BincountOp<CPUDevice, type>) 119 120 TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); 121 #undef REGISTER_KERNELS 122 123 #if GOOGLE_CUDA 124 125 #define REGISTER_KERNELS(type) \ 126 REGISTER_KERNEL_BUILDER(Name("Bincount") \ 127 .Device(DEVICE_GPU) \ 128 .HostMemory("size") \ 129 .TypeConstraint<type>("T"), \ 130 BincountOp<GPUDevice, type>) 131 132 TF_CALL_int32(REGISTER_KERNELS); 133 TF_CALL_float(REGISTER_KERNELS); 134 #undef REGISTER_KERNELS 135 136 #endif // GOOGLE_CUDA 137 138 } // end namespace tensorflow 139