1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // See docs in ../ops/image_ops.cc
17
18 #define EIGEN_USE_THREADS
19
20 #include "tensorflow/core/kernels/image/non_max_suppression_op.h"
21
22 #include <cmath>
23 #include <functional>
24 #include <queue>
25 #include <vector>
26
27 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
28 #include "tensorflow/core/framework/bounds_check.h"
29 #include "tensorflow/core/framework/op_kernel.h"
30 #include "tensorflow/core/framework/register_types.h"
31 #include "tensorflow/core/framework/tensor.h"
32 #include "tensorflow/core/framework/tensor_shape.h"
33 #include "tensorflow/core/framework/types.h"
34 #include "tensorflow/core/lib/core/status.h"
35 #include "tensorflow/core/platform/logging.h"
36
37 namespace tensorflow {
38 namespace {
39
40 typedef Eigen::ThreadPoolDevice CPUDevice;
41
CheckScoreSizes(OpKernelContext * context,int num_boxes,const Tensor & scores)42 static inline void CheckScoreSizes(OpKernelContext* context, int num_boxes,
43 const Tensor& scores) {
44 // The shape of 'scores' is [num_boxes]
45 OP_REQUIRES(context, scores.dims() == 1,
46 errors::InvalidArgument(
47 "scores must be 1-D", scores.shape().DebugString(),
48 " (Shape must be rank 1 but is rank ", scores.dims(), ")"));
49 OP_REQUIRES(
50 context, scores.dim_size(0) == num_boxes,
51 errors::InvalidArgument("scores has incompatible shape (Dimensions must "
52 "be equal, but are ",
53 num_boxes, " and ", scores.dim_size(0), ")"));
54 }
55
ParseAndCheckOverlapSizes(OpKernelContext * context,const Tensor & overlaps,int * num_boxes)56 static inline void ParseAndCheckOverlapSizes(OpKernelContext* context,
57 const Tensor& overlaps,
58 int* num_boxes) {
59 // the shape of 'overlaps' is [num_boxes, num_boxes]
60 OP_REQUIRES(context, overlaps.dims() == 2,
61 errors::InvalidArgument("overlaps must be 2-D",
62 overlaps.shape().DebugString()));
63
64 *num_boxes = overlaps.dim_size(0);
65 OP_REQUIRES(context, overlaps.dim_size(1) == *num_boxes,
66 errors::InvalidArgument("overlaps must be square",
67 overlaps.shape().DebugString()));
68 }
69
ParseAndCheckBoxSizes(OpKernelContext * context,const Tensor & boxes,int * num_boxes)70 static inline void ParseAndCheckBoxSizes(OpKernelContext* context,
71 const Tensor& boxes, int* num_boxes) {
72 // The shape of 'boxes' is [num_boxes, 4]
73 OP_REQUIRES(context, boxes.dims() == 2,
74 errors::InvalidArgument(
75 "boxes must be 2-D", boxes.shape().DebugString(),
76 " (Shape must be rank 2 but is rank ", boxes.dims(), ")"));
77 *num_boxes = boxes.dim_size(0);
78 OP_REQUIRES(context, boxes.dim_size(1) == 4,
79 errors::InvalidArgument("boxes must have 4 columns (Dimension "
80 "must be 4 but is ",
81 boxes.dim_size(1), ")"));
82 }
83
CheckCombinedNMSScoreSizes(OpKernelContext * context,int num_boxes,const Tensor & scores)84 static inline void CheckCombinedNMSScoreSizes(OpKernelContext* context,
85 int num_boxes,
86 const Tensor& scores) {
87 // The shape of 'scores' is [batch_size, num_boxes, num_classes]
88 OP_REQUIRES(context, scores.dims() == 3,
89 errors::InvalidArgument("scores must be 3-D",
90 scores.shape().DebugString()));
91 OP_REQUIRES(context, scores.dim_size(1) == num_boxes,
92 errors::InvalidArgument("scores has incompatible shape"));
93 }
94
ParseAndCheckCombinedNMSBoxSizes(OpKernelContext * context,const Tensor & boxes,int * num_boxes,const int num_classes)95 static inline void ParseAndCheckCombinedNMSBoxSizes(OpKernelContext* context,
96 const Tensor& boxes,
97 int* num_boxes,
98 const int num_classes) {
99 // The shape of 'boxes' is [batch_size, num_boxes, q, 4]
100 OP_REQUIRES(context, boxes.dims() == 4,
101 errors::InvalidArgument("boxes must be 4-D",
102 boxes.shape().DebugString()));
103
104 bool box_check = boxes.dim_size(2) == 1 || boxes.dim_size(2) == num_classes;
105 OP_REQUIRES(context, box_check,
106 errors::InvalidArgument(
107 "third dimension of boxes must be either 1 or num classes"));
108 *num_boxes = boxes.dim_size(1);
109 OP_REQUIRES(context, boxes.dim_size(3) == 4,
110 errors::InvalidArgument("boxes must have 4 columns"));
111 }
112 // Return intersection-over-union overlap between boxes i and j
113 template <typename T>
IOU(typename TTypes<T,2>::ConstTensor boxes,int i,int j)114 static inline T IOU(typename TTypes<T, 2>::ConstTensor boxes, int i, int j) {
115 const T ymin_i = std::min<T>(boxes(i, 0), boxes(i, 2));
116 const T xmin_i = std::min<T>(boxes(i, 1), boxes(i, 3));
117 const T ymax_i = std::max<T>(boxes(i, 0), boxes(i, 2));
118 const T xmax_i = std::max<T>(boxes(i, 1), boxes(i, 3));
119 const T ymin_j = std::min<T>(boxes(j, 0), boxes(j, 2));
120 const T xmin_j = std::min<T>(boxes(j, 1), boxes(j, 3));
121 const T ymax_j = std::max<T>(boxes(j, 0), boxes(j, 2));
122 const T xmax_j = std::max<T>(boxes(j, 1), boxes(j, 3));
123 const T area_i = (ymax_i - ymin_i) * (xmax_i - xmin_i);
124 const T area_j = (ymax_j - ymin_j) * (xmax_j - xmin_j);
125 if (area_i <= static_cast<T>(0) || area_j <= static_cast<T>(0)) {
126 return static_cast<T>(0.0);
127 }
128 const T intersection_ymin = std::max<T>(ymin_i, ymin_j);
129 const T intersection_xmin = std::max<T>(xmin_i, xmin_j);
130 const T intersection_ymax = std::min<T>(ymax_i, ymax_j);
131 const T intersection_xmax = std::min<T>(xmax_i, xmax_j);
132 const T intersection_area =
133 std::max<T>(intersection_ymax - intersection_ymin, static_cast<T>(0.0)) *
134 std::max<T>(intersection_xmax - intersection_xmin, static_cast<T>(0.0));
135 return intersection_area / (area_i + area_j - intersection_area);
136 }
137
138 template <typename T>
Overlap(typename TTypes<T,2>::ConstTensor overlaps,int i,int j)139 static inline T Overlap(typename TTypes<T, 2>::ConstTensor overlaps, int i,
140 int j) {
141 return overlaps(i, j);
142 }
143
144 template <typename T>
CreateIOUSimilarityFn(const Tensor & boxes)145 static inline std::function<T(int, int)> CreateIOUSimilarityFn(
146 const Tensor& boxes) {
147 typename TTypes<T, 2>::ConstTensor boxes_data = boxes.tensor<T, 2>();
148 return std::bind(&IOU<T>, boxes_data, std::placeholders::_1,
149 std::placeholders::_2);
150 }
151
152 template <typename T>
CreateOverlapSimilarityFn(const Tensor & overlaps)153 static inline std::function<T(int, int)> CreateOverlapSimilarityFn(
154 const Tensor& overlaps) {
155 typename TTypes<T, 2>::ConstTensor overlaps_data =
156 overlaps.tensor<float, 2>();
157 return std::bind(&Overlap<T>, overlaps_data, std::placeholders::_1,
158 std::placeholders::_2);
159 }
160
161 template <typename T>
DoNonMaxSuppressionOp(OpKernelContext * context,const Tensor & scores,int num_boxes,const Tensor & max_output_size,const T similarity_threshold,const T score_threshold,const T soft_nms_sigma,const std::function<T (int,int)> & similarity_fn,bool return_scores_tensor=false,bool pad_to_max_output_size=false,int * ptr_num_valid_outputs=nullptr)162 void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores,
163 int num_boxes, const Tensor& max_output_size,
164 const T similarity_threshold,
165 const T score_threshold, const T soft_nms_sigma,
166 const std::function<T(int, int)>& similarity_fn,
167 bool return_scores_tensor = false,
168 bool pad_to_max_output_size = false,
169 int* ptr_num_valid_outputs = nullptr) {
170 const int output_size = max_output_size.scalar<int>()();
171
172 std::vector<T> scores_data(num_boxes);
173 std::copy_n(scores.flat<T>().data(), num_boxes, scores_data.begin());
174
175 // Data structure for a selection candidate in NMS.
176 struct Candidate {
177 int box_index;
178 T score;
179 int suppress_begin_index;
180 };
181
182 auto cmp = [](const Candidate bs_i, const Candidate bs_j) {
183 return ((bs_i.score == bs_j.score) && (bs_i.box_index > bs_j.box_index)) ||
184 bs_i.score < bs_j.score;
185 };
186 std::priority_queue<Candidate, std::deque<Candidate>, decltype(cmp)>
187 candidate_priority_queue(cmp);
188 for (int i = 0; i < scores_data.size(); ++i) {
189 if (scores_data[i] > score_threshold) {
190 candidate_priority_queue.emplace(Candidate({i, scores_data[i], 0}));
191 }
192 }
193
194 T scale = static_cast<T>(0.0);
195 bool is_soft_nms = soft_nms_sigma > static_cast<T>(0.0);
196 if (is_soft_nms) {
197 scale = static_cast<T>(-0.5) / soft_nms_sigma;
198 }
199
200 auto suppress_weight = [similarity_threshold, scale,
201 is_soft_nms](const T sim) {
202 const T weight =
203 static_cast<T>(std::exp(static_cast<float>(scale * sim * sim)));
204 return is_soft_nms || sim <= similarity_threshold ? weight
205 : static_cast<T>(0.0);
206 };
207
208 std::vector<int> selected;
209 std::vector<T> selected_scores;
210 T similarity, original_score;
211 Candidate next_candidate;
212
213 while (selected.size() < output_size && !candidate_priority_queue.empty()) {
214 next_candidate = candidate_priority_queue.top();
215 original_score = next_candidate.score;
216 candidate_priority_queue.pop();
217
218 // Overlapping boxes are likely to have similar scores, therefore we
219 // iterate through the previously selected boxes backwards in order to
220 // see if `next_candidate` should be suppressed. We also enforce a property
221 // that a candidate can be suppressed by another candidate no more than
222 // once via `suppress_begin_index` which tracks which previously selected
223 // boxes have already been compared against next_candidate prior to a given
224 // iteration. These previous selected boxes are then skipped over in the
225 // following loop.
226 bool should_hard_suppress = false;
227 for (int j = static_cast<int>(selected.size()) - 1;
228 j >= next_candidate.suppress_begin_index; --j) {
229 similarity = similarity_fn(next_candidate.box_index, selected[j]);
230
231 next_candidate.score *= suppress_weight(similarity);
232
233 // First decide whether to perform hard suppression
234 if (!is_soft_nms && similarity > static_cast<T>(similarity_threshold)) {
235 should_hard_suppress = true;
236 break;
237 }
238
239 // If next_candidate survives hard suppression, apply soft suppression
240 if (next_candidate.score <= score_threshold) break;
241 }
242 // If `next_candidate.score` has not dropped below `score_threshold`
243 // by this point, then we know that we went through all of the previous
244 // selections and can safely update `suppress_begin_index` to
245 // `selected.size()`. If on the other hand `next_candidate.score`
246 // *has* dropped below the score threshold, then since `suppress_weight`
247 // always returns values in [0, 1], further suppression by items that were
248 // not covered in the above for loop would not have caused the algorithm
249 // to select this item. We thus do the same update to
250 // `suppress_begin_index`, but really, this element will not be added back
251 // into the priority queue in the following.
252 next_candidate.suppress_begin_index = selected.size();
253
254 if (!should_hard_suppress) {
255 if (next_candidate.score == original_score) {
256 // Suppression has not occurred, so select next_candidate
257 selected.push_back(next_candidate.box_index);
258 selected_scores.push_back(next_candidate.score);
259 continue;
260 }
261 if (next_candidate.score > score_threshold) {
262 // Soft suppression has occurred and current score is still greater than
263 // score_threshold; add next_candidate back onto priority queue.
264 candidate_priority_queue.push(next_candidate);
265 }
266 }
267 }
268
269 int num_valid_outputs = selected.size();
270 if (pad_to_max_output_size) {
271 selected.resize(output_size, 0);
272 selected_scores.resize(output_size, static_cast<T>(0));
273 }
274 if (ptr_num_valid_outputs) {
275 *ptr_num_valid_outputs = num_valid_outputs;
276 }
277
278 // Allocate output tensors
279 Tensor* output_indices = nullptr;
280 TensorShape output_shape({static_cast<int>(selected.size())});
281 OP_REQUIRES_OK(context,
282 context->allocate_output(0, output_shape, &output_indices));
283 TTypes<int, 1>::Tensor output_indices_data = output_indices->tensor<int, 1>();
284 std::copy_n(selected.begin(), selected.size(), output_indices_data.data());
285
286 if (return_scores_tensor) {
287 Tensor* output_scores = nullptr;
288 OP_REQUIRES_OK(context,
289 context->allocate_output(1, output_shape, &output_scores));
290 typename TTypes<T, 1>::Tensor output_scores_data =
291 output_scores->tensor<T, 1>();
292 std::copy_n(selected_scores.begin(), selected_scores.size(),
293 output_scores_data.data());
294 }
295 }
296
// One slot of the per-batch combined-NMS result table. Slots are
// brace-initialised positionally elsewhere in this file, so the field order
// is part of the contract.
struct ResultCandidate {
  // Index of the selected box; -1 marks a slot that was never filled.
  int box_index;
  // Score of the selected box for `class_idx`.
  float score;
  // Class the box was selected for.
  int class_idx;
  // The four coordinates of the box, in input column order.
  float box_coord[4];
};
303
DoNMSPerClass(int batch_idx,int class_idx,const float * boxes_data,const float * scores_data,int num_boxes,int q,int num_classes,const int size_per_class,const float score_threshold,const float iou_threshold,std::vector<ResultCandidate> & result_candidate_vec)304 void DoNMSPerClass(int batch_idx, int class_idx, const float* boxes_data,
305 const float* scores_data, int num_boxes, int q,
306 int num_classes, const int size_per_class,
307 const float score_threshold, const float iou_threshold,
308 std::vector<ResultCandidate>& result_candidate_vec) {
309 std::vector<float> class_scores_data;
310 class_scores_data.reserve(num_boxes);
311 std::vector<float> class_boxes_data;
312 class_boxes_data.reserve(num_boxes * 4);
313
314 for (int box_idx = 0; box_idx < num_boxes; ++box_idx) {
315 class_scores_data.push_back(scores_data[box_idx * num_classes + class_idx]);
316 for (int cid = 0; cid < 4; ++cid) {
317 if (q > 1) {
318 class_boxes_data.push_back(
319 boxes_data[(box_idx * q + class_idx) * 4 + cid]);
320 } else {
321 class_boxes_data.push_back(boxes_data[box_idx * 4 + cid]);
322 }
323 }
324 }
325
326 // Do NMS, get the candidate indices of form vector<int>
327 // Data structure for selection candidate in NMS.
328 struct Candidate {
329 int box_index;
330 float score;
331 };
332 auto cmp = [](const Candidate bs_i, const Candidate bs_j) {
333 return bs_i.score < bs_j.score;
334 };
335 std::priority_queue<Candidate, std::vector<Candidate>, decltype(cmp)>
336 candidate_priority_queue(cmp);
337 for (int i = 0; i < num_boxes; ++i) {
338 if (class_scores_data[i] > score_threshold) {
339 candidate_priority_queue.emplace(Candidate({i, class_scores_data[i]}));
340 }
341 }
342
343 std::vector<int> selected;
344 std::vector<float> selected_boxes;
345 Candidate next_candidate;
346
347 // Move class_boxes_data to a tensor
348 Eigen::array<Eigen::DenseIndex, 2> boxesShape = {num_boxes, 4};
349 typename TTypes<float, 2>::ConstTensor boxes_data_t(class_boxes_data.data(),
350 boxesShape);
351 float iou;
352 while (selected.size() < size_per_class &&
353 !candidate_priority_queue.empty()) {
354 next_candidate = candidate_priority_queue.top();
355 candidate_priority_queue.pop();
356 // Overlapping boxes are likely to have similar scores,
357 // therefore we iterate through the previously selected boxes backwards
358 // in order to see if `next_candidate` should be suppressed.
359 bool should_select = true;
360 for (int j = selected.size() - 1; j >= 0; --j) {
361 iou = IOU<float>(boxes_data_t, next_candidate.box_index, selected[j]);
362 if (iou > iou_threshold) {
363 should_select = false;
364 break;
365 }
366 }
367
368 if (should_select) {
369 // Add the selected box to the result candidate. Sorted by score
370 int id = next_candidate.box_index;
371 result_candidate_vec[selected.size() + size_per_class * class_idx] = {
372 next_candidate.box_index,
373 next_candidate.score,
374 class_idx,
375 {boxes_data_t(id, 0), boxes_data_t(id, 1), boxes_data_t(id, 2),
376 boxes_data_t(id, 3)}};
377 selected.push_back(next_candidate.box_index);
378 }
379 }
380 }
381
SelectResultPerBatch(std::vector<float> & nmsed_boxes,std::vector<float> & nmsed_scores,std::vector<float> & nmsed_classes,std::vector<ResultCandidate> & result_candidate_vec,std::vector<int> & final_valid_detections,const int batch_idx,int total_size_per_batch,bool pad_per_class,int max_size_per_batch,bool clip_boxes,int per_batch_size)382 void SelectResultPerBatch(std::vector<float>& nmsed_boxes,
383 std::vector<float>& nmsed_scores,
384 std::vector<float>& nmsed_classes,
385 std::vector<ResultCandidate>& result_candidate_vec,
386 std::vector<int>& final_valid_detections,
387 const int batch_idx, int total_size_per_batch,
388 bool pad_per_class, int max_size_per_batch,
389 bool clip_boxes, int per_batch_size) {
390 auto rc_cmp = [](const ResultCandidate rc_i, const ResultCandidate rc_j) {
391 return rc_i.score > rc_j.score;
392 };
393 std::sort(result_candidate_vec.begin(), result_candidate_vec.end(), rc_cmp);
394
395 int max_detections = 0;
396 int result_candidate_size =
397 std::count_if(result_candidate_vec.begin(), result_candidate_vec.end(),
398 [](ResultCandidate rc) { return rc.box_index > -1; });
399 // If pad_per_class is false, we always pad to max_total_size
400 if (!pad_per_class) {
401 max_detections = std::min(result_candidate_size, total_size_per_batch);
402 } else {
403 max_detections = std::min(per_batch_size, result_candidate_size);
404 }
405
406 final_valid_detections[batch_idx] = max_detections;
407
408 int curr_total_size = max_detections;
409 int result_idx = 0;
410 // Pick the top max_detections values
411 while (curr_total_size > 0 && result_idx < result_candidate_vec.size()) {
412 ResultCandidate next_candidate = result_candidate_vec[result_idx++];
413 // Add to final output vectors
414 if (clip_boxes) {
415 const float box_min = 0.0;
416 const float box_max = 1.0;
417 nmsed_boxes.push_back(
418 std::max(std::min(next_candidate.box_coord[0], box_max), box_min));
419 nmsed_boxes.push_back(
420 std::max(std::min(next_candidate.box_coord[1], box_max), box_min));
421 nmsed_boxes.push_back(
422 std::max(std::min(next_candidate.box_coord[2], box_max), box_min));
423 nmsed_boxes.push_back(
424 std::max(std::min(next_candidate.box_coord[3], box_max), box_min));
425 } else {
426 nmsed_boxes.push_back(next_candidate.box_coord[0]);
427 nmsed_boxes.push_back(next_candidate.box_coord[1]);
428 nmsed_boxes.push_back(next_candidate.box_coord[2]);
429 nmsed_boxes.push_back(next_candidate.box_coord[3]);
430 }
431 nmsed_scores.push_back(next_candidate.score);
432 nmsed_classes.push_back(next_candidate.class_idx);
433 curr_total_size--;
434 }
435
436 nmsed_boxes.resize(per_batch_size * 4, 0);
437 nmsed_scores.resize(per_batch_size, 0);
438 nmsed_classes.resize(per_batch_size, 0);
439 }
440
// Runs per-class NMS over every batch item and writes the combined results to
// the outputs of `context`: output 0 holds boxes
// [num_batches, per_batch_size, 4], output 1 scores and output 2 classes
// (both [num_batches, per_batch_size]), and output 3 the number of valid
// detections per batch item [num_batches].
//
// The work is done in three parallelFor phases on the CPU device:
//   1. NMS for each (batch, class) pair via DoNMSPerClass;
//   2. per-batch selection and padding via SelectResultPerBatch;
//   3. copying the per-batch buffers into the output tensors.
//
// `num_batches` and `q` are read from dimensions 0 and 2 of `inp_boxes`, and
// `num_classes` from dimension 2 of `inp_scores`. When `pad_per_class` is
// true, the per-batch output length is
// min(total_size_per_batch, max_size_per_class * num_classes); otherwise it is
// total_size_per_batch.
void BatchedNonMaxSuppressionOp(
    OpKernelContext* context, const Tensor& inp_boxes, const Tensor& inp_scores,
    int num_boxes, const int max_size_per_class, const int total_size_per_batch,
    const float score_threshold, const float iou_threshold,
    bool pad_per_class = false, bool clip_boxes = true) {
  const int num_batches = inp_boxes.dim_size(0);
  int num_classes = inp_scores.dim_size(2);
  int q = inp_boxes.dim_size(2);

  const float* scores_data =
      const_cast<float*>(inp_scores.flat<float>().data());
  const float* boxes_data = const_cast<float*>(inp_boxes.flat<float>().data());

  // Element strides between consecutive batch items in the flat inputs.
  int boxes_per_batch = num_boxes * q * 4;
  int scores_per_batch = num_boxes * num_classes;
  const int size_per_class = std::min(max_size_per_class, num_boxes);
  // One table per batch item with `size_per_class` slots per class, all
  // pre-filled with placeholders (box_index == -1).
  std::vector<std::vector<ResultCandidate>> result_candidate_vec(
      num_batches,
      std::vector<ResultCandidate>(size_per_class * num_classes,
                                   {-1, -1.0, -1, {0.0, 0.0, 0.0, 0.0}}));

  // [num_batches, per_batch_size * 4]
  std::vector<std::vector<float>> nmsed_boxes(num_batches);
  // [num_batches, per_batch_size]
  std::vector<std::vector<float>> nmsed_scores(num_batches);
  // [num_batches, per_batch_size]
  std::vector<std::vector<float>> nmsed_classes(num_batches);
  // [num_batches]
  std::vector<int> final_valid_detections(num_batches);

  // Phase 1: each work item is one (batch, class) pair.
  auto shard_nms = [&](int begin, int end) {
    for (int idx = begin; idx < end; ++idx) {
      int batch_idx = idx / num_classes;
      int class_idx = idx % num_classes;
      DoNMSPerClass(batch_idx, class_idx,
                    boxes_data + boxes_per_batch * batch_idx,
                    scores_data + scores_per_batch * batch_idx, num_boxes, q,
                    num_classes, size_per_class, score_threshold, iou_threshold,
                    result_candidate_vec[batch_idx]);
    }
  };

  int length = num_batches * num_classes;
  // Input data boxes_data, scores_data
  int input_bytes = num_boxes * 10 * sizeof(float);
  int output_bytes = num_boxes * 10 * sizeof(float);
  int compute_cycles = Eigen::TensorOpCost::AddCost<int>() * num_boxes * 14 +
                       Eigen::TensorOpCost::MulCost<int>() * num_boxes * 9 +
                       Eigen::TensorOpCost::MulCost<float>() * num_boxes * 9 +
                       Eigen::TensorOpCost::AddCost<float>() * num_boxes * 8;
  // The cost here is not the actual number of cycles, but rather a set of
  // hand-tuned numbers that seem to work best.
  const Eigen::TensorOpCost cost(input_bytes, output_bytes, compute_cycles);
  const CPUDevice& d = context->eigen_device<CPUDevice>();
  d.parallelFor(length, cost, shard_nms);

  int per_batch_size = total_size_per_batch;
  if (pad_per_class) {
    per_batch_size =
        std::min(total_size_per_batch, max_size_per_class * num_classes);
  }

  Tensor* valid_detections_t = nullptr;
  TensorShape valid_detections_shape({num_batches});
  OP_REQUIRES_OK(context, context->allocate_output(3, valid_detections_shape,
                                                   &valid_detections_t));
  auto valid_detections_flat = valid_detections_t->template flat<int>();

  // Phase 2: each work item is one batch item.
  auto shard_result = [&](int begin, int end) {
    for (int batch_idx = begin; batch_idx < end; ++batch_idx) {
      SelectResultPerBatch(
          nmsed_boxes[batch_idx], nmsed_scores[batch_idx],
          nmsed_classes[batch_idx], result_candidate_vec[batch_idx],
          final_valid_detections, batch_idx, total_size_per_batch,
          pad_per_class, max_size_per_class * num_classes, clip_boxes,
          per_batch_size);
      valid_detections_flat(batch_idx) = final_valid_detections[batch_idx];
    }
  };
  length = num_batches;
  // Input data boxes_data, scores_data
  input_bytes =
      num_boxes * 10 * sizeof(float) + per_batch_size * 6 * sizeof(float);
  output_bytes =
      num_boxes * 5 * sizeof(float) + per_batch_size * 6 * sizeof(float);
  compute_cycles = Eigen::TensorOpCost::AddCost<int>() * num_boxes * 5 +
                   Eigen::TensorOpCost::AddCost<float>() * num_boxes * 5;
  // The cost here is not the actual number of cycles, but rather a set of
  // hand-tuned numbers that seem to work best.
  const Eigen::TensorOpCost cost_result(input_bytes, output_bytes,
                                        compute_cycles);
  d.parallelFor(length, cost_result, shard_result);

  Tensor* nmsed_boxes_t = nullptr;
  TensorShape boxes_shape({num_batches, per_batch_size, 4});
  OP_REQUIRES_OK(context,
                 context->allocate_output(0, boxes_shape, &nmsed_boxes_t));
  auto nmsed_boxes_flat = nmsed_boxes_t->template flat<float>();

  Tensor* nmsed_scores_t = nullptr;
  TensorShape scores_shape({num_batches, per_batch_size});
  OP_REQUIRES_OK(context,
                 context->allocate_output(1, scores_shape, &nmsed_scores_t));
  auto nmsed_scores_flat = nmsed_scores_t->template flat<float>();

  Tensor* nmsed_classes_t = nullptr;
  OP_REQUIRES_OK(context,
                 context->allocate_output(2, scores_shape, &nmsed_classes_t));
  auto nmsed_classes_flat = nmsed_classes_t->template flat<float>();

  // Phase 3: each work item is one output slot (batch_idx, j) in the flat
  // [num_batches * per_batch_size] index space.
  auto shard_copy_result = [&](int begin, int end) {
    for (int idx = begin; idx < end; ++idx) {
      int batch_idx = idx / per_batch_size;
      int j = idx % per_batch_size;
      nmsed_scores_flat(idx) = nmsed_scores[batch_idx][j];
      nmsed_classes_flat(idx) = nmsed_classes[batch_idx][j];
      for (int k = 0; k < 4; ++k) {
        nmsed_boxes_flat(idx * 4 + k) = nmsed_boxes[batch_idx][j * 4 + k];
      }
    }
  };
  length = num_batches * per_batch_size;
  // Input data boxes_data, scores_data
  input_bytes = 6 * sizeof(float);
  output_bytes = 6 * sizeof(float);
  compute_cycles = Eigen::TensorOpCost::AddCost<int>() * 2 +
                   Eigen::TensorOpCost::MulCost<int>() * 2 +
                   Eigen::TensorOpCost::DivCost<float>() * 2;
  const Eigen::TensorOpCost cost_copy_result(input_bytes, output_bytes,
                                             compute_cycles);
  d.parallelFor(length, cost_copy_result, shard_copy_result);
}
573
574 } // namespace
575
576 template <typename Device>
577 class NonMaxSuppressionOp : public OpKernel {
578 public:
NonMaxSuppressionOp(OpKernelConstruction * context)579 explicit NonMaxSuppressionOp(OpKernelConstruction* context)
580 : OpKernel(context) {
581 OP_REQUIRES_OK(context, context->GetAttr("iou_threshold", &iou_threshold_));
582 }
583
Compute(OpKernelContext * context)584 void Compute(OpKernelContext* context) override {
585 // boxes: [num_boxes, 4]
586 const Tensor& boxes = context->input(0);
587 // scores: [num_boxes]
588 const Tensor& scores = context->input(1);
589 // max_output_size: scalar
590 const Tensor& max_output_size = context->input(2);
591 OP_REQUIRES(
592 context, TensorShapeUtils::IsScalar(max_output_size.shape()),
593 errors::InvalidArgument("max_output_size must be 0-D, got shape ",
594 max_output_size.shape().DebugString()));
595
596 OP_REQUIRES(context, iou_threshold_ >= 0 && iou_threshold_ <= 1,
597 errors::InvalidArgument("iou_threshold must be in [0, 1]"));
598 int num_boxes = 0;
599 ParseAndCheckBoxSizes(context, boxes, &num_boxes);
600 CheckScoreSizes(context, num_boxes, scores);
601 if (!context->status().ok()) {
602 return;
603 }
604 auto similarity_fn = CreateIOUSimilarityFn<float>(boxes);
605
606 const float score_threshold_val = std::numeric_limits<float>::lowest();
607 const float dummy_soft_nms_sigma = static_cast<float>(0.0);
608 DoNonMaxSuppressionOp<float>(context, scores, num_boxes, max_output_size,
609 iou_threshold_, score_threshold_val,
610 dummy_soft_nms_sigma, similarity_fn);
611 }
612
613 private:
614 float iou_threshold_;
615 };
616
617 template <typename Device, typename T>
618 class NonMaxSuppressionV2Op : public OpKernel {
619 public:
NonMaxSuppressionV2Op(OpKernelConstruction * context)620 explicit NonMaxSuppressionV2Op(OpKernelConstruction* context)
621 : OpKernel(context) {}
622
Compute(OpKernelContext * context)623 void Compute(OpKernelContext* context) override {
624 // boxes: [num_boxes, 4]
625 const Tensor& boxes = context->input(0);
626 // scores: [num_boxes]
627 const Tensor& scores = context->input(1);
628 // max_output_size: scalar
629 const Tensor& max_output_size = context->input(2);
630 OP_REQUIRES(
631 context, TensorShapeUtils::IsScalar(max_output_size.shape()),
632 errors::InvalidArgument("max_output_size must be 0-D, got shape ",
633 max_output_size.shape().DebugString()));
634 // iou_threshold: scalar
635 const Tensor& iou_threshold = context->input(3);
636 OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
637 errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
638 iou_threshold.shape().DebugString()));
639 const T iou_threshold_val = iou_threshold.scalar<T>()();
640
641 OP_REQUIRES(context,
642 iou_threshold_val >= static_cast<T>(0.0) &&
643 iou_threshold_val <= static_cast<T>(1.0),
644 errors::InvalidArgument("iou_threshold must be in [0, 1]"));
645 int num_boxes = 0;
646 ParseAndCheckBoxSizes(context, boxes, &num_boxes);
647 CheckScoreSizes(context, num_boxes, scores);
648 if (!context->status().ok()) {
649 return;
650 }
651 auto similarity_fn = CreateIOUSimilarityFn<T>(boxes);
652
653 const T score_threshold_val = std::numeric_limits<T>::lowest();
654 const T dummy_soft_nms_sigma = static_cast<T>(0.0);
655 DoNonMaxSuppressionOp<T>(context, scores, num_boxes, max_output_size,
656 iou_threshold_val, score_threshold_val,
657 dummy_soft_nms_sigma, similarity_fn);
658 }
659 };
660
661 template <typename Device, typename T>
662 class NonMaxSuppressionV3Op : public OpKernel {
663 public:
NonMaxSuppressionV3Op(OpKernelConstruction * context)664 explicit NonMaxSuppressionV3Op(OpKernelConstruction* context)
665 : OpKernel(context) {}
666
Compute(OpKernelContext * context)667 void Compute(OpKernelContext* context) override {
668 // boxes: [num_boxes, 4]
669 const Tensor& boxes = context->input(0);
670 // scores: [num_boxes]
671 const Tensor& scores = context->input(1);
672 // max_output_size: scalar
673 const Tensor& max_output_size = context->input(2);
674 OP_REQUIRES(
675 context, TensorShapeUtils::IsScalar(max_output_size.shape()),
676 errors::InvalidArgument("max_output_size must be 0-D, got shape ",
677 max_output_size.shape().DebugString(),
678 " (Shape must be rank 0 but is ", "rank ",
679 max_output_size.dims(), ")"));
680 // iou_threshold: scalar
681 const Tensor& iou_threshold = context->input(3);
682 OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
683 errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
684 iou_threshold.shape().DebugString(),
685 " (Shape must be rank 0 but is rank ",
686 iou_threshold.dims(), ")"));
687 const T iou_threshold_val = iou_threshold.scalar<T>()();
688 OP_REQUIRES(context,
689 iou_threshold_val >= static_cast<T>(0.0) &&
690 iou_threshold_val <= static_cast<T>(1.0),
691 errors::InvalidArgument("iou_threshold must be in [0, 1]"));
692 // score_threshold: scalar
693 const Tensor& score_threshold = context->input(4);
694 OP_REQUIRES(
695 context, TensorShapeUtils::IsScalar(score_threshold.shape()),
696 errors::InvalidArgument("score_threshold must be 0-D, got shape ",
697 score_threshold.shape().DebugString()));
698 const T score_threshold_val = score_threshold.scalar<T>()();
699
700 int num_boxes = 0;
701 ParseAndCheckBoxSizes(context, boxes, &num_boxes);
702 CheckScoreSizes(context, num_boxes, scores);
703 if (!context->status().ok()) {
704 return;
705 }
706
707 auto similarity_fn = CreateIOUSimilarityFn<T>(boxes);
708
709 const T dummy_soft_nms_sigma = static_cast<T>(0.0);
710 DoNonMaxSuppressionOp<T>(context, scores, num_boxes, max_output_size,
711 iou_threshold_val, score_threshold_val,
712 dummy_soft_nms_sigma, similarity_fn);
713 }
714 };
715
716 template <typename Device, typename T>
717 class NonMaxSuppressionV4Op : public OpKernel {
718 public:
NonMaxSuppressionV4Op(OpKernelConstruction * context)719 explicit NonMaxSuppressionV4Op(OpKernelConstruction* context)
720 : OpKernel(context) {
721 OP_REQUIRES_OK(context, context->GetAttr("pad_to_max_output_size",
722 &pad_to_max_output_size_));
723 }
724
Compute(OpKernelContext * context)725 void Compute(OpKernelContext* context) override {
726 // boxes: [num_boxes, 4]
727 const Tensor& boxes = context->input(0);
728 // scores: [num_boxes]
729 const Tensor& scores = context->input(1);
730 // max_output_size: scalar
731 const Tensor& max_output_size = context->input(2);
732 OP_REQUIRES(
733 context, TensorShapeUtils::IsScalar(max_output_size.shape()),
734 errors::InvalidArgument("max_output_size must be 0-D, got shape ",
735 max_output_size.shape().DebugString()));
736 // iou_threshold: scalar
737 const Tensor& iou_threshold = context->input(3);
738 OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
739 errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
740 iou_threshold.shape().DebugString()));
741 const T iou_threshold_val = iou_threshold.scalar<T>()();
742 OP_REQUIRES(context,
743 iou_threshold_val >= static_cast<T>(0.0) &&
744 iou_threshold_val <= static_cast<T>(1.0),
745 errors::InvalidArgument("iou_threshold must be in [0, 1]"));
746 // score_threshold: scalar
747 const Tensor& score_threshold = context->input(4);
748 OP_REQUIRES(
749 context, TensorShapeUtils::IsScalar(score_threshold.shape()),
750 errors::InvalidArgument("score_threshold must be 0-D, got shape ",
751 score_threshold.shape().DebugString()));
752 const T score_threshold_val = score_threshold.scalar<T>()();
753
754 int num_boxes = 0;
755 ParseAndCheckBoxSizes(context, boxes, &num_boxes);
756 CheckScoreSizes(context, num_boxes, scores);
757 if (!context->status().ok()) {
758 return;
759 }
760
761 auto similarity_fn = CreateIOUSimilarityFn<T>(boxes);
762 int num_valid_outputs;
763
764 bool return_scores_tensor_ = false;
765 const T dummy_soft_nms_sigma = static_cast<T>(0.0);
766 DoNonMaxSuppressionOp<T>(
767 context, scores, num_boxes, max_output_size, iou_threshold_val,
768 score_threshold_val, dummy_soft_nms_sigma, similarity_fn,
769 return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs);
770
771 // Allocate scalar output tensor for number of indices computed.
772 Tensor* num_outputs_t = nullptr;
773 OP_REQUIRES_OK(context, context->allocate_output(
774 1, tensorflow::TensorShape{}, &num_outputs_t));
775 num_outputs_t->scalar<int32>().setConstant(num_valid_outputs);
776 }
777
778 private:
779 bool pad_to_max_output_size_;
780 };
781
782 template <typename Device, typename T>
783 class NonMaxSuppressionV5Op : public OpKernel {
784 public:
NonMaxSuppressionV5Op(OpKernelConstruction * context)785 explicit NonMaxSuppressionV5Op(OpKernelConstruction* context)
786 : OpKernel(context) {
787 OP_REQUIRES_OK(context, context->GetAttr("pad_to_max_output_size",
788 &pad_to_max_output_size_));
789 }
790
Compute(OpKernelContext * context)791 void Compute(OpKernelContext* context) override {
792 // boxes: [num_boxes, 4]
793 const Tensor& boxes = context->input(0);
794 // scores: [num_boxes]
795 const Tensor& scores = context->input(1);
796 // max_output_size: scalar
797 const Tensor& max_output_size = context->input(2);
798 OP_REQUIRES(
799 context, TensorShapeUtils::IsScalar(max_output_size.shape()),
800 errors::InvalidArgument("max_output_size must be 0-D, got shape ",
801 max_output_size.shape().DebugString()));
802 // iou_threshold: scalar
803 const Tensor& iou_threshold = context->input(3);
804 OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
805 errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
806 iou_threshold.shape().DebugString()));
807 const T iou_threshold_val = iou_threshold.scalar<T>()();
808 OP_REQUIRES(context,
809 iou_threshold_val >= static_cast<T>(0.0) &&
810 iou_threshold_val <= static_cast<T>(1.0),
811 errors::InvalidArgument("iou_threshold must be in [0, 1]"));
812 // score_threshold: scalar
813 const Tensor& score_threshold = context->input(4);
814 OP_REQUIRES(
815 context, TensorShapeUtils::IsScalar(score_threshold.shape()),
816 errors::InvalidArgument("score_threshold must be 0-D, got shape ",
817 score_threshold.shape().DebugString()));
818 const T score_threshold_val = score_threshold.scalar<T>()();
819
820 // soft_nms_sigma: scalar
821 const Tensor& soft_nms_sigma = context->input(5);
822 OP_REQUIRES(
823 context, TensorShapeUtils::IsScalar(soft_nms_sigma.shape()),
824 errors::InvalidArgument("soft_nms_sigma must be 0-D, got shape ",
825 soft_nms_sigma.shape().DebugString()));
826 const T soft_nms_sigma_val = soft_nms_sigma.scalar<T>()();
827 OP_REQUIRES(context, soft_nms_sigma_val >= static_cast<T>(0.0),
828 errors::InvalidArgument("soft_nms_sigma_val must be >= 0"));
829
830 int num_boxes = 0;
831 ParseAndCheckBoxSizes(context, boxes, &num_boxes);
832 CheckScoreSizes(context, num_boxes, scores);
833 if (!context->status().ok()) {
834 return;
835 }
836
837 auto similarity_fn = CreateIOUSimilarityFn<T>(boxes);
838 int num_valid_outputs;
839
840 // For NonMaxSuppressionV5Op, we always return a second output holding
841 // corresponding scores, so `return_scores_tensor` should never be false.
842 const bool return_scores_tensor_ = true;
843 DoNonMaxSuppressionOp<T>(
844 context, scores, num_boxes, max_output_size, iou_threshold_val,
845 score_threshold_val, soft_nms_sigma_val, similarity_fn,
846 return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs);
847
848 // Allocate scalar output tensor for number of indices computed.
849 Tensor* num_outputs_t = nullptr;
850 OP_REQUIRES_OK(context, context->allocate_output(
851 2, tensorflow::TensorShape{}, &num_outputs_t));
852 num_outputs_t->scalar<int32>().setConstant(num_valid_outputs);
853 }
854
855 private:
856 bool pad_to_max_output_size_;
857 };
858
859 template <typename Device>
860 class NonMaxSuppressionWithOverlapsOp : public OpKernel {
861 public:
NonMaxSuppressionWithOverlapsOp(OpKernelConstruction * context)862 explicit NonMaxSuppressionWithOverlapsOp(OpKernelConstruction* context)
863 : OpKernel(context) {}
864
Compute(OpKernelContext * context)865 void Compute(OpKernelContext* context) override {
866 // overlaps: [num_boxes, num_boxes]
867 const Tensor& overlaps = context->input(0);
868 // scores: [num_boxes]
869 const Tensor& scores = context->input(1);
870 // max_output_size: scalar
871 const Tensor& max_output_size = context->input(2);
872 OP_REQUIRES(
873 context, TensorShapeUtils::IsScalar(max_output_size.shape()),
874 errors::InvalidArgument("max_output_size must be 0-D, got shape ",
875 max_output_size.shape().DebugString()));
876 // overlap_threshold: scalar
877 const Tensor& overlap_threshold = context->input(3);
878 OP_REQUIRES(
879 context, TensorShapeUtils::IsScalar(overlap_threshold.shape()),
880 errors::InvalidArgument("overlap_threshold must be 0-D, got shape ",
881 overlap_threshold.shape().DebugString()));
882 const float overlap_threshold_val = overlap_threshold.scalar<float>()();
883
884 // score_threshold: scalar
885 const Tensor& score_threshold = context->input(4);
886 OP_REQUIRES(
887 context, TensorShapeUtils::IsScalar(score_threshold.shape()),
888 errors::InvalidArgument("score_threshold must be 0-D, got shape ",
889 score_threshold.shape().DebugString()));
890 const float score_threshold_val = score_threshold.scalar<float>()();
891
892 int num_boxes = 0;
893 ParseAndCheckOverlapSizes(context, overlaps, &num_boxes);
894 CheckScoreSizes(context, num_boxes, scores);
895 if (!context->status().ok()) {
896 return;
897 }
898 auto similarity_fn = CreateOverlapSimilarityFn<float>(overlaps);
899
900 const float dummy_soft_nms_sigma = static_cast<float>(0.0);
901 DoNonMaxSuppressionOp<float>(context, scores, num_boxes, max_output_size,
902 overlap_threshold_val, score_threshold_val,
903 dummy_soft_nms_sigma, similarity_fn);
904 }
905 };
906
907 template <typename Device>
908 class CombinedNonMaxSuppressionOp : public OpKernel {
909 public:
CombinedNonMaxSuppressionOp(OpKernelConstruction * context)910 explicit CombinedNonMaxSuppressionOp(OpKernelConstruction* context)
911 : OpKernel(context) {
912 OP_REQUIRES_OK(context, context->GetAttr("pad_per_class", &pad_per_class_));
913 OP_REQUIRES_OK(context, context->GetAttr("clip_boxes", &clip_boxes_));
914 }
915
Compute(OpKernelContext * context)916 void Compute(OpKernelContext* context) override {
917 // boxes: [batch_size, num_anchors, q, 4]
918 const Tensor& boxes = context->input(0);
919 // scores: [batch_size, num_anchors, num_classes]
920 const Tensor& scores = context->input(1);
921 OP_REQUIRES(
922 context, (boxes.dim_size(0) == scores.dim_size(0)),
923 errors::InvalidArgument("boxes and scores must have same batch size"));
924
925 // max_output_size: scalar
926 const Tensor& max_output_size = context->input(2);
927 OP_REQUIRES(
928 context, TensorShapeUtils::IsScalar(max_output_size.shape()),
929 errors::InvalidArgument("max_size_per_class must be 0-D, got shape ",
930 max_output_size.shape().DebugString()));
931 const int max_size_per_class = max_output_size.scalar<int>()();
932 // max_total_size: scalar
933 const Tensor& max_total_size = context->input(3);
934 OP_REQUIRES(
935 context, TensorShapeUtils::IsScalar(max_total_size.shape()),
936 errors::InvalidArgument("max_total_size must be 0-D, got shape ",
937 max_total_size.shape().DebugString()));
938 const int max_total_size_per_batch = max_total_size.scalar<int>()();
939 OP_REQUIRES(context, max_total_size_per_batch > 0,
940 errors::InvalidArgument("max_total_size must be > 0"));
941 // Throw warning when `max_total_size` is too large as it may cause OOM.
942 if (max_total_size_per_batch > pow(10, 6)) {
943 LOG(WARNING) << "Detected a large value for `max_total_size`. This may "
944 << "cause OOM error. (max_total_size: "
945 << max_total_size.scalar<int>()() << ")";
946 }
947 // iou_threshold: scalar
948 const Tensor& iou_threshold = context->input(4);
949 OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
950 errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
951 iou_threshold.shape().DebugString()));
952 const float iou_threshold_val = iou_threshold.scalar<float>()();
953
954 // score_threshold: scalar
955 const Tensor& score_threshold = context->input(5);
956 OP_REQUIRES(
957 context, TensorShapeUtils::IsScalar(score_threshold.shape()),
958 errors::InvalidArgument("score_threshold must be 0-D, got shape ",
959 score_threshold.shape().DebugString()));
960 const float score_threshold_val = score_threshold.scalar<float>()();
961
962 OP_REQUIRES(context, iou_threshold_val >= 0 && iou_threshold_val <= 1,
963 errors::InvalidArgument("iou_threshold must be in [0, 1]"));
964 int num_boxes = 0;
965 const int num_classes = scores.dim_size(2);
966 ParseAndCheckCombinedNMSBoxSizes(context, boxes, &num_boxes, num_classes);
967 CheckCombinedNMSScoreSizes(context, num_boxes, scores);
968
969 if (!context->status().ok()) {
970 return;
971 }
972 BatchedNonMaxSuppressionOp(context, boxes, scores, num_boxes,
973 max_size_per_class, max_total_size_per_batch,
974 score_threshold_val, iou_threshold_val,
975 pad_per_class_, clip_boxes_);
976 }
977
978 private:
979 bool pad_per_class_;
980 bool clip_boxes_;
981 };
982
983 REGISTER_KERNEL_BUILDER(Name("NonMaxSuppression").Device(DEVICE_CPU),
984 NonMaxSuppressionOp<CPUDevice>);
985
986 REGISTER_KERNEL_BUILDER(
987 Name("NonMaxSuppressionV2").TypeConstraint<float>("T").Device(DEVICE_CPU),
988 NonMaxSuppressionV2Op<CPUDevice, float>);
989 REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV2")
990 .TypeConstraint<Eigen::half>("T")
991 .Device(DEVICE_CPU),
992 NonMaxSuppressionV2Op<CPUDevice, Eigen::half>);
993
994 REGISTER_KERNEL_BUILDER(
995 Name("NonMaxSuppressionV3").TypeConstraint<float>("T").Device(DEVICE_CPU),
996 NonMaxSuppressionV3Op<CPUDevice, float>);
997 REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV3")
998 .TypeConstraint<Eigen::half>("T")
999 .Device(DEVICE_CPU),
1000 NonMaxSuppressionV3Op<CPUDevice, Eigen::half>);
1001
1002 REGISTER_KERNEL_BUILDER(
1003 Name("NonMaxSuppressionV4").TypeConstraint<float>("T").Device(DEVICE_CPU),
1004 NonMaxSuppressionV4Op<CPUDevice, float>);
1005 REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV4")
1006 .TypeConstraint<Eigen::half>("T")
1007 .Device(DEVICE_CPU),
1008 NonMaxSuppressionV4Op<CPUDevice, Eigen::half>);
1009
1010 REGISTER_KERNEL_BUILDER(
1011 Name("NonMaxSuppressionV5").TypeConstraint<float>("T").Device(DEVICE_CPU),
1012 NonMaxSuppressionV5Op<CPUDevice, float>);
1013 REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV5")
1014 .TypeConstraint<Eigen::half>("T")
1015 .Device(DEVICE_CPU),
1016 NonMaxSuppressionV5Op<CPUDevice, Eigen::half>);
1017
1018 REGISTER_KERNEL_BUILDER(
1019 Name("NonMaxSuppressionWithOverlaps").Device(DEVICE_CPU),
1020 NonMaxSuppressionWithOverlapsOp<CPUDevice>);
1021
1022 REGISTER_KERNEL_BUILDER(Name("CombinedNonMaxSuppression").Device(DEVICE_CPU),
1023 CombinedNonMaxSuppressionOp<CPUDevice>);
1024
1025 } // namespace tensorflow
1026