/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_
#define TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_

// Connected component analysis. The op is described in ../ops/image_ops.cc. A
// description of the algorithm appears below.

#define EIGEN_USE_THREADS

#include <algorithm>  // for std::min

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/work_sharder.h"

namespace tensorflow {

namespace functor {

template <typename T>
bool is_nonzero(T value) {
  return value != T(0);
}

template <>
bool is_nonzero(string value) {
  return value.size() != 0;
}

// Processes each pixel of an image for union-find, in parallel blocks. This is
// loosely based on the algorithm in "GPU Computing Gems" by Ondrej Stava and
// Bedrich Benes, available here:
// http://hpcg.purdue.edu/bbenes/papers/Stava2011CCL.pdf
// The bulk of the process uses blocks of each image, which have each been
// processed separately. As long as there are multiple blocks in the image, we
// double the height and width of the blocks, creating new blocks which each
// consist of 2x2 previous sub-blocks. On each new block, we process adjacent
// pixels from the previous sub-blocks serially. However, the new blocks are not
// connected, so we can process each block in parallel.
// The GPU algorithm first processes blocks of a fixed size in GPU shared
// memory, with one image block per CUDA thread block. On the CPU, we just start
// with a block size of a single pixel, and borrow the rest of the algorithm
// unchanged.
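//
// As a hedged sketch (not necessarily the actual kernel code), a driver loop
// over this functor might look like the following, where `num_images` is
// assumed to be supplied by the caller and `forest` is assumed to be
// pre-initialized so that forest[i] == i (see TensorRangeFunctor below):
//
//   BlockedImageUnionFindFunctor<T> union_find(images, num_rows, num_cols,
//                                              forest, rank);
//   while (union_find.can_merge()) {
//     union_find.merge_blocks();
//     // Each (image, new block) pair is independent, so this nest is
//     // typically parallelized.
//     for (int64 image = 0; image < num_images; ++image) {
//       for (int64 by = 0; by < union_find.num_blocks_vertically(); ++by) {
//         for (int64 bx = 0; bx < union_find.num_blocks_horizontally(); ++bx) {
//           union_find.merge_internal_block_edges(image, by, bx);
//         }
//       }
//     }
//   }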
template <typename T>
class BlockedImageUnionFindFunctor {
 public:
  using OutputType = int64;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockedImageUnionFindFunctor(
      const T* images, const int64 num_rows, const int64 num_cols,
      OutputType* forest, OutputType* rank)
      : images_(images),
        num_rows_(num_rows),
        num_cols_(num_cols),
        block_height_(1),
        block_width_(1),
        forest_(forest),
        rank_(rank) {}

  // Returns the root of the tree that the pixel at the given index belongs to.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
  find(OutputType index) const {
    while (forest_[index] != index) {
      index = forest_[index];
    }
    return index;
  }

  // Returns the number of blocks along the y axis.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks_vertically() const {
    return (num_rows_ + block_height_ - 1) / block_height_;
  }

  // Returns the number of blocks along the x axis.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks_horizontally() const {
    return (num_cols_ + block_width_ - 1) / block_width_;
  }

  // Returns the total number of blocks in each image.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks() const {
    return num_blocks_vertically() * num_blocks_horizontally();
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 block_height() const {
    return block_height_;
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 block_width() const {
    return block_width_;
  }

  // Returns whether we may merge again (the image contains more than one
  // block).
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool can_merge() const {
    return block_height_ < num_rows_ || block_width_ < num_cols_;
  }

  // Doubles the block size. After this method, you must call
  // `merge_internal_block_edges` for each image and each *new* block's xy
  // coordinates (typically in parallel).
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void merge_blocks() {
    block_height_ *= 2;
    block_width_ *= 2;
  }

  // Processes pairs of pixels within the block which were adjacent in the four
  // sub-blocks. This must be done at each stage so that the connected
  // components in each block are joined correctly.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void merge_internal_block_edges(
      int64 image_index, int64 block_vertical_index,
      int64 block_horizontal_index) const {
    int64 block_start_y = block_vertical_index * block_height_;
    int64 block_start_x = block_horizontal_index * block_width_;
    // Merge the 4 sub-blocks horizontally (fixing the vertical seam).
    int64 block_center_x = block_start_x + block_width_ / 2 - 1;
    if (0 <= block_center_x && block_center_x + 1 < num_cols_) {
      int64 merge_blocks_limit_y =
          std::min(num_rows_, block_start_y + block_height_);
      for (int64 y = block_start_y; y < merge_blocks_limit_y; y++) {
        union_right(image_index, y, block_center_x);
      }
    }
    // Merge the 4 sub-blocks vertically (fixing the horizontal seam).
    int64 block_center_y = block_start_y + block_height_ / 2 - 1;
    if (0 <= block_center_y && block_center_y + 1 < num_rows_) {
      int64 merge_blocks_limit_x =
          std::min(num_cols_, block_start_x + block_width_);
      for (int64 x = block_start_x; x < merge_blocks_limit_x; x++) {
        union_down(image_index, block_center_y, x);
      }
    }
  }

 private:
  // The input image(s).
  const T* const images_;
  const int64 num_rows_;
  const int64 num_cols_;
  // Current height of each sub-block of the image.
  int64 block_height_;
  // Current width of each sub-block of the image.
  int64 block_width_;
  // Union-find forest. This has the same size as `images_`, and each entry
  // holds the index of its parent in `images_` (roots hold their own index).
  // Cycles should not occur.
  OutputType* const forest_;
  // Union-find rank of each pixel.
  OutputType* const rank_;

  // Unions the pixel with the pixel below it if applicable (the pixel is
  // nonzero, it is not in the last row, and the pixel below it has the same
  // value).
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void union_down(OutputType batch,
                                                        OutputType row,
                                                        OutputType col) const {
    T pixel = read_pixel(batch, row, col);
    if (is_nonzero<T>(pixel)) {
      const int64 index_a = col + num_cols_ * (row + num_rows_ * batch);
      if (row + 1 < num_rows_ && read_pixel(batch, row + 1, col) == pixel) {
        const int64 index_b = col + num_cols_ * (row + 1 + num_rows_ * batch);
        do_union(index_a, index_b);
      }
    }
  }

  // Unions the pixel with the pixel to the right of it if applicable.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void union_right(OutputType batch,
                                                         OutputType row,
                                                         OutputType col) const {
    T pixel = read_pixel(batch, row, col);
    if (is_nonzero<T>(pixel)) {
      const int64 index_a = col + num_cols_ * (row + num_rows_ * batch);
      if (col + 1 < num_cols_ && read_pixel(batch, row, col + 1) == pixel) {
        const int64 index_b = col + 1 + num_cols_ * (row + num_rows_ * batch);
        do_union(index_a, index_b);
      }
    }
  }

  // Reads a pixel value in the images.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T
  read_pixel(const OutputType batch, const OutputType row,
             const OutputType col) const {
    return images_[col + num_cols_ * (row + num_rows_ * batch)];
  }

  // Unions the trees that the two pixels belong to, using their index in the
  // `images_` array.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void do_union(
      OutputType index_a, OutputType index_b) const {
    // Find the roots of index_a and index_b in the forest, and make one the
    // child of the other.
    index_a = find(index_a);
    index_b = find(index_b);
    const OutputType rank_a = rank_[index_a];
    const OutputType rank_b = rank_[index_b];
    OutputType parent, child;
    if (index_a == index_b) {
      return;
    } else if (rank_a < rank_b) {
      parent = index_a;
      child = index_b;
    } else {
      parent = index_b;
      child = index_a;
      rank_[parent]++;
    }
    forest_[child] = parent;
  }
};

// Runs the BlockedImageUnionFindFunctor on all pixels. Will require different
// CPU and GPU implementations.
template <typename Device, typename T>
class ImageConnectedComponentsFunctor {
 public:
  using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType;

  void operator()(OpKernelContext* ctx,
                  typename TTypes<T, 3>::ConstTensor images,
                  typename TTypes<OutputType, 3>::Tensor forest,
                  typename TTypes<OutputType, 3>::Tensor rank);
};
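
// A hedged usage sketch (the context and tensor names below are illustrative,
// and all three tensors are assumed to be shaped [batch, rows, cols]):
//
//   functor::ImageConnectedComponentsFunctor<Device, T>()(
//       ctx, images_t.tensor<T, 3>(), forest_t.tensor<int64, 3>(),
//       rank_t.tensor<int64, 3>());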

// Fills a flat Tensor with indices from 0 to n - 1.
template <typename Device>
class TensorRangeFunctor {
 public:
  using OutputType = typename BlockedImageUnionFindFunctor<bool>::OutputType;

  void operator()(const Device& device,
                  typename TTypes<OutputType>::Flat tensor) {
    tensor.device(device) = tensor.generate(TensorRangeGenerator());
  }

 private:
  class TensorRangeGenerator {
   public:
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
    operator()(const Eigen::array<Eigen::DenseIndex, 1>& coords) const {
      return coords[0];
    }
  };
};

// Given the union-find forest, generates the root index for each node. This
// gives us arbitrary, usually non-consecutive ids for each connected component.
// The ids are massaged in Python to get deterministic, consecutive ids.
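//
// As an illustrative example (hedged, not taken from a test): for a single 1x4
// image [5, 5, 0, 7], suppose union-find has linked the two 5-pixels under
// root index 0 and left pixel 3 as its own root. The generated ids are then
// [1, 1, 0, 4]: each nonzero pixel gets (root index + 1), zero pixels get 0.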
template <typename Device, typename T>
class FindRootFunctor {
 public:
  using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType;

  void operator()(const Device& device,
                  typename TTypes<OutputType>::Flat component_ids,
                  const T* images,
                  const BlockedImageUnionFindFunctor<T>& union_find) {
    component_ids.device(device) =
        component_ids.generate(FindRootGenerator(images, union_find));
  }

 private:
  class FindRootGenerator {
    const T* const images_;
    const BlockedImageUnionFindFunctor<T> union_find_;

   public:
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE FindRootGenerator(
        const T* images, BlockedImageUnionFindFunctor<T> union_find)
        : images_(images), union_find_(union_find) {}

    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
    operator()(const Eigen::array<Eigen::DenseIndex, 1>& coords) const {
      if (is_nonzero<T>(images_[coords[0]])) {
        // True pixels have an arbitrary segment id > 0. The segment ids will be
        // made contiguous later.
        return union_find_.find(coords[0]) + 1;
      } else {
        // False pixels have a segment of 0.
        return 0;
      }
    }
  };
};

}  // end namespace functor

}  // namespace tensorflow

#endif  // TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_