// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/partitioned_matrix_view.h"

#include <algorithm>
#include <cstring>
#include <vector>
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
#include "ceres/small_blas.h"
#include "glog/logging.h"

namespace ceres {
namespace internal {

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
PartitionedMatrixView(
    const BlockSparseMatrix& matrix,
    int num_col_blocks_e)
    : matrix_(matrix),
      num_col_blocks_e_(num_col_blocks_e) {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  CHECK_NOTNULL(bs);

  num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_;

  // Compute the number of row blocks in E. The number of row blocks
  // in E may be less than the number of row blocks in the input matrix,
  // as some of the row blocks at the bottom may not have any
  // e_blocks. For a definition of what an e_block is, please see
  // explicit_schur_complement_solver.h
  num_row_blocks_e_ = 0;
  for (int r = 0; r < bs->rows.size(); ++r) {
    const vector<Cell>& cells = bs->rows[r].cells;
    if (cells[0].block_id < num_col_blocks_e_) {
      ++num_row_blocks_e_;
    }
  }

  // Compute the number of columns in E and F.
  num_cols_e_ = 0;
  num_cols_f_ = 0;

  for (int c = 0; c < bs->cols.size(); ++c) {
    const Block& block = bs->cols[c];
    if (c < num_col_blocks_e_) {
      num_cols_e_ += block.size;
    } else {
      num_cols_f_ += block.size;
    }
  }

  CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols());
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
~PartitionedMatrixView() {
}

// The next four methods don't seem to be particularly cache
// friendly. This is an artifact of how the BlockStructure of the
// input matrix is constructed. These methods will benefit from
// multithreading as well as improved data layout.

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
RightMultiplyE(const double* x, double* y) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();

  // Iterate over the first num_row_blocks_e_ row blocks, and multiply
  // by the first cell in each row block.
  const double* values = matrix_.values();
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const Cell& cell = bs->rows[r].cells[0];
    const int row_block_pos = bs->rows[r].block.position;
    const int row_block_size = bs->rows[r].block.size;
    const int col_block_id = cell.block_id;
    const int col_block_pos = bs->cols[col_block_id].position;
    const int col_block_size = bs->cols[col_block_id].size;
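    // y_row += E(r) * x_col, where E(r) is the e_block cell in row
    // block r.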
    MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
        values + cell.position, row_block_size, col_block_size,
        x + col_block_pos,
        y + row_block_pos);
  }
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
RightMultiplyF(const double* x, double* y) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();

  // Iterate over the row blocks. If a row block is in E, multiply by
  // all of its cells except the first one, which is of type E. If the
  // row block is not in E (i.e., it is in the bottom
  // num_row_blocks - num_row_blocks_e row blocks), then all of its
  // cells are of type F, so multiply by all of them.
  const double* values = matrix_.values();
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const int row_block_pos = bs->rows[r].block.position;
    const int row_block_size = bs->rows[r].block.size;
    const vector<Cell>& cells = bs->rows[r].cells;
    for (int c = 1; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_pos = bs->cols[col_block_id].position;
      const int col_block_size = bs->cols[col_block_id].size;
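      // x only spans the columns of F, so the column position is
      // shifted left by num_cols_e_.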
      MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
          values + cells[c].position, row_block_size, col_block_size,
          x + col_block_pos - num_cols_e_,
          y + row_block_pos);
    }
  }

  for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
    const int row_block_pos = bs->rows[r].block.position;
    const int row_block_size = bs->rows[r].block.size;
    const vector<Cell>& cells = bs->rows[r].cells;
    for (int c = 0; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_pos = bs->cols[col_block_id].position;
      const int col_block_size = bs->cols[col_block_id].size;
      MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
          values + cells[c].position, row_block_size, col_block_size,
          x + col_block_pos - num_cols_e_,
          y + row_block_pos);
    }
  }
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyE(const double* x, double* y) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();

  // Iterate over the first num_row_blocks_e_ row blocks, and multiply
  // by the first cell in each row block.
  const double* values = matrix_.values();
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const Cell& cell = bs->rows[r].cells[0];
    const int row_block_pos = bs->rows[r].block.position;
    const int row_block_size = bs->rows[r].block.size;
    const int col_block_id = cell.block_id;
    const int col_block_pos = bs->cols[col_block_id].position;
    const int col_block_size = bs->cols[col_block_id].size;
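    // y_col += E(r)' * x_row for the single e_block cell in row
    // block r.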
    MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
        values + cell.position, row_block_size, col_block_size,
        x + row_block_pos,
        y + col_block_pos);
  }
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyF(const double* x, double* y) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();

  // Iterate over the row blocks. If a row block is in E, multiply by
  // all of its cells except the first one, which is of type E. If the
  // row block is not in E (i.e., it is in the bottom
  // num_row_blocks - num_row_blocks_e row blocks), then all of its
  // cells are of type F, so multiply by all of them.
  const double* values = matrix_.values();
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const int row_block_pos = bs->rows[r].block.position;
    const int row_block_size = bs->rows[r].block.size;
    const vector<Cell>& cells = bs->rows[r].cells;
    for (int c = 1; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_pos = bs->cols[col_block_id].position;
      const int col_block_size = bs->cols[col_block_id].size;
      MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
        values + cells[c].position, row_block_size, col_block_size,
        x + row_block_pos,
        y + col_block_pos - num_cols_e_);
    }
  }

  for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
    const int row_block_pos = bs->rows[r].block.position;
    const int row_block_size = bs->rows[r].block.size;
    const vector<Cell>& cells = bs->rows[r].cells;
    for (int c = 0; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_pos = bs->cols[col_block_id].position;
      const int col_block_size = bs->cols[col_block_id].size;
      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
        values + cells[c].position, row_block_size, col_block_size,
        x + row_block_pos,
        y + col_block_pos - num_cols_e_);
    }
  }
}

// Given a range of column blocks of a matrix m, compute the block
// structure of the block diagonal of the matrix m(:,
// start_col_block:end_col_block)'m(:, start_col_block:end_col_block)
// and return a BlockSparseMatrix with this block structure. The
// caller owns the result.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
BlockSparseMatrix*
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
CreateBlockDiagonalMatrixLayout(int start_col_block, int end_col_block) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  CompressedRowBlockStructure* block_diagonal_structure =
      new CompressedRowBlockStructure;

  int block_position = 0;
  int diagonal_cell_position = 0;

  // Iterate over the column blocks, creating a new diagonal block for
  // each column block.
  for (int c = start_col_block; c < end_col_block; ++c) {
    const Block& block = bs->cols[c];
    block_diagonal_structure->cols.push_back(Block());
    Block& diagonal_block = block_diagonal_structure->cols.back();
    diagonal_block.size = block.size;
    diagonal_block.position = block_position;

    block_diagonal_structure->rows.push_back(CompressedRow());
    CompressedRow& row = block_diagonal_structure->rows.back();
    row.block = diagonal_block;

    row.cells.push_back(Cell());
    Cell& cell = row.cells.back();
    cell.block_id = c - start_col_block;
    cell.position = diagonal_cell_position;

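    // Each diagonal cell is stored as a dense size x size block, so
    // the value positions of consecutive cells are size * size apart.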
    block_position += block.size;
    diagonal_cell_position += block.size * block.size;
  }

  // Build a BlockSparseMatrix with the just computed block
  // structure.
  return new BlockSparseMatrix(block_diagonal_structure);
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
BlockSparseMatrix*
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
CreateBlockDiagonalEtE() const {
  BlockSparseMatrix* block_diagonal =
      CreateBlockDiagonalMatrixLayout(0, num_col_blocks_e_);
  UpdateBlockDiagonalEtE(block_diagonal);
  return block_diagonal;
}

template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
BlockSparseMatrix*
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
CreateBlockDiagonalFtF() const {
  BlockSparseMatrix* block_diagonal =
      CreateBlockDiagonalMatrixLayout(
          num_col_blocks_e_, num_col_blocks_e_ + num_col_blocks_f_);
  UpdateBlockDiagonalFtF(block_diagonal);
  return block_diagonal;
}

// Similar to the code in RightMultiplyE, except that instead of a
// matrix-vector multiply, it is an outer product.
//
//    block_diagonal = block_diagonal(E'E)
//
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalEtE(
    BlockSparseMatrix* block_diagonal) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  const CompressedRowBlockStructure* block_diagonal_structure =
      block_diagonal->block_structure();

  block_diagonal->SetZero();
  const double* values = matrix_.values();
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const Cell& cell = bs->rows[r].cells[0];
    const int row_block_size = bs->rows[r].block.size;
    const int block_id = cell.block_id;
    const int col_block_size = bs->cols[block_id].size;
    const int cell_position =
        block_diagonal_structure->rows[block_id].cells[0].position;

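    // block_diagonal(block_id, block_id) += E(r, block_id)' * E(r, block_id)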
    MatrixTransposeMatrixMultiply
        <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
            values + cell.position, row_block_size, col_block_size,
            values + cell.position, row_block_size, col_block_size,
            block_diagonal->mutable_values() + cell_position,
            0, 0, col_block_size, col_block_size);
  }
}

// Similar to the code in RightMultiplyF, except that instead of a
// matrix-vector multiply, it is an outer product.
//
//   block_diagonal = block_diagonal(F'F)
//
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  const CompressedRowBlockStructure* block_diagonal_structure =
      block_diagonal->block_structure();

  block_diagonal->SetZero();
  const double* values = matrix_.values();
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const int row_block_size = bs->rows[r].block.size;
    const vector<Cell>& cells = bs->rows[r].cells;
    for (int c = 1; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_size = bs->cols[col_block_id].size;
      const int diagonal_block_id = col_block_id - num_col_blocks_e_;
      const int cell_position =
          block_diagonal_structure->rows[diagonal_block_id].cells[0].position;

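      // block_diagonal(diagonal_block_id, diagonal_block_id) +=
      //     F(r, c)' * F(r, c)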
      MatrixTransposeMatrixMultiply
          <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
              values + cells[c].position, row_block_size, col_block_size,
              values + cells[c].position, row_block_size, col_block_size,
              block_diagonal->mutable_values() + cell_position,
              0, 0, col_block_size, col_block_size);
    }
  }

  for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
    const int row_block_size = bs->rows[r].block.size;
    const vector<Cell>& cells = bs->rows[r].cells;
    for (int c = 0; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_size = bs->cols[col_block_id].size;
      const int diagonal_block_id = col_block_id - num_col_blocks_e_;
      const int cell_position =
          block_diagonal_structure->rows[diagonal_block_id].cells[0].position;

      MatrixTransposeMatrixMultiply
          <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cells[c].position, row_block_size, col_block_size,
              values + cells[c].position, row_block_size, col_block_size,
              block_diagonal->mutable_values() + cell_position,
              0, 0, col_block_size, col_block_size);
    }
  }
}

}  // namespace internal
}  // namespace ceres