1 // Ceres Solver - A fast non-linear least squares minimizer
2 // Copyright 2014 Google Inc. All rights reserved.
3 // http://code.google.com/p/ceres-solver/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 //
8 // * Redistributions of source code must retain the above copyright notice,
9 //   this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright notice,
11 //   this list of conditions and the following disclaimer in the documentation
12 //   and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors may be
14 //   used to endorse or promote products derived from this software without
15 //   specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 //
29 // Author: sameeragarwal@google.com (Sameer Agarwal)
30 
31 #include "ceres/internal/port.h"
32 
33 #include <algorithm>
34 #include <ctime>
35 #include <set>
36 #include <vector>
37 
38 #include "ceres/block_random_access_dense_matrix.h"
39 #include "ceres/block_random_access_matrix.h"
40 #include "ceres/block_random_access_sparse_matrix.h"
41 #include "ceres/block_sparse_matrix.h"
42 #include "ceres/block_structure.h"
43 #include "ceres/cxsparse.h"
44 #include "ceres/detect_structure.h"
45 #include "ceres/internal/eigen.h"
46 #include "ceres/internal/scoped_ptr.h"
47 #include "ceres/lapack.h"
48 #include "ceres/linear_solver.h"
49 #include "ceres/schur_complement_solver.h"
50 #include "ceres/suitesparse.h"
51 #include "ceres/triplet_sparse_matrix.h"
52 #include "ceres/types.h"
53 #include "ceres/wall_time.h"
54 #include "Eigen/Dense"
55 #include "Eigen/SparseCore"
56 
57 namespace ceres {
58 namespace internal {
59 
SolveImpl(BlockSparseMatrix * A,const double * b,const LinearSolver::PerSolveOptions & per_solve_options,double * x)60 LinearSolver::Summary SchurComplementSolver::SolveImpl(
61     BlockSparseMatrix* A,
62     const double* b,
63     const LinearSolver::PerSolveOptions& per_solve_options,
64     double* x) {
65   EventLogger event_logger("SchurComplementSolver::Solve");
66 
67   if (eliminator_.get() == NULL) {
68     InitStorage(A->block_structure());
69     DetectStructure(*A->block_structure(),
70                     options_.elimination_groups[0],
71                     &options_.row_block_size,
72                     &options_.e_block_size,
73                     &options_.f_block_size);
74     eliminator_.reset(CHECK_NOTNULL(SchurEliminatorBase::Create(options_)));
75     eliminator_->Init(options_.elimination_groups[0], A->block_structure());
76   };
77   fill(x, x + A->num_cols(), 0.0);
78   event_logger.AddEvent("Setup");
79 
80   eliminator_->Eliminate(A, b, per_solve_options.D, lhs_.get(), rhs_.get());
81   event_logger.AddEvent("Eliminate");
82 
83   double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
84   const LinearSolver::Summary summary =
85       SolveReducedLinearSystem(reduced_solution);
86   event_logger.AddEvent("ReducedSolve");
87 
88   if (summary.termination_type == LINEAR_SOLVER_SUCCESS) {
89     eliminator_->BackSubstitute(A, b, per_solve_options.D, reduced_solution, x);
90     event_logger.AddEvent("BackSubstitute");
91   }
92 
93   return summary;
94 }
95 
96 // Initialize a BlockRandomAccessDenseMatrix to store the Schur
97 // complement.
InitStorage(const CompressedRowBlockStructure * bs)98 void DenseSchurComplementSolver::InitStorage(
99     const CompressedRowBlockStructure* bs) {
100   const int num_eliminate_blocks = options().elimination_groups[0];
101   const int num_col_blocks = bs->cols.size();
102 
103   vector<int> blocks(num_col_blocks - num_eliminate_blocks, 0);
104   for (int i = num_eliminate_blocks, j = 0;
105        i < num_col_blocks;
106        ++i, ++j) {
107     blocks[j] = bs->cols[i].size;
108   }
109 
110   set_lhs(new BlockRandomAccessDenseMatrix(blocks));
111   set_rhs(new double[lhs()->num_rows()]);
112 }
113 
114 // Solve the system Sx = r, assuming that the matrix S is stored in a
115 // BlockRandomAccessDenseMatrix. The linear system is solved using
116 // Eigen's Cholesky factorization.
117 LinearSolver::Summary
SolveReducedLinearSystem(double * solution)118 DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
119   LinearSolver::Summary summary;
120   summary.num_iterations = 0;
121   summary.termination_type = LINEAR_SOLVER_SUCCESS;
122   summary.message = "Success.";
123 
124   const BlockRandomAccessDenseMatrix* m =
125       down_cast<const BlockRandomAccessDenseMatrix*>(lhs());
126   const int num_rows = m->num_rows();
127 
128   // The case where there are no f blocks, and the system is block
129   // diagonal.
130   if (num_rows == 0) {
131     return summary;
132   }
133 
134   summary.num_iterations = 1;
135 
136   if (options().dense_linear_algebra_library_type == EIGEN) {
137     Eigen::LLT<Matrix, Eigen::Upper> llt =
138         ConstMatrixRef(m->values(), num_rows, num_rows)
139         .selfadjointView<Eigen::Upper>()
140         .llt();
141     if (llt.info() != Eigen::Success) {
142       summary.termination_type = LINEAR_SOLVER_FAILURE;
143       summary.message =
144           "Eigen failure. Unable to perform dense Cholesky factorization.";
145       return summary;
146     }
147 
148     VectorRef(solution, num_rows) = llt.solve(ConstVectorRef(rhs(), num_rows));
149   } else {
150     VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows);
151     summary.termination_type =
152         LAPACK::SolveInPlaceUsingCholesky(num_rows,
153                                           m->values(),
154                                           solution,
155                                           &summary.message);
156   }
157 
158   return summary;
159 }
160 
SparseSchurComplementSolver(const LinearSolver::Options & options)161 SparseSchurComplementSolver::SparseSchurComplementSolver(
162     const LinearSolver::Options& options)
163     : SchurComplementSolver(options),
164       factor_(NULL),
165       cxsparse_factor_(NULL) {
166 }
167 
~SparseSchurComplementSolver()168 SparseSchurComplementSolver::~SparseSchurComplementSolver() {
169   if (factor_ != NULL) {
170     ss_.Free(factor_);
171     factor_ = NULL;
172   }
173 
174   if (cxsparse_factor_ != NULL) {
175     cxsparse_.Free(cxsparse_factor_);
176     cxsparse_factor_ = NULL;
177   }
178 }
179 
180 // Determine the non-zero blocks in the Schur Complement matrix, and
181 // initialize a BlockRandomAccessSparseMatrix object.
InitStorage(const CompressedRowBlockStructure * bs)182 void SparseSchurComplementSolver::InitStorage(
183     const CompressedRowBlockStructure* bs) {
184   const int num_eliminate_blocks = options().elimination_groups[0];
185   const int num_col_blocks = bs->cols.size();
186   const int num_row_blocks = bs->rows.size();
187 
188   blocks_.resize(num_col_blocks - num_eliminate_blocks, 0);
189   for (int i = num_eliminate_blocks; i < num_col_blocks; ++i) {
190     blocks_[i - num_eliminate_blocks] = bs->cols[i].size;
191   }
192 
193   set<pair<int, int> > block_pairs;
194   for (int i = 0; i < blocks_.size(); ++i) {
195     block_pairs.insert(make_pair(i, i));
196   }
197 
198   int r = 0;
199   while (r < num_row_blocks) {
200     int e_block_id = bs->rows[r].cells.front().block_id;
201     if (e_block_id >= num_eliminate_blocks) {
202       break;
203     }
204     vector<int> f_blocks;
205 
206     // Add to the chunk until the first block in the row is
207     // different than the one in the first row for the chunk.
208     for (; r < num_row_blocks; ++r) {
209       const CompressedRow& row = bs->rows[r];
210       if (row.cells.front().block_id != e_block_id) {
211         break;
212       }
213 
214       // Iterate over the blocks in the row, ignoring the first
215       // block since it is the one to be eliminated.
216       for (int c = 1; c < row.cells.size(); ++c) {
217         const Cell& cell = row.cells[c];
218         f_blocks.push_back(cell.block_id - num_eliminate_blocks);
219       }
220     }
221 
222     sort(f_blocks.begin(), f_blocks.end());
223     f_blocks.erase(unique(f_blocks.begin(), f_blocks.end()), f_blocks.end());
224     for (int i = 0; i < f_blocks.size(); ++i) {
225       for (int j = i + 1; j < f_blocks.size(); ++j) {
226         block_pairs.insert(make_pair(f_blocks[i], f_blocks[j]));
227       }
228     }
229   }
230 
231   // Remaing rows do not contribute to the chunks and directly go
232   // into the schur complement via an outer product.
233   for (; r < num_row_blocks; ++r) {
234     const CompressedRow& row = bs->rows[r];
235     CHECK_GE(row.cells.front().block_id, num_eliminate_blocks);
236     for (int i = 0; i < row.cells.size(); ++i) {
237       int r_block1_id = row.cells[i].block_id - num_eliminate_blocks;
238       for (int j = 0; j < row.cells.size(); ++j) {
239         int r_block2_id = row.cells[j].block_id - num_eliminate_blocks;
240         if (r_block1_id <= r_block2_id) {
241           block_pairs.insert(make_pair(r_block1_id, r_block2_id));
242         }
243       }
244     }
245   }
246 
247   set_lhs(new BlockRandomAccessSparseMatrix(blocks_, block_pairs));
248   set_rhs(new double[lhs()->num_rows()]);
249 }
250 
251 LinearSolver::Summary
SolveReducedLinearSystem(double * solution)252 SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
253   switch (options().sparse_linear_algebra_library_type) {
254     case SUITE_SPARSE:
255       return SolveReducedLinearSystemUsingSuiteSparse(solution);
256     case CX_SPARSE:
257       return SolveReducedLinearSystemUsingCXSparse(solution);
258     case EIGEN_SPARSE:
259       return SolveReducedLinearSystemUsingEigen(solution);
260     default:
261       LOG(FATAL) << "Unknown sparse linear algebra library : "
262                  << options().sparse_linear_algebra_library_type;
263   }
264 
265   return LinearSolver::Summary();
266 }
267 
268 // Solve the system Sx = r, assuming that the matrix S is stored in a
269 // BlockRandomAccessSparseMatrix.  The linear system is solved using
270 // CHOLMOD's sparse cholesky factorization routines.
271 LinearSolver::Summary
SolveReducedLinearSystemUsingSuiteSparse(double * solution)272 SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
273     double* solution) {
274 #ifdef CERES_NO_SUITESPARSE
275 
276   LinearSolver::Summary summary;
277   summary.num_iterations = 0;
278   summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
279   summary.message = "Ceres was not built with SuiteSparse support. "
280       "Therefore, SPARSE_SCHUR cannot be used with SUITE_SPARSE";
281   return summary;
282 
283 #else
284 
285   LinearSolver::Summary summary;
286   summary.num_iterations = 0;
287   summary.termination_type = LINEAR_SOLVER_SUCCESS;
288   summary.message = "Success.";
289 
290   TripletSparseMatrix* tsm =
291       const_cast<TripletSparseMatrix*>(
292           down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
293   const int num_rows = tsm->num_rows();
294 
295   // The case where there are no f blocks, and the system is block
296   // diagonal.
297   if (num_rows == 0) {
298     return summary;
299   }
300 
301   summary.num_iterations = 1;
302   cholmod_sparse* cholmod_lhs = NULL;
303   if (options().use_postordering) {
304     // If we are going to do a full symbolic analysis of the schur
305     // complement matrix from scratch and not rely on the
306     // pre-ordering, then the fastest path in cholmod_factorize is the
307     // one corresponding to upper triangular matrices.
308 
309     // Create a upper triangular symmetric matrix.
310     cholmod_lhs = ss_.CreateSparseMatrix(tsm);
311     cholmod_lhs->stype = 1;
312 
313     if (factor_ == NULL) {
314       factor_ = ss_.BlockAnalyzeCholesky(cholmod_lhs,
315                                          blocks_,
316                                          blocks_,
317                                          &summary.message);
318     }
319   } else {
320     // If we are going to use the natural ordering (i.e. rely on the
321     // pre-ordering computed by solver_impl.cc), then the fastest
322     // path in cholmod_factorize is the one corresponding to lower
323     // triangular matrices.
324 
325     // Create a upper triangular symmetric matrix.
326     cholmod_lhs = ss_.CreateSparseMatrixTranspose(tsm);
327     cholmod_lhs->stype = -1;
328 
329     if (factor_ == NULL) {
330       factor_ = ss_.AnalyzeCholeskyWithNaturalOrdering(cholmod_lhs,
331                                                        &summary.message);
332     }
333   }
334 
335   if (factor_ == NULL) {
336     ss_.Free(cholmod_lhs);
337     summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
338     // No need to set message as it has already been set by the
339     // symbolic analysis routines above.
340     return summary;
341   }
342 
343   summary.termination_type =
344     ss_.Cholesky(cholmod_lhs, factor_, &summary.message);
345 
346   ss_.Free(cholmod_lhs);
347 
348   if (summary.termination_type != LINEAR_SOLVER_SUCCESS) {
349     // No need to set message as it has already been set by the
350     // numeric factorization routine above.
351     return summary;
352   }
353 
354   cholmod_dense*  cholmod_rhs =
355       ss_.CreateDenseVector(const_cast<double*>(rhs()), num_rows, num_rows);
356   cholmod_dense* cholmod_solution = ss_.Solve(factor_,
357                                               cholmod_rhs,
358                                               &summary.message);
359   ss_.Free(cholmod_rhs);
360 
361   if (cholmod_solution == NULL) {
362     summary.message =
363         "SuiteSparse failure. Unable to perform triangular solve.";
364     summary.termination_type = LINEAR_SOLVER_FAILURE;
365     return summary;
366   }
367 
368   VectorRef(solution, num_rows)
369       = VectorRef(static_cast<double*>(cholmod_solution->x), num_rows);
370   ss_.Free(cholmod_solution);
371   return summary;
372 #endif  // CERES_NO_SUITESPARSE
373 }
374 
375 // Solve the system Sx = r, assuming that the matrix S is stored in a
376 // BlockRandomAccessSparseMatrix.  The linear system is solved using
377 // CXSparse's sparse cholesky factorization routines.
378 LinearSolver::Summary
SolveReducedLinearSystemUsingCXSparse(double * solution)379 SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse(
380     double* solution) {
381 #ifdef CERES_NO_CXSPARSE
382 
383   LinearSolver::Summary summary;
384   summary.num_iterations = 0;
385   summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
386   summary.message = "Ceres was not built with CXSparse support. "
387       "Therefore, SPARSE_SCHUR cannot be used with CX_SPARSE";
388   return summary;
389 
390 #else
391 
392   LinearSolver::Summary summary;
393   summary.num_iterations = 0;
394   summary.termination_type = LINEAR_SOLVER_SUCCESS;
395   summary.message = "Success.";
396 
397   // Extract the TripletSparseMatrix that is used for actually storing S.
398   TripletSparseMatrix* tsm =
399       const_cast<TripletSparseMatrix*>(
400           down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
401   const int num_rows = tsm->num_rows();
402 
403   // The case where there are no f blocks, and the system is block
404   // diagonal.
405   if (num_rows == 0) {
406     return summary;
407   }
408 
409   cs_di* lhs = CHECK_NOTNULL(cxsparse_.CreateSparseMatrix(tsm));
410   VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows);
411 
412   // Compute symbolic factorization if not available.
413   if (cxsparse_factor_ == NULL) {
414     cxsparse_factor_ = cxsparse_.BlockAnalyzeCholesky(lhs, blocks_, blocks_);
415   }
416 
417   if (cxsparse_factor_ == NULL) {
418     summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
419     summary.message =
420         "CXSparse failure. Unable to find symbolic factorization.";
421   } else if (!cxsparse_.SolveCholesky(lhs, cxsparse_factor_, solution)) {
422     summary.termination_type = LINEAR_SOLVER_FAILURE;
423     summary.message = "CXSparse::SolveCholesky failed.";
424   }
425 
426   cxsparse_.Free(lhs);
427   return summary;
428 #endif  // CERES_NO_CXPARSE
429 }
430 
431 // Solve the system Sx = r, assuming that the matrix S is stored in a
432 // BlockRandomAccessSparseMatrix.  The linear system is solved using
433 // Eigen's sparse cholesky factorization routines.
434 LinearSolver::Summary
SolveReducedLinearSystemUsingEigen(double * solution)435 SparseSchurComplementSolver::SolveReducedLinearSystemUsingEigen(
436     double* solution) {
437 #ifndef CERES_USE_EIGEN_SPARSE
438 
439   LinearSolver::Summary summary;
440   summary.num_iterations = 0;
441   summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
442   summary.message =
443       "SPARSE_SCHUR cannot be used with EIGEN_SPARSE. "
444       "Ceres was not built with support for "
445       "Eigen's SimplicialLDLT decomposition. "
446       "This requires enabling building with -DEIGENSPARSE=ON.";
447   return summary;
448 
449 #else
450   EventLogger event_logger("SchurComplementSolver::EigenSolve");
451   LinearSolver::Summary summary;
452   summary.num_iterations = 0;
453   summary.termination_type = LINEAR_SOLVER_SUCCESS;
454   summary.message = "Success.";
455 
456   // Extract the TripletSparseMatrix that is used for actually storing S.
457   TripletSparseMatrix* tsm =
458       const_cast<TripletSparseMatrix*>(
459           down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix());
460   const int num_rows = tsm->num_rows();
461 
462   // The case where there are no f blocks, and the system is block
463   // diagonal.
464   if (num_rows == 0) {
465     return summary;
466   }
467 
468   // This is an upper triangular matrix.
469   CompressedRowSparseMatrix crsm(*tsm);
470   // Map this to a column major, lower triangular matrix.
471   Eigen::MappedSparseMatrix<double, Eigen::ColMajor> eigen_lhs(
472       crsm.num_rows(),
473       crsm.num_rows(),
474       crsm.num_nonzeros(),
475       crsm.mutable_rows(),
476       crsm.mutable_cols(),
477       crsm.mutable_values());
478   event_logger.AddEvent("ToCompressedRowSparseMatrix");
479 
480   // Compute symbolic factorization if one does not exist.
481   if (simplicial_ldlt_.get() == NULL) {
482     simplicial_ldlt_.reset(new SimplicialLDLT);
483     // This ordering is quite bad. The scalar ordering produced by the
484     // AMD algorithm is quite bad and can be an order of magnitude
485     // worse than the one computed using the block version of the
486     // algorithm.
487     simplicial_ldlt_->analyzePattern(eigen_lhs);
488     event_logger.AddEvent("Analysis");
489     if (simplicial_ldlt_->info() != Eigen::Success) {
490       summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
491       summary.message =
492           "Eigen failure. Unable to find symbolic factorization.";
493       return summary;
494     }
495   }
496 
497   simplicial_ldlt_->factorize(eigen_lhs);
498   event_logger.AddEvent("Factorize");
499   if (simplicial_ldlt_->info() != Eigen::Success) {
500     summary.termination_type = LINEAR_SOLVER_FAILURE;
501     summary.message = "Eigen failure. Unable to find numeric factoriztion.";
502     return summary;
503   }
504 
505   VectorRef(solution, num_rows) =
506       simplicial_ldlt_->solve(ConstVectorRef(rhs(), num_rows));
507   event_logger.AddEvent("Solve");
508   if (simplicial_ldlt_->info() != Eigen::Success) {
509     summary.termination_type = LINEAR_SOLVER_FAILURE;
510     summary.message = "Eigen failure. Unable to do triangular solve.";
511   }
512 
513   return summary;
514 #endif  // CERES_USE_EIGEN_SPARSE
515 }
516 
517 }  // namespace internal
518 }  // namespace ceres
519