1 // Ceres Solver - A fast non-linear least squares minimizer
2 // Copyright 2014 Google Inc. All rights reserved.
3 // http://code.google.com/p/ceres-solver/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 //
8 // * Redistributions of source code must retain the above copyright notice,
9 //   this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright notice,
11 //   this list of conditions and the following disclaimer in the documentation
12 //   and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors may be
14 //   used to endorse or promote products derived from this software without
15 //   specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 //
29 // Author: sameeragarwal@google.com (Sameer Agarwal)
30 
31 #ifndef CERES_PUBLIC_SOLVER_H_
32 #define CERES_PUBLIC_SOLVER_H_
33 
34 #include <cmath>
35 #include <string>
36 #include <vector>
37 #include "ceres/crs_matrix.h"
38 #include "ceres/internal/macros.h"
39 #include "ceres/internal/port.h"
40 #include "ceres/iteration_callback.h"
41 #include "ceres/ordered_groups.h"
42 #include "ceres/types.h"
43 #include "ceres/internal/disable_warnings.h"
44 
45 namespace ceres {
46 
47 class Problem;
48 
49 // Interface for non-linear least squares solvers.
50 class CERES_EXPORT Solver {
51  public:
52   virtual ~Solver();
53 
54   // The options structure contains, not surprisingly, options that control how
55   // the solver operates. The defaults should be suitable for a wide range of
56   // problems; however, better performance is often obtainable with tweaking.
57   //
58   // The constants are defined inside types.h
59   struct CERES_EXPORT Options {
60     // Default constructor that sets up a generic sparse problem.
OptionsOptions61     Options() {
62       minimizer_type = TRUST_REGION;
63       line_search_direction_type = LBFGS;
64       line_search_type = WOLFE;
65       nonlinear_conjugate_gradient_type = FLETCHER_REEVES;
66       max_lbfgs_rank = 20;
67       use_approximate_eigenvalue_bfgs_scaling = false;
68       line_search_interpolation_type = CUBIC;
69       min_line_search_step_size = 1e-9;
70       line_search_sufficient_function_decrease = 1e-4;
71       max_line_search_step_contraction = 1e-3;
72       min_line_search_step_contraction = 0.6;
73       max_num_line_search_step_size_iterations = 20;
74       max_num_line_search_direction_restarts = 5;
75       line_search_sufficient_curvature_decrease = 0.9;
76       max_line_search_step_expansion = 10.0;
77       trust_region_strategy_type = LEVENBERG_MARQUARDT;
78       dogleg_type = TRADITIONAL_DOGLEG;
79       use_nonmonotonic_steps = false;
80       max_consecutive_nonmonotonic_steps = 5;
81       max_num_iterations = 50;
82       max_solver_time_in_seconds = 1e9;
83       num_threads = 1;
84       initial_trust_region_radius = 1e4;
85       max_trust_region_radius = 1e16;
86       min_trust_region_radius = 1e-32;
87       min_relative_decrease = 1e-3;
88       min_lm_diagonal = 1e-6;
89       max_lm_diagonal = 1e32;
90       max_num_consecutive_invalid_steps = 5;
91       function_tolerance = 1e-6;
92       gradient_tolerance = 1e-10;
93       parameter_tolerance = 1e-8;
94 
95 #if defined(CERES_NO_SUITESPARSE) && defined(CERES_NO_CXSPARSE) && !defined(CERES_ENABLE_LGPL_CODE)
96       linear_solver_type = DENSE_QR;
97 #else
98       linear_solver_type = SPARSE_NORMAL_CHOLESKY;
99 #endif
100 
101       preconditioner_type = JACOBI;
102       visibility_clustering_type = CANONICAL_VIEWS;
103       dense_linear_algebra_library_type = EIGEN;
104 
105       // Choose a default sparse linear algebra library in the order:
106       //
107       //   SUITE_SPARSE > CX_SPARSE > EIGEN_SPARSE
108 #if !defined(CERES_NO_SUITESPARSE)
109       sparse_linear_algebra_library_type = SUITE_SPARSE;
110 #else
111   #if !defined(CERES_NO_CXSPARSE)
112       sparse_linear_algebra_library_type = CX_SPARSE;
113   #else
114     #if defined(CERES_USE_EIGEN_SPARSE)
115       sparse_linear_algebra_library_type = EIGEN_SPARSE;
116     #endif
117   #endif
118 #endif
119 
120       num_linear_solver_threads = 1;
121       use_postordering = false;
122       dynamic_sparsity = false;
123       min_linear_solver_iterations = 1;
124       max_linear_solver_iterations = 500;
125       eta = 1e-1;
126       jacobi_scaling = true;
127       use_inner_iterations = false;
128       inner_iteration_tolerance = 1e-3;
129       logging_type = PER_MINIMIZER_ITERATION;
130       minimizer_progress_to_stdout = false;
131       trust_region_problem_dump_directory = "/tmp";
132       trust_region_problem_dump_format_type = TEXTFILE;
133       check_gradients = false;
134       gradient_check_relative_precision = 1e-8;
135       numeric_derivative_relative_step_size = 1e-6;
136       update_state_every_iteration = false;
137     }
138 
139     // Returns true if the options struct has a valid
140     // configuration. Returns false otherwise, and fills in *error
141     // with a message describing the problem.
142     bool IsValid(string* error) const;
143 
144     // Minimizer options ----------------------------------------
145 
146     // Ceres supports the two major families of optimization strategies -
147     // Trust Region and Line Search.
148     //
149     // 1. The line search approach first finds a descent direction
150     // along which the objective function will be reduced and then
151     // computes a step size that decides how far should move along
152     // that direction. The descent direction can be computed by
153     // various methods, such as gradient descent, Newton's method and
154     // Quasi-Newton method. The step size can be determined either
155     // exactly or inexactly.
156     //
157     // 2. The trust region approach approximates the objective
158     // function using using a model function (often a quadratic) over
159     // a subset of the search space known as the trust region. If the
160     // model function succeeds in minimizing the true objective
161     // function the trust region is expanded; conversely, otherwise it
162     // is contracted and the model optimization problem is solved
163     // again.
164     //
165     // Trust region methods are in some sense dual to line search methods:
166     // trust region methods first choose a step size (the size of the
167     // trust region) and then a step direction while line search methods
168     // first choose a step direction and then a step size.
169     MinimizerType minimizer_type;
170 
171     LineSearchDirectionType line_search_direction_type;
172     LineSearchType line_search_type;
173     NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;
174 
175     // The LBFGS hessian approximation is a low rank approximation to
176     // the inverse of the Hessian matrix. The rank of the
177     // approximation determines (linearly) the space and time
178     // complexity of using the approximation. Higher the rank, the
179     // better is the quality of the approximation. The increase in
180     // quality is however is bounded for a number of reasons.
181     //
182     // 1. The method only uses secant information and not actual
183     // derivatives.
184     //
185     // 2. The Hessian approximation is constrained to be positive
186     // definite.
187     //
188     // So increasing this rank to a large number will cost time and
189     // space complexity without the corresponding increase in solution
190     // quality. There are no hard and fast rules for choosing the
191     // maximum rank. The best choice usually requires some problem
192     // specific experimentation.
193     //
194     // For more theoretical and implementation details of the LBFGS
195     // method, please see:
196     //
197     // Nocedal, J. (1980). "Updating Quasi-Newton Matrices with
198     // Limited Storage". Mathematics of Computation 35 (151): 773–782.
199     int max_lbfgs_rank;
200 
201     // As part of the (L)BFGS update step (BFGS) / right-multiply step (L-BFGS),
202     // the initial inverse Hessian approximation is taken to be the Identity.
203     // However, Oren showed that using instead I * \gamma, where \gamma is
204     // chosen to approximate an eigenvalue of the true inverse Hessian can
205     // result in improved convergence in a wide variety of cases. Setting
206     // use_approximate_eigenvalue_bfgs_scaling to true enables this scaling.
207     //
208     // It is important to note that approximate eigenvalue scaling does not
209     // always improve convergence, and that it can in fact significantly degrade
210     // performance for certain classes of problem, which is why it is disabled
211     // by default.  In particular it can degrade performance when the
212     // sensitivity of the problem to different parameters varies significantly,
213     // as in this case a single scalar factor fails to capture this variation
214     // and detrimentally downscales parts of the jacobian approximation which
215     // correspond to low-sensitivity parameters. It can also reduce the
216     // robustness of the solution to errors in the jacobians.
217     //
218     // Oren S.S., Self-scaling variable metric (SSVM) algorithms
219     // Part II: Implementation and experiments, Management Science,
220     // 20(5), 863-874, 1974.
221     bool use_approximate_eigenvalue_bfgs_scaling;
222 
223     // Degree of the polynomial used to approximate the objective
224     // function. Valid values are BISECTION, QUADRATIC and CUBIC.
225     //
226     // BISECTION corresponds to pure backtracking search with no
227     // interpolation.
228     LineSearchInterpolationType line_search_interpolation_type;
229 
230     // If during the line search, the step_size falls below this
231     // value, it is truncated to zero.
232     double min_line_search_step_size;
233 
234     // Line search parameters.
235 
236     // Solving the line search problem exactly is computationally
237     // prohibitive. Fortunately, line search based optimization
238     // algorithms can still guarantee convergence if instead of an
239     // exact solution, the line search algorithm returns a solution
240     // which decreases the value of the objective function
241     // sufficiently. More precisely, we are looking for a step_size
242     // s.t.
243     //
244     //   f(step_size) <= f(0) + sufficient_decrease * f'(0) * step_size
245     //
246     double line_search_sufficient_function_decrease;
247 
248     // In each iteration of the line search,
249     //
250     //  new_step_size >= max_line_search_step_contraction * step_size
251     //
252     // Note that by definition, for contraction:
253     //
254     //  0 < max_step_contraction < min_step_contraction < 1
255     //
256     double max_line_search_step_contraction;
257 
258     // In each iteration of the line search,
259     //
260     //  new_step_size <= min_line_search_step_contraction * step_size
261     //
262     // Note that by definition, for contraction:
263     //
264     //  0 < max_step_contraction < min_step_contraction < 1
265     //
266     double min_line_search_step_contraction;
267 
268     // Maximum number of trial step size iterations during each line search,
269     // if a step size satisfying the search conditions cannot be found within
270     // this number of trials, the line search will terminate.
271     int max_num_line_search_step_size_iterations;
272 
273     // Maximum number of restarts of the line search direction algorithm before
274     // terminating the optimization. Restarts of the line search direction
275     // algorithm occur when the current algorithm fails to produce a new descent
276     // direction. This typically indicates a numerical failure, or a breakdown
277     // in the validity of the approximations used.
278     int max_num_line_search_direction_restarts;
279 
280     // The strong Wolfe conditions consist of the Armijo sufficient
281     // decrease condition, and an additional requirement that the
282     // step-size be chosen s.t. the _magnitude_ ('strong' Wolfe
283     // conditions) of the gradient along the search direction
284     // decreases sufficiently. Precisely, this second condition
285     // is that we seek a step_size s.t.
286     //
287     //   |f'(step_size)| <= sufficient_curvature_decrease * |f'(0)|
288     //
289     // Where f() is the line search objective and f'() is the derivative
290     // of f w.r.t step_size (d f / d step_size).
291     double line_search_sufficient_curvature_decrease;
292 
293     // During the bracketing phase of the Wolfe search, the step size is
294     // increased until either a point satisfying the Wolfe conditions is
295     // found, or an upper bound for a bracket containing a point satisfying
296     // the conditions is found.  Precisely, at each iteration of the
297     // expansion:
298     //
299     //   new_step_size <= max_step_expansion * step_size.
300     //
301     // By definition for expansion, max_step_expansion > 1.0.
302     double max_line_search_step_expansion;
303 
304     TrustRegionStrategyType trust_region_strategy_type;
305 
306     // Type of dogleg strategy to use.
307     DoglegType dogleg_type;
308 
309     // The classical trust region methods are descent methods, in that
310     // they only accept a point if it strictly reduces the value of
311     // the objective function.
312     //
313     // Relaxing this requirement allows the algorithm to be more
314     // efficient in the long term at the cost of some local increase
315     // in the value of the objective function.
316     //
317     // This is because allowing for non-decreasing objective function
318     // values in a princpled manner allows the algorithm to "jump over
319     // boulders" as the method is not restricted to move into narrow
320     // valleys while preserving its convergence properties.
321     //
322     // Setting use_nonmonotonic_steps to true enables the
323     // non-monotonic trust region algorithm as described by Conn,
324     // Gould & Toint in "Trust Region Methods", Section 10.1.
325     //
326     // The parameter max_consecutive_nonmonotonic_steps controls the
327     // window size used by the step selection algorithm to accept
328     // non-monotonic steps.
329     //
330     // Even though the value of the objective function may be larger
331     // than the minimum value encountered over the course of the
332     // optimization, the final parameters returned to the user are the
333     // ones corresponding to the minimum cost over all iterations.
334     bool use_nonmonotonic_steps;
335     int max_consecutive_nonmonotonic_steps;
336 
337     // Maximum number of iterations for the minimizer to run for.
338     int max_num_iterations;
339 
340     // Maximum time for which the minimizer should run for.
341     double max_solver_time_in_seconds;
342 
343     // Number of threads used by Ceres for evaluating the cost and
344     // jacobians.
345     int num_threads;
346 
347     // Trust region minimizer settings.
348     double initial_trust_region_radius;
349     double max_trust_region_radius;
350 
351     // Minimizer terminates when the trust region radius becomes
352     // smaller than this value.
353     double min_trust_region_radius;
354 
355     // Lower bound for the relative decrease before a step is
356     // accepted.
357     double min_relative_decrease;
358 
359     // For the Levenberg-Marquadt algorithm, the scaled diagonal of
360     // the normal equations J'J is used to control the size of the
361     // trust region. Extremely small and large values along the
362     // diagonal can make this regularization scheme
363     // fail. max_lm_diagonal and min_lm_diagonal, clamp the values of
364     // diag(J'J) from above and below. In the normal course of
365     // operation, the user should not have to modify these parameters.
366     double min_lm_diagonal;
367     double max_lm_diagonal;
368 
369     // Sometimes due to numerical conditioning problems or linear
370     // solver flakiness, the trust region strategy may return a
371     // numerically invalid step that can be fixed by reducing the
372     // trust region size. So the TrustRegionMinimizer allows for a few
373     // successive invalid steps before it declares NUMERICAL_FAILURE.
374     int max_num_consecutive_invalid_steps;
375 
376     // Minimizer terminates when
377     //
378     //   (new_cost - old_cost) < function_tolerance * old_cost;
379     //
380     double function_tolerance;
381 
382     // Minimizer terminates when
383     //
384     //   max_i |x - Project(Plus(x, -g(x))| < gradient_tolerance
385     //
386     // This value should typically be 1e-4 * function_tolerance.
387     double gradient_tolerance;
388 
389     // Minimizer terminates when
390     //
391     //   |step|_2 <= parameter_tolerance * ( |x|_2 +  parameter_tolerance)
392     //
393     double parameter_tolerance;
394 
395     // Linear least squares solver options -------------------------------------
396 
397     LinearSolverType linear_solver_type;
398 
399     // Type of preconditioner to use with the iterative linear solvers.
400     PreconditionerType preconditioner_type;
401 
402     // Type of clustering algorithm to use for visibility based
403     // preconditioning. This option is used only when the
404     // preconditioner_type is CLUSTER_JACOBI or CLUSTER_TRIDIAGONAL.
405     VisibilityClusteringType visibility_clustering_type;
406 
407     // Ceres supports using multiple dense linear algebra libraries
408     // for dense matrix factorizations. Currently EIGEN and LAPACK are
409     // the valid choices. EIGEN is always available, LAPACK refers to
410     // the system BLAS + LAPACK library which may or may not be
411     // available.
412     //
413     // This setting affects the DENSE_QR, DENSE_NORMAL_CHOLESKY and
414     // DENSE_SCHUR solvers. For small to moderate sized probem EIGEN
415     // is a fine choice but for large problems, an optimized LAPACK +
416     // BLAS implementation can make a substantial difference in
417     // performance.
418     DenseLinearAlgebraLibraryType dense_linear_algebra_library_type;
419 
420     // Ceres supports using multiple sparse linear algebra libraries
421     // for sparse matrix ordering and factorizations. Currently,
422     // SUITE_SPARSE and CX_SPARSE are the valid choices, depending on
423     // whether they are linked into Ceres at build time.
424     SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
425 
426     // Number of threads used by Ceres to solve the Newton
427     // step. Currently only the SPARSE_SCHUR solver is capable of
428     // using this setting.
429     int num_linear_solver_threads;
430 
431     // The order in which variables are eliminated in a linear solver
432     // can have a significant of impact on the efficiency and accuracy
433     // of the method. e.g., when doing sparse Cholesky factorization,
434     // there are matrices for which a good ordering will give a
435     // Cholesky factor with O(n) storage, where as a bad ordering will
436     // result in an completely dense factor.
437     //
438     // Ceres allows the user to provide varying amounts of hints to
439     // the solver about the variable elimination ordering to use. This
440     // can range from no hints, where the solver is free to decide the
441     // best possible ordering based on the user's choices like the
442     // linear solver being used, to an exact order in which the
443     // variables should be eliminated, and a variety of possibilities
444     // in between.
445     //
446     // Instances of the ParameterBlockOrdering class are used to
447     // communicate this information to Ceres.
448     //
449     // Formally an ordering is an ordered partitioning of the
450     // parameter blocks, i.e, each parameter block belongs to exactly
451     // one group, and each group has a unique non-negative integer
452     // associated with it, that determines its order in the set of
453     // groups.
454     //
455     // Given such an ordering, Ceres ensures that the parameter blocks in
456     // the lowest numbered group are eliminated first, and then the
457     // parmeter blocks in the next lowest numbered group and so on. Within
458     // each group, Ceres is free to order the parameter blocks as it
459     // chooses.
460     //
461     // If NULL, then all parameter blocks are assumed to be in the
462     // same group and the solver is free to decide the best
463     // ordering.
464     //
465     // e.g. Consider the linear system
466     //
467     //   x + y = 3
468     //   2x + 3y = 7
469     //
470     // There are two ways in which it can be solved. First eliminating x
471     // from the two equations, solving for y and then back substituting
472     // for x, or first eliminating y, solving for x and back substituting
473     // for y. The user can construct three orderings here.
474     //
475     //   {0: x}, {1: y} - eliminate x first.
476     //   {0: y}, {1: x} - eliminate y first.
477     //   {0: x, y}      - Solver gets to decide the elimination order.
478     //
479     // Thus, to have Ceres determine the ordering automatically using
480     // heuristics, put all the variables in group 0 and to control the
481     // ordering for every variable, create groups 0..N-1, one per
482     // variable, in the desired order.
483     //
484     // Bundle Adjustment
485     // -----------------
486     //
487     // A particular case of interest is bundle adjustment, where the user
488     // has two options. The default is to not specify an ordering at all,
489     // the solver will see that the user wants to use a Schur type solver
490     // and figure out the right elimination ordering.
491     //
492     // But if the user already knows what parameter blocks are points and
493     // what are cameras, they can save preprocessing time by partitioning
494     // the parameter blocks into two groups, one for the points and one
495     // for the cameras, where the group containing the points has an id
496     // smaller than the group containing cameras.
497     shared_ptr<ParameterBlockOrdering> linear_solver_ordering;
498 
499     // Sparse Cholesky factorization algorithms use a fill-reducing
500     // ordering to permute the columns of the Jacobian matrix. There
501     // are two ways of doing this.
502 
503     // 1. Compute the Jacobian matrix in some order and then have the
504     //    factorization algorithm permute the columns of the Jacobian.
505 
506     // 2. Compute the Jacobian with its columns already permuted.
507 
508     // The first option incurs a significant memory penalty. The
509     // factorization algorithm has to make a copy of the permuted
510     // Jacobian matrix, thus Ceres pre-permutes the columns of the
511     // Jacobian matrix and generally speaking, there is no performance
512     // penalty for doing so.
513 
514     // In some rare cases, it is worth using a more complicated
515     // reordering algorithm which has slightly better runtime
516     // performance at the expense of an extra copy of the Jacobian
517     // matrix. Setting use_postordering to true enables this tradeoff.
518     bool use_postordering;
519 
520     // Some non-linear least squares problems are symbolically dense but
521     // numerically sparse. i.e. at any given state only a small number
522     // of jacobian entries are non-zero, but the position and number of
523     // non-zeros is different depending on the state. For these problems
524     // it can be useful to factorize the sparse jacobian at each solver
525     // iteration instead of including all of the zero entries in a single
526     // general factorization.
527     //
528     // If your problem does not have this property (or you do not know),
529     // then it is probably best to keep this false, otherwise it will
530     // likely lead to worse performance.
531 
532     // This settings affects the SPARSE_NORMAL_CHOLESKY solver.
533     bool dynamic_sparsity;
534 
535     // Some non-linear least squares problems have additional
536     // structure in the way the parameter blocks interact that it is
537     // beneficial to modify the way the trust region step is computed.
538     //
539     // e.g., consider the following regression problem
540     //
541     //   y = a_1 exp(b_1 x) + a_2 exp(b_3 x^2 + c_1)
542     //
543     // Given a set of pairs{(x_i, y_i)}, the user wishes to estimate
544     // a_1, a_2, b_1, b_2, and c_1.
545     //
546     // Notice here that the expression on the left is linear in a_1
547     // and a_2, and given any value for b_1, b_2 and c_1, it is
548     // possible to use linear regression to estimate the optimal
549     // values of a_1 and a_2. Indeed, its possible to analytically
550     // eliminate the variables a_1 and a_2 from the problem all
551     // together. Problems like these are known as separable least
552     // squares problem and the most famous algorithm for solving them
553     // is the Variable Projection algorithm invented by Golub &
554     // Pereyra.
555     //
556     // Similar structure can be found in the matrix factorization with
557     // missing data problem. There the corresponding algorithm is
558     // known as Wiberg's algorithm.
559     //
560     // Ruhe & Wedin (Algorithms for Separable Nonlinear Least Squares
561     // Problems, SIAM Reviews, 22(3), 1980) present an analyis of
562     // various algorithms for solving separable non-linear least
563     // squares problems and refer to "Variable Projection" as
564     // Algorithm I in their paper.
565     //
566     // Implementing Variable Projection is tedious and expensive, and
567     // they present a simpler algorithm, which they refer to as
568     // Algorithm II, where once the Newton/Trust Region step has been
569     // computed for the whole problem (a_1, a_2, b_1, b_2, c_1) and
570     // additional optimization step is performed to estimate a_1 and
571     // a_2 exactly.
572     //
573     // This idea can be generalized to cases where the residual is not
574     // linear in a_1 and a_2, i.e., Solve for the trust region step
575     // for the full problem, and then use it as the starting point to
576     // further optimize just a_1 and a_2. For the linear case, this
577     // amounts to doing a single linear least squares solve. For
578     // non-linear problems, any method for solving the a_1 and a_2
579     // optimization problems will do. The only constraint on a_1 and
580     // a_2 is that they do not co-occur in any residual block.
581     //
582     // This idea can be further generalized, by not just optimizing
583     // (a_1, a_2), but decomposing the graph corresponding to the
584     // Hessian matrix's sparsity structure in a collection of
585     // non-overlapping independent sets and optimizing each of them.
586     //
587     // Setting "use_inner_iterations" to true enables the use of this
588     // non-linear generalization of Ruhe & Wedin's Algorithm II.  This
589     // version of Ceres has a higher iteration complexity, but also
590     // displays better convergence behaviour per iteration. Setting
591     // Solver::Options::num_threads to the maximum number possible is
592     // highly recommended.
593     bool use_inner_iterations;
594 
595     // If inner_iterations is true, then the user has two choices.
596     //
597     // 1. Let the solver heuristically decide which parameter blocks
598     //    to optimize in each inner iteration. To do this leave
599     //    Solver::Options::inner_iteration_ordering untouched.
600     //
601     // 2. Specify a collection of of ordered independent sets. Where
602     //    the lower numbered groups are optimized before the higher
603     //    number groups. Each group must be an independent set. Not
604     //    all parameter blocks need to be present in the ordering.
605     shared_ptr<ParameterBlockOrdering> inner_iteration_ordering;
606 
607     // Generally speaking, inner iterations make significant progress
608     // in the early stages of the solve and then their contribution
609     // drops down sharply, at which point the time spent doing inner
610     // iterations is not worth it.
611     //
612     // Once the relative decrease in the objective function due to
613     // inner iterations drops below inner_iteration_tolerance, the use
614     // of inner iterations in subsequent trust region minimizer
615     // iterations is disabled.
616     double inner_iteration_tolerance;
617 
618     // Minimum number of iterations for which the linear solver should
619     // run, even if the convergence criterion is satisfied.
620     int min_linear_solver_iterations;
621 
622     // Maximum number of iterations for which the linear solver should
623     // run. If the solver does not converge in less than
624     // max_linear_solver_iterations, then it returns MAX_ITERATIONS,
625     // as its termination type.
626     int max_linear_solver_iterations;
627 
628     // Forcing sequence parameter. The truncated Newton solver uses
629     // this number to control the relative accuracy with which the
630     // Newton step is computed.
631     //
632     // This constant is passed to ConjugateGradientsSolver which uses
633     // it to terminate the iterations when
634     //
635     //  (Q_i - Q_{i-1})/Q_i < eta/i
636     double eta;
637 
638     // Normalize the jacobian using Jacobi scaling before calling
639     // the linear least squares solver.
640     bool jacobi_scaling;
641 
642     // Logging options ---------------------------------------------------------
643 
644     LoggingType logging_type;
645 
646     // By default the Minimizer progress is logged to VLOG(1), which
647     // is sent to STDERR depending on the vlog level. If this flag is
648     // set to true, and logging_type is not SILENT, the logging output
649     // is sent to STDOUT.
650     bool minimizer_progress_to_stdout;
651 
652     // List of iterations at which the minimizer should dump the trust
653     // region problem. Useful for testing and benchmarking. If empty
654     // (default), no problems are dumped.
655     vector<int> trust_region_minimizer_iterations_to_dump;
656 
657     // Directory to which the problems should be written to. Should be
658     // non-empty if trust_region_minimizer_iterations_to_dump is
659     // non-empty and trust_region_problem_dump_format_type is not
660     // CONSOLE.
661     string trust_region_problem_dump_directory;
662     DumpFormatType trust_region_problem_dump_format_type;
663 
664     // Finite differences options ----------------------------------------------
665 
666     // Check all jacobians computed by each residual block with finite
667     // differences. This is expensive since it involves computing the
668     // derivative by normal means (e.g. user specified, autodiff,
669     // etc), then also computing it using finite differences. The
670     // results are compared, and if they differ substantially, details
671     // are printed to the log.
672     bool check_gradients;
673 
674     // Relative precision to check for in the gradient checker. If the
675     // relative difference between an element in a jacobian exceeds
676     // this number, then the jacobian for that cost term is dumped.
677     double gradient_check_relative_precision;
678 
679     // Relative shift used for taking numeric derivatives. For finite
680     // differencing, each dimension is evaluated at slightly shifted
681     // values; for the case of central difference, this is what gets
682     // evaluated:
683     //
684     //   delta = numeric_derivative_relative_step_size;
685     //   f_initial  = f(x)
686     //   f_forward  = f((1 + delta) * x)
687     //   f_backward = f((1 - delta) * x)
688     //
689     // The finite differencing is done along each dimension. The
690     // reason to use a relative (rather than absolute) step size is
691     // that this way, numeric differentation works for functions where
692     // the arguments are typically large (e.g. 1e9) and when the
693     // values are small (e.g. 1e-5). It is possible to construct
694     // "torture cases" which break this finite difference heuristic,
695     // but they do not come up often in practice.
696     //
697     // TODO(keir): Pick a smarter number than the default above! In
698     // theory a good choice is sqrt(eps) * x, which for doubles means
699     // about 1e-8 * x. However, I have found this number too
700     // optimistic. This number should be exposed for users to change.
701     double numeric_derivative_relative_step_size;
702 
703     // If true, the user's parameter blocks are updated at the end of
704     // every Minimizer iteration, otherwise they are updated when the
705     // Minimizer terminates. This is useful if, for example, the user
706     // wishes to visualize the state of the optimization every
707     // iteration.
708     bool update_state_every_iteration;
709 
710     // Callbacks that are executed at the end of each iteration of the
711     // Minimizer. An iteration may terminate midway, either due to
712     // numerical failures or because one of the convergence tests has
713     // been satisfied. In this case none of the callbacks are
714     // executed.
715 
716     // Callbacks are executed in the order that they are specified in
717     // this vector. By default, parameter blocks are updated only at
718     // the end of the optimization, i.e when the Minimizer
719     // terminates. This behaviour is controlled by
720     // update_state_every_variable. If the user wishes to have access
721     // to the update parameter blocks when his/her callbacks are
722     // executed, then set update_state_every_iteration to true.
723     //
724     // The solver does NOT take ownership of these pointers.
725     vector<IterationCallback*> callbacks;
726   };
727 
728   struct CERES_EXPORT Summary {
729     Summary();
730 
731     // A brief one line description of the state of the solver after
732     // termination.
733     string BriefReport() const;
734 
735     // A full multiline description of the state of the solver after
736     // termination.
737     string FullReport() const;
738 
739     bool IsSolutionUsable() const;
740 
741     // Minimizer summary -------------------------------------------------
742     MinimizerType minimizer_type;
743 
744     TerminationType termination_type;
745 
746     // Reason why the solver terminated.
747     string message;
748 
749     // Cost of the problem (value of the objective function) before
750     // the optimization.
751     double initial_cost;
752 
753     // Cost of the problem (value of the objective function) after the
754     // optimization.
755     double final_cost;
756 
757     // The part of the total cost that comes from residual blocks that
758     // were held fixed by the preprocessor because all the parameter
759     // blocks that they depend on were fixed.
760     double fixed_cost;
761 
762     // IterationSummary for each minimizer iteration in order.
763     vector<IterationSummary> iterations;
764 
765     // Number of minimizer iterations in which the step was
766     // accepted. Unless use_non_monotonic_steps is true this is also
767     // the number of steps in which the objective function value/cost
768     // went down.
769     int num_successful_steps;
770 
771     // Number of minimizer iterations in which the step was rejected
772     // either because it did not reduce the cost enough or the step
773     // was not numerically valid.
774     int num_unsuccessful_steps;
775 
776     // Number of times inner iterations were performed.
777     int num_inner_iteration_steps;
778 
779     // All times reported below are wall times.
780 
781     // When the user calls Solve, before the actual optimization
782     // occurs, Ceres performs a number of preprocessing steps. These
783     // include error checks, memory allocations, and reorderings. This
784     // time is accounted for as preprocessing time.
785     double preprocessor_time_in_seconds;
786 
787     // Time spent in the TrustRegionMinimizer.
788     double minimizer_time_in_seconds;
789 
790     // After the Minimizer is finished, some time is spent in
791     // re-evaluating residuals etc. This time is accounted for in the
792     // postprocessor time.
793     double postprocessor_time_in_seconds;
794 
795     // Some total of all time spent inside Ceres when Solve is called.
796     double total_time_in_seconds;
797 
798     // Time (in seconds) spent in the linear solver computing the
799     // trust region step.
800     double linear_solver_time_in_seconds;
801 
802     // Time (in seconds) spent evaluating the residual vector.
803     double residual_evaluation_time_in_seconds;
804 
805     // Time (in seconds) spent evaluating the jacobian matrix.
806     double jacobian_evaluation_time_in_seconds;
807 
808     // Time (in seconds) spent doing inner iterations.
809     double inner_iteration_time_in_seconds;
810 
811     // Number of parameter blocks in the problem.
812     int num_parameter_blocks;
813 
814     // Number of parameters in the probem.
815     int num_parameters;
816 
817     // Dimension of the tangent space of the problem (or the number of
818     // columns in the Jacobian for the problem). This is different
819     // from num_parameters if a parameter block is associated with a
820     // LocalParameterization
821     int num_effective_parameters;
822 
823     // Number of residual blocks in the problem.
824     int num_residual_blocks;
825 
826     // Number of residuals in the problem.
827     int num_residuals;
828 
829     // Number of parameter blocks in the problem after the inactive
830     // and constant parameter blocks have been removed. A parameter
831     // block is inactive if no residual block refers to it.
832     int num_parameter_blocks_reduced;
833 
834     // Number of parameters in the reduced problem.
835     int num_parameters_reduced;
836 
837     // Dimension of the tangent space of the reduced problem (or the
838     // number of columns in the Jacobian for the reduced
839     // problem). This is different from num_parameters_reduced if a
840     // parameter block in the reduced problem is associated with a
841     // LocalParameterization.
842     int num_effective_parameters_reduced;
843 
844     // Number of residual blocks in the reduced problem.
845     int num_residual_blocks_reduced;
846 
847     //  Number of residuals in the reduced problem.
848     int num_residuals_reduced;
849 
850     //  Number of threads specified by the user for Jacobian and
851     //  residual evaluation.
852     int num_threads_given;
853 
854     // Number of threads actually used by the solver for Jacobian and
855     // residual evaluation. This number is not equal to
856     // num_threads_given if OpenMP is not available.
857     int num_threads_used;
858 
859     //  Number of threads specified by the user for solving the trust
860     // region problem.
861     int num_linear_solver_threads_given;
862 
863     // Number of threads actually used by the solver for solving the
864     // trust region problem. This number is not equal to
865     // num_threads_given if OpenMP is not available.
866     int num_linear_solver_threads_used;
867 
868     // Type of the linear solver requested by the user.
869     LinearSolverType linear_solver_type_given;
870 
871     // Type of the linear solver actually used. This may be different
872     // from linear_solver_type_given if Ceres determines that the
873     // problem structure is not compatible with the linear solver
874     // requested or if the linear solver requested by the user is not
875     // available, e.g. The user requested SPARSE_NORMAL_CHOLESKY but
876     // no sparse linear algebra library was available.
877     LinearSolverType linear_solver_type_used;
878 
879     // Size of the elimination groups given by the user as hints to
880     // the linear solver.
881     vector<int> linear_solver_ordering_given;
882 
883     // Size of the parameter groups used by the solver when ordering
884     // the columns of the Jacobian.  This maybe different from
885     // linear_solver_ordering_given if the user left
886     // linear_solver_ordering_given blank and asked for an automatic
887     // ordering, or if the problem contains some constant or inactive
888     // parameter blocks.
889     vector<int> linear_solver_ordering_used;
890 
891     // True if the user asked for inner iterations to be used as part
892     // of the optimization.
893     bool inner_iterations_given;
894 
895     // True if the user asked for inner iterations to be used as part
896     // of the optimization and the problem structure was such that
897     // they were actually performed. e.g., in a problem with just one
898     // parameter block, inner iterations are not performed.
899     bool inner_iterations_used;
900 
901     // Size of the parameter groups given by the user for performing
902     // inner iterations.
903     vector<int> inner_iteration_ordering_given;
904 
905     // Size of the parameter groups given used by the solver for
906     // performing inner iterations. This maybe different from
907     // inner_iteration_ordering_given if the user left
908     // inner_iteration_ordering_given blank and asked for an automatic
909     // ordering, or if the problem contains some constant or inactive
910     // parameter blocks.
911     vector<int> inner_iteration_ordering_used;
912 
913     //  Type of preconditioner used for solving the trust region
914     //  step. Only meaningful when an iterative linear solver is used.
915     PreconditionerType preconditioner_type;
916 
917     // Type of clustering algorithm used for visibility based
918     // preconditioning. Only meaningful when the preconditioner_type
919     // is CLUSTER_JACOBI or CLUSTER_TRIDIAGONAL.
920     VisibilityClusteringType visibility_clustering_type;
921 
922     //  Type of trust region strategy.
923     TrustRegionStrategyType trust_region_strategy_type;
924 
925     //  Type of dogleg strategy used for solving the trust region
926     //  problem.
927     DoglegType dogleg_type;
928 
929     //  Type of the dense linear algebra library used.
930     DenseLinearAlgebraLibraryType dense_linear_algebra_library_type;
931 
932     // Type of the sparse linear algebra library used.
933     SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
934 
935     // Type of line search direction used.
936     LineSearchDirectionType line_search_direction_type;
937 
938     // Type of the line search algorithm used.
939     LineSearchType line_search_type;
940 
941     //  When performing line search, the degree of the polynomial used
942     //  to approximate the objective function.
943     LineSearchInterpolationType line_search_interpolation_type;
944 
945     // If the line search direction is NONLINEAR_CONJUGATE_GRADIENT,
946     // then this indicates the particular variant of non-linear
947     // conjugate gradient used.
948     NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;
949 
950     // If the type of the line search direction is LBFGS, then this
951     // indicates the rank of the Hessian approximation.
952     int max_lbfgs_rank;
953   };
954 
955   // Once a least squares problem has been built, this function takes
956   // the problem and optimizes it based on the values of the options
957   // parameters. Upon return, a detailed summary of the work performed
958   // by the preprocessor, the non-linear minmizer and the linear
959   // solver are reported in the summary object.
960   virtual void Solve(const Options& options,
961                      Problem* problem,
962                      Solver::Summary* summary);
963 };
964 
965 // Helper function which avoids going through the interface.
966 CERES_EXPORT void Solve(const Solver::Options& options,
967            Problem* problem,
968            Solver::Summary* summary);
969 
970 }  // namespace ceres
971 
972 #include "ceres/internal/reenable_warnings.h"
973 
974 #endif  // CERES_PUBLIC_SOLVER_H_
975