// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// The LossFunction interface is the way users describe how residuals
// are converted to cost terms for the overall problem cost function.
// For the exact manner in which loss functions are converted to the
// overall cost for a problem, see problem.h.
//
// For least squares problems where there are no outliers and standard
// squared loss is expected, it is not necessary to create a loss
// function; instead, passing NULL to the problem when adding residuals
// implies a standard squared loss.
//
// For least squares problems where the minimization may encounter
// input terms that contain outliers, that is, completely bogus
// measurements, it is important to use a loss function that reduces
// their associated penalty.
//
// Consider a structure from motion problem. The unknowns are 3D
// points and camera parameters, and the measurements are image
// coordinates describing the expected reprojected position for a
// point in a camera. For example, we want to model the geometry of a
// street scene with fire hydrants and cars, observed by a moving
// camera with unknown parameters, and the only 3D points we care
// about are the pointy tippy-tops of the fire hydrants. Our magic
// image processing algorithm, which is responsible for producing the
// measurements that are input to Ceres, has found and matched all
// such tippy-tops in all image frames, except that in one of the
// frames it mistook a car's headlight for a hydrant. If we didn't do
// anything special (i.e. if we used a basic quadratic loss), the
// residual for the erroneous measurement would contribute an extreme
// error due to the quadratic nature of squared loss. This results in
// the entire solution getting pulled away from the optimum to reduce
// the large error that would otherwise be attributed to the wrong
// measurement.
//
// Using a robust loss function, the cost for large residuals is
// reduced. In the example above, this leads to outlier terms getting
// downweighted so they do not overly influence the final solution.
//
// What loss function is best?
//
// In general, there isn't a principled way to select a robust loss
// function. The authors suggest starting with a non-robust cost, then
// only experimenting with robust loss functions if standard squared
// loss doesn't work.

#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
#define CERES_PUBLIC_LOSS_FUNCTION_H_

#include "glog/logging.h"
#include "ceres/internal/macros.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/types.h"
#include "ceres/internal/disable_warnings.h"

namespace ceres {

class CERES_EXPORT LossFunction {
 public:
  virtual ~LossFunction() {}

  // For a residual vector with squared 2-norm 'sq_norm', this method
  // is required to fill in the value and derivatives of the loss
  // function (rho in this example):
  //
  //   out[0] = rho(sq_norm),
  //   out[1] = rho'(sq_norm),
  //   out[2] = rho''(sq_norm),
  //
  // Here the convention is that the contribution of a term to the
  // cost function is given by 1/2 rho(s), where
  //
  //   s = ||residuals||^2.
  //
  // Calling the method with a negative value of 's' is an error and
  // the implementations are not required to handle that case.
  //
  // Most sane choices of rho() satisfy:
  //
  //   rho(0)   = 0,
  //   rho'(0)  = 1,
  //   rho'(s)  < 1 in the outlier region,
  //   rho''(s) < 0 in the outlier region,
  //
  // so that they mimic the least squares cost for small residuals.
  virtual void Evaluate(double sq_norm, double out[3]) const = 0;
};
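// To illustrate this contract, a user-defined loss is implemented by
// subclassing LossFunction. The sketch below is illustrative only (the
// class name is made up and it is not part of this header); it happens
// to reproduce the rho used by SoftLOneLoss further down:
//
//   class MyPseudoHuberLoss : public LossFunction {
//    public:
//     // rho(s) = 2 (sqrt(1 + s) - 1).
//     virtual void Evaluate(double s, double out[3]) const {
//       const double sum = 1.0 + s;
//       const double root = std::sqrt(sum);
//       out[0] = 2.0 * (root - 1.0);   // rho(s)
//       out[1] = 1.0 / root;           // rho'(s)
//       out[2] = -0.5 / (sum * root);  // rho''(s)
//     }
//   };
//
// Note that out[1] and out[2] are derivatives with respect to the
// squared norm s, not with respect to the residual norm sqrt(s).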
// Some common implementations follow below.
//
// Note: in the region of interest (i.e. s < 3) we have:
//   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss


// This corresponds to no robustification.
//
//   rho(s) = s
//
// At s = 0: rho = [0, 1, 0].
//
// It is not normally necessary to use this, as passing NULL for the
// loss function when building the problem accomplishes the same
// thing.
class CERES_EXPORT TrivialLoss : public LossFunction {
 public:
  virtual void Evaluate(double, double*) const;
};

// Scaling
// -------
// Given one robustifier
//
//   s -> rho(s)
//
// one can change the length scale at which robustification takes
// place by adding a scale factor 'a' as follows:
//
//   s -> a^2 rho(s / a^2).
//
// The first and second derivatives are:
//
//   s -> rho'(s / a^2),
//   s -> (1 / a^2) rho''(s / a^2),
//
// but the behaviour near s = 0 is the same as the original function,
// i.e.
//
//   rho(s) = s + higher order terms,
//   a^2 rho(s / a^2) = s + higher order terms.
//
// The scalar 'a' should be positive.
//
// The reason for the appearance of squaring is that 'a' is in the
// units of the residual vector norm whereas 's' is a squared
// norm. For applications it is more convenient to specify 'a' than
// its square. The commonly used robustifiers below are described in
// un-scaled format (a = 1) but their implementations work for any
// non-zero value of 'a'.
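// As a concrete check of this convention (a worked example, not code
// from this file), consider HuberLoss (defined below) with a = 2, so
// that the robust region begins at s = a^2 = 4:
//
//   double out[3];
//   HuberLoss loss(2.0);
//   loss.Evaluate(1.0, out);  // s <= a^2: out[0] = s = 1.0.
//   loss.Evaluate(9.0, out);  // s >  a^2: out[0] = a^2 (2 sqrt(s / a^2) - 1)
//                             //         = 4 (2 sqrt(2.25) - 1) = 8.0.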
// Huber.
//
//   rho(s) = s               for s <= 1,
//   rho(s) = 2 sqrt(s) - 1   for s >= 1.
//
// At s = 0: rho = [0, 1, 0].
//
// The scaling parameter 'a' corresponds to 'delta' on this page:
//   http://en.wikipedia.org/wiki/Huber_Loss_Function
class CERES_EXPORT HuberLoss : public LossFunction {
 public:
  explicit HuberLoss(double a) : a_(a), b_(a * a) { }
  virtual void Evaluate(double, double*) const;

 private:
  const double a_;
  // b = a^2.
  const double b_;
};

// Soft L1, similar to Huber but smooth.
//
//   rho(s) = 2 (sqrt(1 + s) - 1).
//
// At s = 0: rho = [0, 1, -1/2].
class CERES_EXPORT SoftLOneLoss : public LossFunction {
 public:
  explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
  virtual void Evaluate(double, double*) const;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};

// Inspired by the Cauchy distribution.
//
//   rho(s) = log(1 + s).
//
// At s = 0: rho = [0, 1, -1].
class CERES_EXPORT CauchyLoss : public LossFunction {
 public:
  explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
  virtual void Evaluate(double, double*) const;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};

// Loss that is capped beyond a certain level using the arc-tangent function.
// The scaling parameter 'a' determines the level where falloff occurs.
// For costs much smaller than 'a', the loss function is linear and behaves like
// TrivialLoss, and for values much larger than 'a' the value asymptotically
// approaches the constant value of a * PI / 2.
//
//   rho(s) = a atan(s / a).
//
// At s = 0: rho = [0, 1, 0].
class CERES_EXPORT ArctanLoss : public LossFunction {
 public:
  explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
  virtual void Evaluate(double, double*) const;

 private:
  const double a_;
  // b = 1 / a^2.
  const double b_;
};
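// To make the ordering noted earlier concrete, evaluating the un-scaled
// (a = 1) formulas above at the sample point s = 2 gives:
//
//   TrivialLoss:   rho(2) = 2
//   HuberLoss:     rho(2) = 2 sqrt(2) - 1    ~= 1.83
//   SoftLOneLoss:  rho(2) = 2 (sqrt(3) - 1)  ~= 1.46
//   CauchyLoss:    rho(2) = log(3)           ~= 1.10
//
// which matches TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss.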
// Loss function that maps to approximately zero cost in a range around the
// origin and reverts to linear in error (quadratic in cost) beyond this range.
// The tolerance parameter 'a' sets the nominal point at which the
// transition occurs, and the transition size parameter 'b' sets the nominal
// distance over which most of the transition occurs. Both a and b must be
// greater than zero, and typically b will be set to a fraction of a.
// The slope rho'[s] varies smoothly from about 0 at s <= a - b to
// about 1 at s >= a + b.
//
// The term is computed as:
//
//   rho(s) = b log(1 + exp((s - a) / b)) - c0,
//
// where c0 is chosen so that rho(0) == 0:
//
//   c0 = b log(1 + exp(-a / b)).
//
// This has the following useful properties:
//
//   rho(s)   == 0  for s = 0
//   rho'(s)  ~= 0  for s << a - b
//   rho'(s)  ~= 1  for s >> a + b
//   rho''(s)  > 0  for all s
//
// In addition, all derivatives are continuous, and the curvature is
// concentrated in the range a - b to a + b.
//
// At s = 0: rho = [0, ~0, ~0].
class CERES_EXPORT TolerantLoss : public LossFunction {
 public:
  explicit TolerantLoss(double a, double b);
  virtual void Evaluate(double, double*) const;

 private:
  const double a_, b_, c_;
};

// Composition of two loss functions. The error is the result of first
// evaluating g followed by f to yield the composition f(g(s)).
// The loss functions must not be NULL.
class ComposedLoss : public LossFunction {
 public:
  explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
                        const LossFunction* g, Ownership ownership_g);
  virtual ~ComposedLoss();
  virtual void Evaluate(double, double*) const;

 private:
  internal::scoped_ptr<const LossFunction> f_, g_;
  const Ownership ownership_f_, ownership_g_;
};

// The discussion above has to do with length scaling: it affects the space
// in which s is measured. Sometimes you want to simply scale the output
// value of the robustifier. For example, you might want to weight
// different error terms differently (e.g., weight pixel reprojection
// errors differently from terrain errors).
//
// If rho is the wrapped robustifier, then this simply outputs
//
//   s -> a * rho(s).
//
// The first and second derivatives are, not surprisingly,
//
//   s -> a * rho'(s),
//   s -> a * rho''(s).
//
// Since we treat a NULL loss function as the identity loss function,
// rho = NULL is a valid input and will result in the input being scaled
// by a. This provides a simple way of implementing a scaled
// ResidualBlock.
class CERES_EXPORT ScaledLoss : public LossFunction {
 public:
  // Constructs a ScaledLoss wrapping another loss function. Takes
  // ownership of the wrapped loss function or not depending on the
  // ownership parameter.
  ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
      rho_(rho), a_(a), ownership_(ownership) { }

  virtual ~ScaledLoss() {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  virtual void Evaluate(double, double*) const;

 private:
  internal::scoped_ptr<const LossFunction> rho_;
  const double a_;
  const Ownership ownership_;
  CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
};
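// For example (an illustrative sketch; 'problem', 'cost_function' and
// 'parameters' are assumed to exist), a residual block can be
// down-weighted by a factor of 0.5 on top of Huber robustification:
//
//   problem.AddResidualBlock(
//       cost_function,
//       new ScaledLoss(new HuberLoss(1.0), 0.5, TAKE_OWNERSHIP),
//       parameters);
//
// Passing NULL as the wrapped robustifier instead yields a plain
// weighted squared loss.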
// Sometimes after the optimization problem has been constructed, we
// wish to mutate the scale of the loss function. For example, when
// performing estimation from data which has substantial outliers,
// convergence can be improved by starting out with a large scale,
// optimizing the problem and then reducing the scale. This can have
// better convergence behaviour than just using a loss function with a
// small scale.
//
// This class allows the user to implement a loss function whose scale
// can be mutated after an optimization problem has been constructed.
//
// Example usage:
//
//   Problem problem;
//
//   // Add parameter blocks.
//
//   CostFunction* cost_function =
//       new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
//           new UW_Camera_Mapper(feature_x, feature_y));
//
//   LossFunctionWrapper* loss_function =
//       new LossFunctionWrapper(new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//   problem.AddResidualBlock(cost_function, loss_function, parameters);
//
//   Solver::Options options;
//   Solver::Summary summary;
//
//   Solve(options, &problem, &summary);
//
//   loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//   Solve(options, &problem, &summary);
//
class CERES_EXPORT LossFunctionWrapper : public LossFunction {
 public:
  LossFunctionWrapper(LossFunction* rho, Ownership ownership)
      : rho_(rho), ownership_(ownership) {
  }

  virtual ~LossFunctionWrapper() {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }

  virtual void Evaluate(double sq_norm, double out[3]) const {
    CHECK_NOTNULL(rho_.get());
    rho_->Evaluate(sq_norm, out);
  }

  void Reset(LossFunction* rho, Ownership ownership) {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
    rho_.reset(rho);
    ownership_ = ownership;
  }

 private:
  internal::scoped_ptr<const LossFunction> rho_;
  Ownership ownership_;
  CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
};

}  // namespace ceres

#include "ceres/internal/reenable_warnings.h"

#endif  // CERES_PUBLIC_LOSS_FUNCTION_H_