1 /*
2  * Library:   lmfit (Levenberg-Marquardt least squares fitting)
3  *
4  * File:      lmmin.c
5  *
6  * Contents:  Levenberg-Marquardt minimization.
7  *
8  * Copyright: MINPACK authors, The University of Chikago (1980-1999)
9  *            Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
10  *
11  * License:   see ../COPYING (FreeBSD)
12  *
13  * Homepage:  apps.jcns.fz-juelich.de/lmfit
14  */
15 
16 #include <assert.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <math.h>
20 #include <float.h>
21 #include "lmmin.h"
22 
23 #define MIN(a, b) (((a) <= (b)) ? (a) : (b))
24 #define MAX(a, b) (((a) >= (b)) ? (a) : (b))
25 #define SQR(x) (x) * (x)
26 
27 /* Declare functions that do the heavy numerics.
28    Implementions are in this source file, below lmmin.
29    Dependences: lmmin calls lmpar, which calls qrfac and qrsolv. */
30 void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
31               const double* diag, const double* qtb, const double delta,
32               double* par, double* x, double* Sdiag, double* aux, double* xdi);
33 void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
34               double* Acnorm, double* W);
35 void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
36                const double* diag, const double* qtb, double* x,
37                double* Sdiag, double* W);
38 
39 /******************************************************************************/
40 /*  Numeric constants                                                         */
41 /******************************************************************************/
42 
43 /* Set machine-dependent constants to values from float.h. */
44 #define LM_MACHEP DBL_EPSILON       /* resolution of arithmetic */
45 #define LM_DWARF DBL_MIN            /* smallest nonzero number */
46 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
47 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
48 #define LM_USERTOL 30 * LM_MACHEP   /* users are recommended to require this */
49 
50 /* If the above values do not work, the following seem good for an x86:
51  LM_MACHEP     .555e-16
52  LM_DWARF      9.9e-324
53  LM_SQRT_DWARF 1.e-160
54  LM_SQRT_GIANT 1.e150
55  LM_USER_TOL   1.e-14
56    The following values should work on any machine:
57  LM_MACHEP     1.2e-16
58  LM_DWARF      1.0e-38
59  LM_SQRT_DWARF 3.834e-20
60  LM_SQRT_GIANT 1.304e19
61  LM_USER_TOL   1.e-14
62 */
63 
64 /* Predefined control parameter sets (msgfile=NULL means stdout). */
65 const lm_control_struct lm_control_double = {
66     LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL,
67     100., 100, 1, NULL, 0, -1, -1};
68 const lm_control_struct lm_control_float = {
69     1.e-7, 1.e-7, 1.e-7, 1.e-7,
70     100., 100, 1, NULL, 0, -1, -1};
71 
72 /******************************************************************************/
73 /*  Message texts (indexed by status.info)                                    */
74 /******************************************************************************/
75 
76 const char* lm_infmsg[] = {
77     "found zero (sum of squares below underflow limit)",
78     "converged  (the relative error in the sum of squares is at most tol)",
79     "converged  (the relative error of the parameter vector is at most tol)",
80     "converged  (both errors are at most tol)",
81     "trapped    (by degeneracy; increasing epsilon might help)",
82     "exhausted  (number of function calls exceeding preset patience)",
83     "failed     (ftol<tol: cannot reduce sum of squares any further)",
84     "failed     (xtol<tol: cannot improve approximate solution any further)",
85     "failed     (gtol<tol: cannot improve approximate solution any further)",
86     "crashed    (not enough memory)",
87     "exploded   (fatal coding error: improper input parameters)",
88     "stopped    (break requested within function evaluation)",
89     "found nan  (function value is not-a-number or infinite)"};
90 
91 const char* lm_shortmsg[] = {
92     "found zero",
93     "converged (f)",
94     "converged (p)",
95     "converged (2)",
96     "degenerate",
97     "call limit",
98     "failed (f)",
99     "failed (p)",
100     "failed (o)",
101     "no memory",
102     "invalid input",
103     "user break",
104     "found nan"};
105 
106 /******************************************************************************/
107 /*  Monitoring auxiliaries.                                                   */
108 /******************************************************************************/
109 
lm_print_pars(int nout,const double * par,FILE * fout)110 void lm_print_pars(int nout, const double* par, FILE* fout)
111 {
112     int i;
113     for (i = 0; i < nout; ++i)
114         fprintf(fout, " %16.9g", par[i]);
115     fprintf(fout, "\n");
116 }
117 
118 /******************************************************************************/
119 /*  lmmin (main minimization routine)                                         */
120 /******************************************************************************/
121 
lmmin(const int n,double * x,const int m,const void * data,void (* evaluate)(const double * par,const int m_dat,const void * data,double * fvec,int * userbreak),const lm_control_struct * C,lm_status_struct * S)122 void lmmin(const int n, double* x, const int m, const void* data,
123            void (*evaluate)(const double* par, const int m_dat,
124                             const void* data, double* fvec, int* userbreak),
125            const lm_control_struct* C, lm_status_struct* S)
126 {
127     int j, i;
128     double actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step,
129         sum, temp, temp1, temp2, temp3;
130 
131     /***  Initialize internal variables.  ***/
132 
133     int maxfev = C->patience * (n+1);
134 
135     int inner_success; /* flag for loop control */
136     double lmpar = 0;  /* Levenberg-Marquardt parameter */
137     double delta = 0;
138     double xnorm = 0;
139     double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */
140 
141     int nout = C->n_maxpri == -1 ? n : MIN(C->n_maxpri, n);
142 
143     /* Reinterpret C->msgfile=NULL as stdout (which is unavailable for
144        compile-time initialization of lm_control_double and similar). */
145     FILE* msgfile = C->msgfile ? C->msgfile : stdout;
146 
147     /***  Default status info; must be set before first return statement.  ***/
148 
149     S->outcome = 0; /* status code */
150     S->userbreak = 0;
151     S->nfev = 0; /* function evaluation counter */
152 
153     /***  Check input parameters for errors.  ***/
154 
155     if (n <= 0) {
156         fprintf(stderr, "lmmin: invalid number of parameters %i\n", n);
157         S->outcome = 10;
158         return;
159     }
160     if (m < n) {
161         fprintf(stderr, "lmmin: number of data points (%i) "
162                         "smaller than number of parameters (%i)\n",
163                 m, n);
164         S->outcome = 10;
165         return;
166     }
167     if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) {
168         fprintf(stderr,
169                 "lmmin: negative tolerance (at least one of %g %g %g)\n",
170                 C->ftol, C->xtol, C->gtol);
171         S->outcome = 10;
172         return;
173     }
174     if (maxfev <= 0) {
175         fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n",
176                 maxfev);
177         S->outcome = 10;
178         return;
179     }
180     if (C->stepbound <= 0) {
181         fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound);
182         S->outcome = 10;
183         return;
184     }
185     if (C->scale_diag != 0 && C->scale_diag != 1) {
186         fprintf(stderr, "lmmin: logical variable scale_diag=%i, "
187                         "should be 0 or 1\n",
188                 C->scale_diag);
189         S->outcome = 10;
190         return;
191     }
192 
193     /***  Allocate work space.  ***/
194 
195     /* Allocate total workspace with just one system call */
196     char* ws;
197     if ((ws = malloc((2*m + 5*n + m*n) * sizeof(double) +
198                      n * sizeof(int))) == NULL) {
199         S->outcome = 9;
200         return;
201     }
202 
203     /* Assign workspace segments. */
204     char* pws = ws;
205     double* fvec = (double*)pws;
206     pws += m * sizeof(double) / sizeof(char);
207     double* diag = (double*)pws;
208     pws += n * sizeof(double) / sizeof(char);
209     double* qtf = (double*)pws;
210     pws += n * sizeof(double) / sizeof(char);
211     double* fjac = (double*)pws;
212     pws += n * m * sizeof(double) / sizeof(char);
213     double* wa1 = (double*)pws;
214     pws += n * sizeof(double) / sizeof(char);
215     double* wa2 = (double*)pws;
216     pws += n * sizeof(double) / sizeof(char);
217     double* wa3 = (double*)pws;
218     pws += n * sizeof(double) / sizeof(char);
219     double* wf = (double*)pws;
220     pws += m * sizeof(double) / sizeof(char);
221     int* Pivot = (int*)pws;
222     pws += n * sizeof(int) / sizeof(char);
223 
224     /* Initialize diag. */
225     if (!C->scale_diag)
226         for (j = 0; j < n; j++)
227             diag[j] = 1;
228 
229     /***  Evaluate function at starting point and calculate norm.  ***/
230 
231     if (C->verbosity) {
232         fprintf(msgfile, "lmmin start ");
233         lm_print_pars(nout, x, msgfile);
234     }
235     (*evaluate)(x, m, data, fvec, &(S->userbreak));
236     if (C->verbosity > 4)
237         for (i = 0; i < m; ++i)
238             fprintf(msgfile, "    fvec[%4i] = %18.8g\n", i, fvec[i]);
239     S->nfev = 1;
240     if (S->userbreak)
241         goto terminate;
242     fnorm = lm_enorm(m, fvec);
243     if (C->verbosity)
244         fprintf(msgfile, "  fnorm = %18.8g\n", fnorm);
245 
246     if (!isfinite(fnorm)) {
247         S->outcome = 12; /* nan */
248         goto terminate;
249     } else if (fnorm <= LM_DWARF) {
250         S->outcome = 0; /* sum of squares almost zero, nothing to do */
251         goto terminate;
252     }
253 
254     /***  The outer loop: compute gradient, then descend.  ***/
255 
256     for (int outer = 0;; ++outer) {
257 
258         /** Calculate the Jacobian. **/
259         for (j = 0; j < n; j++) {
260             temp = x[j];
261             step = MAX(eps * eps, eps * fabs(temp));
262             x[j] += step; /* replace temporarily */
263             (*evaluate)(x, m, data, wf, &(S->userbreak));
264             ++(S->nfev);
265             if (S->userbreak)
266                 goto terminate;
267             for (i = 0; i < m; i++)
268                 fjac[j*m+i] = (wf[i] - fvec[i]) / step;
269             x[j] = temp; /* restore */
270         }
271         if (C->verbosity >= 10) {
272             /* print the entire matrix */
273             printf("\nlmmin Jacobian\n");
274             for (i = 0; i < m; i++) {
275                 printf("  ");
276                 for (j = 0; j < n; j++)
277                     printf("%.5e ", fjac[j*m+i]);
278                 printf("\n");
279             }
280         }
281 
282         /** Compute the QR factorization of the Jacobian. **/
283 
284         /* fjac is an m by n array. The upper n by n submatrix of fjac is made
285          *   to contain an upper triangular matrix R with diagonal elements of
286          *   nonincreasing magnitude such that
287          *
288          *         P^T*(J^T*J)*P = R^T*R
289          *
290          *         (NOTE: ^T stands for matrix transposition),
291          *
292          *   where P is a permutation matrix and J is the final calculated
293          *   Jacobian. Column j of P is column Pivot(j) of the identity matrix.
294          *   The lower trapezoidal part of fjac contains information generated
295          *   during the computation of R.
296          *
297          * Pivot is an integer array of length n. It defines a permutation
298          *   matrix P such that jac*P = Q*R, where jac is the final calculated
299          *   Jacobian, Q is orthogonal (not stored), and R is upper triangular
300          *   with diagonal elements of nonincreasing magnitude. Column j of P
301          *   is column Pivot(j) of the identity matrix.
302          */
303 
304         lm_qrfac(m, n, fjac, Pivot, wa1, wa2, wa3);
305         /* return values are Pivot, wa1=rdiag, wa2=acnorm */
306 
307         /** Form Q^T * fvec, and store first n components in qtf. **/
308         for (i = 0; i < m; i++)
309             wf[i] = fvec[i];
310 
311         for (j = 0; j < n; j++) {
312             temp3 = fjac[j*m+j];
313             if (temp3 != 0) {
314                 sum = 0;
315                 for (i = j; i < m; i++)
316                     sum += fjac[j*m+i] * wf[i];
317                 temp = -sum / temp3;
318                 for (i = j; i < m; i++)
319                     wf[i] += fjac[j*m+i] * temp;
320             }
321             fjac[j*m+j] = wa1[j];
322             qtf[j] = wf[j];
323         }
324 
325         /**  Compute norm of scaled gradient and detect degeneracy. **/
326         gnorm = 0;
327         for (j = 0; j < n; j++) {
328             if (wa2[Pivot[j]] == 0)
329                 continue;
330             sum = 0;
331             for (i = 0; i <= j; i++)
332                 sum += fjac[j*m+i] * qtf[i];
333             gnorm = MAX(gnorm, fabs(sum / wa2[Pivot[j]] / fnorm));
334         }
335 
336         if (gnorm <= C->gtol) {
337             S->outcome = 4;
338             goto terminate;
339         }
340 
341         /** Initialize or update diag and delta. **/
342         if (!outer) { /* first iteration only */
343             if (C->scale_diag) {
344                 /* diag := norms of the columns of the initial Jacobian */
345                 for (j = 0; j < n; j++)
346                     diag[j] = wa2[j] ? wa2[j] : 1;
347                 /* xnorm := || D x || */
348                 for (j = 0; j < n; j++)
349                     wa3[j] = diag[j] * x[j];
350                 xnorm = lm_enorm(n, wa3);
351                 if (C->verbosity >= 2) {
352                     fprintf(msgfile, "lmmin diag  ");
353                     lm_print_pars(nout, x, msgfile); // xnorm
354                     fprintf(msgfile, "  xnorm = %18.8g\n", xnorm);
355                 }
356                 /* Only now print the header for the loop table. */
357                 if (C->verbosity >= 3) {
358                     fprintf(msgfile, "  o  i     lmpar    prered"
359                                      "          ratio    dirder      delta"
360                                      "      pnorm                 fnorm");
361                     for (i = 0; i < nout; ++i)
362                         fprintf(msgfile, "               p%i", i);
363                     fprintf(msgfile, "\n");
364                 }
365             } else {
366                 xnorm = lm_enorm(n, x);
367             }
368             if (!isfinite(xnorm)) {
369                 S->outcome = 12; /* nan */
370                 goto terminate;
371             }
372             /* Initialize the step bound delta. */
373             if (xnorm)
374                 delta = C->stepbound * xnorm;
375             else
376                 delta = C->stepbound;
377         } else {
378             if (C->scale_diag) {
379                 for (j = 0; j < n; j++)
380                     diag[j] = MAX(diag[j], wa2[j]);
381             }
382         }
383 
384         /** The inner loop. **/
385         int inner = 0;
386         do {
387 
388             /** Determine the Levenberg-Marquardt parameter. **/
389             lm_lmpar(n, fjac, m, Pivot, diag, qtf, delta, &lmpar,
390                      wa1, wa2, wf, wa3);
391             /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
392 
393             /* Predict scaled reduction. */
394             pnorm = lm_enorm(n, wa3);
395             if (!isfinite(pnorm)) {
396                 S->outcome = 12; /* nan */
397                 goto terminate;
398             }
399             temp2 = lmpar * SQR(pnorm / fnorm);
400             for (j = 0; j < n; j++) {
401                 wa3[j] = 0;
402                 for (i = 0; i <= j; i++)
403                     wa3[i] -= fjac[j*m+i] * wa1[Pivot[j]];
404             }
405             temp1 = SQR(lm_enorm(n, wa3) / fnorm);
406             if (!isfinite(temp1)) {
407                 S->outcome = 12; /* nan */
408                 goto terminate;
409             }
410             prered = temp1 + 2*temp2;
411             dirder = -temp1 + temp2; /* scaled directional derivative */
412 
413             /* At first call, adjust the initial step bound. */
414             if (!outer && pnorm < delta)
415                 delta = pnorm;
416 
417             /** Evaluate the function at x + p. **/
418             for (j = 0; j < n; j++)
419                 wa2[j] = x[j] - wa1[j];
420             (*evaluate)(wa2, m, data, wf, &(S->userbreak));
421             ++(S->nfev);
422             if (S->userbreak)
423                 goto terminate;
424             fnorm1 = lm_enorm(m, wf);
425             if (!isfinite(fnorm1)) {
426                 S->outcome = 12; /* nan */
427                 goto terminate;
428             }
429 
430             /** Evaluate the scaled reduction. **/
431 
432             /* Actual scaled reduction. */
433             actred = 1 - SQR(fnorm1 / fnorm);
434 
435             /* Ratio of actual to predicted reduction. */
436             ratio = prered ? actred / prered : 0;
437 
438             if (C->verbosity == 2) {
439                 fprintf(msgfile, "lmmin (%i:%i) ", outer, inner);
440                 lm_print_pars(nout, wa2, msgfile); // fnorm1,
441             } else if (C->verbosity >= 3) {
442                 printf("%3i %2i %9.2g %9.2g %14.6g"
443                        " %9.2g %10.3e %10.3e %21.15e",
444                        outer, inner, lmpar, prered, ratio,
445                        dirder, delta, pnorm, fnorm1);
446                 for (i = 0; i < nout; ++i)
447                     fprintf(msgfile, " %16.9g", wa2[i]);
448                 fprintf(msgfile, "\n");
449             }
450 
451             /* Update the step bound. */
452             if (ratio <= 0.25) {
453                 if (actred >= 0)
454                     temp = 0.5;
455                 else if (actred > -99) /* -99 = 1-1/0.1^2 */
456                     temp = MAX(dirder / (2*dirder + actred), 0.1);
457                 else
458                     temp = 0.1;
459                 delta = temp * MIN(delta, pnorm / 0.1);
460                 lmpar /= temp;
461             } else if (ratio >= 0.75) {
462                 delta = 2 * pnorm;
463                 lmpar *= 0.5;
464             } else if (!lmpar) {
465                 delta = 2 * pnorm;
466             }
467 
468             /**  On success, update solution, and test for convergence. **/
469 
470             inner_success = ratio >= 1e-4;
471             if (inner_success) {
472 
473                 /* Update x, fvec, and their norms. */
474                 if (C->scale_diag) {
475                     for (j = 0; j < n; j++) {
476                         x[j] = wa2[j];
477                         wa2[j] = diag[j] * x[j];
478                     }
479                 } else {
480                     for (j = 0; j < n; j++)
481                         x[j] = wa2[j];
482                 }
483                 for (i = 0; i < m; i++)
484                     fvec[i] = wf[i];
485                 xnorm = lm_enorm(n, wa2);
486                 if (!isfinite(xnorm)) {
487                     S->outcome = 12; /* nan */
488                     goto terminate;
489                 }
490                 fnorm = fnorm1;
491             }
492 
493             /* Convergence tests. */
494             S->outcome = 0;
495             if (fnorm <= LM_DWARF)
496                 goto terminate; /* success: sum of squares almost zero */
497             /* Test two criteria (both may be fulfilled). */
498             if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2)
499                 S->outcome = 1; /* success: x almost stable */
500             if (delta <= C->xtol * xnorm)
501                 S->outcome += 2; /* success: sum of squares almost stable */
502             if (S->outcome != 0) {
503                 goto terminate;
504             }
505 
506             /** Tests for termination and stringent tolerances. **/
507             if (S->nfev >= maxfev) {
508                 S->outcome = 5;
509                 goto terminate;
510             }
511             if (fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP &&
512                 ratio <= 2) {
513                 S->outcome = 6;
514                 goto terminate;
515             }
516             if (delta <= LM_MACHEP * xnorm) {
517                 S->outcome = 7;
518                 goto terminate;
519             }
520             if (gnorm <= LM_MACHEP) {
521                 S->outcome = 8;
522                 goto terminate;
523             }
524 
525             /** End of the inner loop. Repeat if iteration unsuccessful. **/
526             ++inner;
527         } while (!inner_success);
528 
529     }; /***  End of the outer loop.  ***/
530 
531 terminate:
532     S->fnorm = lm_enorm(m, fvec);
533     if (C->verbosity >= 2)
534         printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n", S->outcome,
535                xnorm, C->ftol, C->xtol);
536     if (C->verbosity & 1) {
537         fprintf(msgfile, "lmmin final ");
538         lm_print_pars(nout, x, msgfile); // S->fnorm,
539         fprintf(msgfile, "  fnorm = %18.8g\n", S->fnorm);
540     }
541     if (S->userbreak) /* user-requested break */
542         S->outcome = 11;
543 
544     /***  Deallocate the workspace.  ***/
545     free(ws);
546 
547 } /*** lmmin. ***/
548 
549 /******************************************************************************/
550 /*  lm_lmpar (determine Levenberg-Marquardt parameter)                        */
551 /******************************************************************************/
552 
lm_lmpar(const int n,double * r,const int ldr,const int * Pivot,const double * diag,const double * qtb,const double delta,double * par,double * x,double * Sdiag,double * aux,double * xdi)553 void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
554               const double* diag, const double* qtb, const double delta,
555               double* par, double* x, double* Sdiag, double* aux, double* xdi)
556 /*     Given an m by n matrix A, an n by n nonsingular diagonal matrix D,
557  *     an m-vector b, and a positive number delta, the problem is to
558  *     determine a parameter value par such that if x solves the system
559  *
560  *          A*x = b  and  sqrt(par)*D*x = 0
561  *
562  *     in the least squares sense, and dxnorm is the Euclidean norm of D*x,
563  *     then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and
564  *     abs(dxnorm-delta) < 0.1*delta.
565  *
566  *     Using lm_qrsolv, this subroutine completes the solution of the
567  *     problem if it is provided with the necessary information from the
568  *     QR factorization, with column pivoting, of A. That is, if A*P = Q*R,
569  *     where P is a permutation matrix, Q has orthogonal columns, and R is
570  *     an upper triangular matrix with diagonal elements of nonincreasing
571  *     magnitude, then lmpar expects the full upper triangle of R, the
572  *     permutation matrix P, and the first n components of Q^T*b. On output
573  *     lmpar also provides an upper triangular matrix S such that
574  *
575  *          P^T*(A^T*A + par*D*D)*P = S^T*S.
576  *
577  *     S is employed within lmpar and may be of separate interest.
578  *
579  *     Only a few iterations are generally needed for convergence of the
580  *     algorithm. If, however, the limit of 10 iterations is reached, then
581  *     the output par will contain the best value obtained so far.
582  *
583  *     Parameters:
584  *
585  *      n is a positive integer INPUT variable set to the order of r.
586  *
587  *      r is an n by n array. On INPUT the full upper triangle must contain
588  *        the full upper triangle of the matrix R. On OUTPUT the full upper
589  *        triangle is unaltered, and the strict lower triangle contains the
590  *        strict upper triangle (transposed) of the upper triangular matrix S.
591  *
592  *      ldr is a positive integer INPUT variable not less than n which
593  *        specifies the leading dimension of the array R.
594  *
595  *      Pivot is an integer INPUT array of length n which defines the
596  *        permutation matrix P such that A*P = Q*R. Column j of P is column
597  *        Pivot(j) of the identity matrix.
598  *
599  *      diag is an INPUT array of length n which must contain the diagonal
600  *        elements of the matrix D.
601  *
602  *      qtb is an INPUT array of length n which must contain the first
603  *        n elements of the vector Q^T*b.
604  *
605  *      delta is a positive INPUT variable which specifies an upper bound
606  *        on the Euclidean norm of D*x.
607  *
608  *      par is a nonnegative variable. On INPUT par contains an initial
609  *        estimate of the Levenberg-Marquardt parameter. On OUTPUT par
610  *        contains the final estimate.
611  *
612  *      x is an OUTPUT array of length n which contains the least-squares
613  *        solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par.
614  *
615  *      Sdiag is an array of length n needed as workspace; on OUTPUT it
616  *        contains the diagonal elements of the upper triangular matrix S.
617  *
618  *      aux is a multi-purpose work array of length n.
619  *
620  *      xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
621  *
622  */
623 {
624     int i, iter, j, nsing;
625     double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
626     double sum, temp;
627     static double p1 = 0.1;
628 
629     /*** Compute and store in x the Gauss-Newton direction. If the Jacobian
630          is rank-deficient, obtain a least-squares solution. ***/
631 
632     nsing = n;
633     for (j = 0; j < n; j++) {
634         aux[j] = qtb[j];
635         if (r[j*ldr+j] == 0 && nsing == n)
636             nsing = j;
637         if (nsing < n)
638             aux[j] = 0;
639     }
640     for (j = nsing-1; j >= 0; j--) {
641         aux[j] = aux[j] / r[j+ldr*j];
642         temp = aux[j];
643         for (i = 0; i < j; i++)
644             aux[i] -= r[j*ldr+i] * temp;
645     }
646 
647     for (j = 0; j < n; j++)
648         x[Pivot[j]] = aux[j];
649 
650     /*** Initialize the iteration counter, evaluate the function at the origin,
651          and test for acceptance of the Gauss-Newton direction. ***/
652 
653     for (j = 0; j < n; j++)
654         xdi[j] = diag[j] * x[j];
655     dxnorm = lm_enorm(n, xdi);
656     fp = dxnorm - delta;
657     if (fp <= p1 * delta) {
658 #ifdef LMFIT_DEBUG_MESSAGES
659         printf("debug lmpar nsing=%d, n=%d, terminate[fp<=p1*del]\n", nsing, n);
660 #endif
661         *par = 0;
662         return;
663     }
664 
665     /*** If the Jacobian is not rank deficient, the Newton step provides a
666          lower bound, parl, for the zero of the function. Otherwise set this
667          bound to zero. ***/
668 
669     parl = 0;
670     if (nsing >= n) {
671         for (j = 0; j < n; j++)
672             aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
673 
674         for (j = 0; j < n; j++) {
675             sum = 0;
676             for (i = 0; i < j; i++)
677                 sum += r[j*ldr+i] * aux[i];
678             aux[j] = (aux[j] - sum) / r[j+ldr*j];
679         }
680         temp = lm_enorm(n, aux);
681         parl = fp / delta / temp / temp;
682     }
683 
684     /*** Calculate an upper bound, paru, for the zero of the function. ***/
685 
686     for (j = 0; j < n; j++) {
687         sum = 0;
688         for (i = 0; i <= j; i++)
689             sum += r[j*ldr+i] * qtb[i];
690         aux[j] = sum / diag[Pivot[j]];
691     }
692     gnorm = lm_enorm(n, aux);
693     paru = gnorm / delta;
694     if (paru == 0)
695         paru = LM_DWARF / MIN(delta, p1);
696 
697     /*** If the input par lies outside of the interval (parl,paru),
698          set par to the closer endpoint. ***/
699 
700     *par = MAX(*par, parl);
701     *par = MIN(*par, paru);
702     if (*par == 0)
703         *par = gnorm / dxnorm;
704 
705     /*** Iterate. ***/
706 
707     for (iter = 0;; iter++) {
708 
709         /** Evaluate the function at the current value of par. **/
710         if (*par == 0)
711             *par = MAX(LM_DWARF, 0.001 * paru);
712         temp = sqrt(*par);
713         for (j = 0; j < n; j++)
714             aux[j] = temp * diag[j];
715 
716         lm_qrsolv(n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi);
717         /* return values are r, x, Sdiag */
718 
719         for (j = 0; j < n; j++)
720             xdi[j] = diag[j] * x[j]; /* used as output */
721         dxnorm = lm_enorm(n, xdi);
722         fp_old = fp;
723         fp = dxnorm - delta;
724 
725         /** If the function is small enough, accept the current value
726             of par. Also test for the exceptional cases where parl
727             is zero or the number of iterations has reached 10. **/
728         if (fabs(fp) <= p1 * delta ||
729             (parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10) {
730 #ifdef LMFIT_DEBUG_MESSAGES
731             printf("debug lmpar nsing=%d, iter=%d, "
732                    "par=%.4e [%.4e %.4e], delta=%.4e, fp=%.4e\n",
733                    nsing, iter, *par, parl, paru, delta, fp);
734 #endif
735             break; /* the only exit from the iteration. */
736         }
737 
738         /** Compute the Newton correction. **/
739         for (j = 0; j < n; j++)
740             aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
741 
742         for (j = 0; j < n; j++) {
743             aux[j] = aux[j] / Sdiag[j];
744             for (i = j+1; i < n; i++)
745                 aux[i] -= r[j*ldr+i] * aux[j];
746         }
747         temp = lm_enorm(n, aux);
748         parc = fp / delta / temp / temp;
749 
750         /** Depending on the sign of the function, update parl or paru. **/
751         if (fp > 0)
752             parl = MAX(parl, *par);
753         else /* fp < 0 [the case fp==0 is precluded by the break condition] */
754             paru = MIN(paru, *par);
755 
756         /** Compute an improved estimate for par. **/
757         *par = MAX(parl, *par + parc);
758     }
759 
760 } /*** lm_lmpar. ***/
761 
762 /******************************************************************************/
763 /*  lm_qrfac (QR factorization, from lapack)                                  */
764 /******************************************************************************/
765 
lm_qrfac(const int m,const int n,double * A,int * Pivot,double * Rdiag,double * Acnorm,double * W)766 void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
767               double* Acnorm, double* W)
768 /*
769  *     This subroutine uses Householder transformations with column pivoting
770  *     to compute a QR factorization of the m by n matrix A. That is, qrfac
771  *     determines an orthogonal matrix Q, a permutation matrix P, and an
772  *     upper trapezoidal matrix R with diagonal elements of nonincreasing
773  *     magnitude, such that A*P = Q*R. The Householder transformation for
774  *     column k, k = 1,2,...,n, is of the form
775  *
776  *          I - 2*w*wT/|w|^2
777  *
778  *     where w has zeroes in the first k-1 positions.
779  *
780  *     Parameters:
781  *
782  *      m is an INPUT parameter set to the number of rows of A.
783  *
784  *      n is an INPUT parameter set to the number of columns of A.
785  *
786  *      A is an m by n array. On INPUT, A contains the matrix for which the
787  *        QR factorization is to be computed. On OUTPUT the strict upper
788  *        trapezoidal part of A contains the strict upper trapezoidal part
789  *        of R, and the lower trapezoidal part of A contains a factored form
790  *        of Q (the non-trivial elements of the vectors w described above).
791  *
792  *      Pivot is an integer OUTPUT array of length n that describes the
793  *        permutation matrix P. Column j of P is column Pivot(j) of the
794  *        identity matrix.
795  *
796  *      Rdiag is an OUTPUT array of length n which contains the diagonal
797  *        elements of R.
798  *
799  *      Acnorm is an OUTPUT array of length n which contains the norms of
800  *        the corresponding columns of the input matrix A. If this information
801  *        is not needed, then Acnorm can share storage with Rdiag.
802  *
803  *      W is a work array of length n.
804  *
805  */
806 {
807     int i, j, k, kmax;
808     double ajnorm, sum, temp;
809 
810 #ifdef LMFIT_DEBUG_MESSAGES
811     printf("debug qrfac\n");
812 #endif
813 
814     /** Compute initial column norms;
815         initialize Pivot with identity permutation. ***/
816     for (j = 0; j < n; j++) {
817         W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]);
818         Pivot[j] = j;
819     }
820 
821     /** Loop over columns of A. **/
822     assert(n <= m);
823     for (j = 0; j < n; j++) {
824 
825         /** Bring the column of largest norm into the pivot position. **/
826         kmax = j;
827         for (k = j+1; k < n; k++)
828             if (Rdiag[k] > Rdiag[kmax])
829                 kmax = k;
830 
831         if (kmax != j) {
832             /* Swap columns j and kmax. */
833             k = Pivot[j];
834             Pivot[j] = Pivot[kmax];
835             Pivot[kmax] = k;
836             for (i = 0; i < m; i++) {
837                 temp = A[j*m+i];
838                 A[j*m+i] = A[kmax*m+i];
839                 A[kmax*m+i] = temp;
840             }
841             /* Half-swap: Rdiag[j], W[j] won't be needed any further. */
842             Rdiag[kmax] = Rdiag[j];
843             W[kmax] = W[j];
844         }
845 
846         /** Compute the Householder reflection vector w_j to reduce the
847             j-th column of A to a multiple of the j-th unit vector. **/
848         ajnorm = lm_enorm(m-j, &A[j*m+j]);
849         if (ajnorm == 0) {
850             Rdiag[j] = 0;
851             continue;
852         }
853 
854         /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|,
855            where the sign +- is chosen to avoid cancellation in w_jj. */
856         if (A[j*m+j] < 0)
857             ajnorm = -ajnorm;
858         for (i = j; i < m; i++)
859             A[j*m+i] /= ajnorm;
860         A[j*m+j] += 1;
861 
862         /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2
863             to the remaining columns, and update the norms. **/
864         for (k = j+1; k < n; k++) {
865             /* Compute scalar product w_j * a_j. */
866             sum = 0;
867             for (i = j; i < m; i++)
868                 sum += A[j*m+i] * A[k*m+i];
869 
870             /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */
871             temp = sum / A[j*m+j];
872 
873             /* Carry out transform U_w_j * a_k. */
874             for (i = j; i < m; i++)
875                 A[k*m+i] -= temp * A[j*m+i];
876 
877             /* No idea what happens here. */
878             if (Rdiag[k] != 0) {
879                 temp = A[m*k+j] / Rdiag[k];
880                 if (fabs(temp) < 1) {
881                     Rdiag[k] *= sqrt(1 - SQR(temp));
882                     temp = Rdiag[k] / W[k];
883                 } else
884                     temp = 0;
885                 if (temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP) {
886                     Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]);
887                     W[k] = Rdiag[k];
888                 }
889             }
890         }
891 
892         Rdiag[j] = -ajnorm;
893     }
894 } /*** lm_qrfac. ***/
895 
896 /******************************************************************************/
897 /*  lm_qrsolv (linear least-squares)                                          */
898 /******************************************************************************/
899 
lm_qrsolv(const int n,double * r,const int ldr,const int * Pivot,const double * diag,const double * qtb,double * x,double * Sdiag,double * W)900 void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
901                const double* diag, const double* qtb, double* x,
902                double* Sdiag, double* W)
903 /*
904  *     Given an m by n matrix A, an n by n diagonal matrix D, and an
905  *     m-vector b, the problem is to determine an x which solves the
906  *     system
907  *
908  *          A*x = b  and  D*x = 0
909  *
910  *     in the least squares sense.
911  *
912  *     This subroutine completes the solution of the problem if it is
913  *     provided with the necessary information from the QR factorization,
914  *     with column pivoting, of A. That is, if A*P = Q*R, where P is a
915  *     permutation matrix, Q has orthogonal columns, and R is an upper
916  *     triangular matrix with diagonal elements of nonincreasing magnitude,
917  *     then qrsolv expects the full upper triangle of R, the permutation
918  *     matrix P, and the first n components of Q^T*b. The system
919  *     A*x = b, D*x = 0, is then equivalent to
920  *
921  *          R*z = Q^T*b,  P^T*D*P*z = 0,
922  *
923  *     where x = P*z. If this system does not have full rank, then a least
924  *     squares solution is obtained. On output qrsolv also provides an upper
925  *     triangular matrix S such that
926  *
927  *          P^T*(A^T*A + D*D)*P = S^T*S.
928  *
929  *     S is computed within qrsolv and may be of separate interest.
930  *
931  *     Parameters:
932  *
933  *      n is a positive integer INPUT variable set to the order of R.
934  *
935  *      r is an n by n array. On INPUT the full upper triangle must contain
936  *        the full upper triangle of the matrix R. On OUTPUT the full upper
937  *        triangle is unaltered, and the strict lower triangle contains the
938  *        strict upper triangle (transposed) of the upper triangular matrix S.
939  *
940  *      ldr is a positive integer INPUT variable not less than n which
941  *        specifies the leading dimension of the array R.
942  *
943  *      Pivot is an integer INPUT array of length n which defines the
944  *        permutation matrix P such that A*P = Q*R. Column j of P is column
945  *        Pivot(j) of the identity matrix.
946  *
947  *      diag is an INPUT array of length n which must contain the diagonal
948  *        elements of the matrix D.
949  *
950  *      qtb is an INPUT array of length n which must contain the first
951  *        n elements of the vector Q^T*b.
952  *
953  *      x is an OUTPUT array of length n which contains the least-squares
954  *        solution of the system A*x = b, D*x = 0.
955  *
956  *      Sdiag is an OUTPUT array of length n which contains the diagonal
957  *        elements of the upper triangular matrix S.
958  *
959  *      W is a work array of length n.
960  *
961  */
962 {
963     int i, kk, j, k, nsing;
964     double qtbpj, sum, temp;
965     double _sin, _cos, _tan, _cot; /* local variables, not functions */
966 
967     /*** Copy R and Q^T*b to preserve input and initialize S.
968          In particular, save the diagonal elements of R in x. ***/
969 
970     for (j = 0; j < n; j++) {
971         for (i = j; i < n; i++)
972             r[j*ldr+i] = r[i*ldr+j];
973         x[j] = r[j*ldr+j];
974         W[j] = qtb[j];
975     }
976 
977     /*** Eliminate the diagonal matrix D using a Givens rotation. ***/
978 
979     for (j = 0; j < n; j++) {
980 
981         /*** Prepare the row of D to be eliminated, locating the diagonal
982              element using P from the QR factorization. ***/
983 
984         if (diag[Pivot[j]] != 0) {
985             for (k = j; k < n; k++)
986                 Sdiag[k] = 0;
987             Sdiag[j] = diag[Pivot[j]];
988 
989             /*** The transformations to eliminate the row of D modify only
990                  a single element of Q^T*b beyond the first n, which is
991                  initially 0. ***/
992 
993             qtbpj = 0;
994             for (k = j; k < n; k++) {
995 
996                 /** Determine a Givens rotation which eliminates the
997                     appropriate element in the current row of D. **/
998                 if (Sdiag[k] == 0)
999                     continue;
1000                 kk = k + ldr * k;
1001                 if (fabs(r[kk]) < fabs(Sdiag[k])) {
1002                     _cot = r[kk] / Sdiag[k];
1003                     _sin = 1 / hypot(1, _cot);
1004                     _cos = _sin * _cot;
1005                 } else {
1006                     _tan = Sdiag[k] / r[kk];
1007                     _cos = 1 / hypot(1, _tan);
1008                     _sin = _cos * _tan;
1009                 }
1010 
1011                 /** Compute the modified diagonal element of R and
1012                     the modified element of (Q^T*b,0). **/
1013                 r[kk] = _cos * r[kk] + _sin * Sdiag[k];
1014                 temp = _cos * W[k] + _sin * qtbpj;
1015                 qtbpj = -_sin * W[k] + _cos * qtbpj;
1016                 W[k] = temp;
1017 
1018                 /** Accumulate the tranformation in the row of S. **/
1019                 for (i = k+1; i < n; i++) {
1020                     temp = _cos * r[k*ldr+i] + _sin * Sdiag[i];
1021                     Sdiag[i] = -_sin * r[k*ldr+i] + _cos * Sdiag[i];
1022                     r[k*ldr+i] = temp;
1023                 }
1024             }
1025         }
1026 
1027         /** Store the diagonal element of S and restore
1028             the corresponding diagonal element of R. **/
1029         Sdiag[j] = r[j*ldr+j];
1030         r[j*ldr+j] = x[j];
1031     }
1032 
1033     /*** Solve the triangular system for z. If the system is singular, then
1034         obtain a least-squares solution. ***/
1035 
1036     nsing = n;
1037     for (j = 0; j < n; j++) {
1038         if (Sdiag[j] == 0 && nsing == n)
1039             nsing = j;
1040         if (nsing < n)
1041             W[j] = 0;
1042     }
1043 
1044     for (j = nsing-1; j >= 0; j--) {
1045         sum = 0;
1046         for (i = j+1; i < nsing; i++)
1047             sum += r[j*ldr+i] * W[i];
1048         W[j] = (W[j] - sum) / Sdiag[j];
1049     }
1050 
1051     /*** Permute the components of z back to components of x. ***/
1052 
1053     for (j = 0; j < n; j++)
1054         x[Pivot[j]] = W[j];
1055 
1056 } /*** lm_qrsolv. ***/
1057 
1058 /******************************************************************************/
1059 /*  lm_enorm (Euclidean norm)                                                 */
1060 /******************************************************************************/
1061 
lm_enorm(int n,const double * x)1062 double lm_enorm(int n, const double* x)
1063 /*     This function calculates the Euclidean norm of an n-vector x.
1064  *
1065  *     The Euclidean norm is computed by accumulating the sum of squares
1066  *     in three different sums. The sums of squares for the small and large
1067  *     components are scaled so that no overflows occur. Non-destructive
1068  *     underflows are permitted. Underflows and overflows do not occur in
1069  *     the computation of the unscaled sum of squares for the intermediate
1070  *     components. The definitions of small, intermediate and large components
1071  *     depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1072  *     restrictions on these constants are that LM_SQRT_DWARF**2 not underflow
1073  *     and LM_SQRT_GIANT**2 not overflow.
1074  *
1075  *     Parameters:
1076  *
1077  *      n is a positive integer INPUT variable.
1078  *
1079  *      x is an INPUT array of length n.
1080  */
1081 {
1082     int i;
1083     double agiant, s1, s2, s3, xabs, x1max, x3max;
1084 
1085     s1 = 0;
1086     s2 = 0;
1087     s3 = 0;
1088     x1max = 0;
1089     x3max = 0;
1090     agiant = LM_SQRT_GIANT / n;
1091 
1092     /** Sum squares. **/
1093     for (i = 0; i < n; i++) {
1094         xabs = fabs(x[i]);
1095         if (xabs > LM_SQRT_DWARF) {
1096             if (xabs < agiant) {
1097                 s2 += SQR(xabs);
1098             } else if (xabs > x1max) {
1099                 s1 = 1 + s1 * SQR(x1max / xabs);
1100                 x1max = xabs;
1101             } else {
1102                 s1 += SQR(xabs / x1max);
1103             }
1104         } else if (xabs > x3max) {
1105             s3 = 1 + s3 * SQR(x3max / xabs);
1106             x3max = xabs;
1107         } else if (xabs != 0) {
1108             s3 += SQR(xabs / x3max);
1109         }
1110     }
1111 
1112     /** Calculate the norm. **/
1113     if (s1 != 0)
1114         return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1115     else if (s2 != 0)
1116         if (s2 >= x3max)
1117             return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1118         else
1119             return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1120     else
1121         return x3max * sqrt(s3);
1122 
1123 } /*** lm_enorm. ***/
1124