1 /++
2 $(H1 Nonlinear Least Squares Solver)
3 
4 Copyright: Copyright © 2018, Symmetry Investments & Kaleidic Associates Advisory Limited
5 Authors:   Ilya Yaroshenko
6 
7 Macros:
8 NDSLICE = $(REF_ALTTEXT $(TT $2), $2, mir, ndslice, $1)$(NBSP)
9 T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
10 +/
11 module mir.optim.least_squares;
12 
13 import mir.ndslice.slice: Slice, SliceKind, Contiguous;
14 import std.meta;
15 import std.traits;
16 
17 public import std.typecons: Flag, Yes, No;
18 
19 ///
20 enum LMStatus
21 {
22     ///
23     success = 0,
24     ///
25     initialized,
26     ///
27     badBounds = -32,
28     ///
29     badGuess,
30     ///
31     badMinStepQuality,
32     ///
33     badGoodStepQuality,
34     ///
35     badStepQuality,
36     ///
37     badLambdaParams,
38     ///
39     numericError,
40 }
41 
42 version(D_Exceptions)
43 {
44     /+
45     Exception for $(LREF optimize).
46     +/
47     private static immutable leastSquaresLMException_initialized = new Exception("mir-optim LM-algorithm: status is 'initialized', zero iterations");
48     private static immutable leastSquaresLMException_badBounds = new Exception("mir-optim LM-algorithm: " ~ LMStatus.badBounds.lmStatusString);
49     private static immutable leastSquaresLMException_badGuess = new Exception("mir-optim LM-algorithm: " ~ LMStatus.badGuess.lmStatusString);
50     private static immutable leastSquaresLMException_badMinStepQuality = new Exception("mir-optim LM-algorithm: " ~ LMStatus.badMinStepQuality.lmStatusString);
51     private static immutable leastSquaresLMException_badGoodStepQuality = new Exception("mir-optim LM-algorithm: " ~ LMStatus.badGoodStepQuality.lmStatusString);
52     private static immutable leastSquaresLMException_badStepQuality = new Exception("mir-optim LM-algorithm: " ~ LMStatus.badStepQuality.lmStatusString);
53     private static immutable leastSquaresLMException_badLambdaParams = new Exception("mir-optim LM-algorithm: " ~ LMStatus.badLambdaParams.lmStatusString);
54     private static immutable leastSquaresLMException_numericError = new Exception("mir-optim LM-algorithm: " ~ LMStatus.numericError.lmStatusString);
55     private static immutable leastSquaresLMExceptions = [
56         leastSquaresLMException_initialized,
57         leastSquaresLMException_badBounds,
58         leastSquaresLMException_badGuess,
59         leastSquaresLMException_badMinStepQuality,
60         leastSquaresLMException_badGoodStepQuality,
61         leastSquaresLMException_badStepQuality,
62         leastSquaresLMException_badLambdaParams,
63         leastSquaresLMException_numericError,
64     ];
65 }
66 
67 
68 
69 /++
70 Modified Levenberg-Marquardt parameters, data, and state.
71 +/
72 struct LeastSquaresLM(T)
73     if (is(T == double) || is(T == float))
74 {
75     import mir.math.common: sqrt;
76     import lapack: lapackint;
77 
78     /// Default tolerance in x
79     enum T tolXDefault = T(2) ^^ ((1 - T.mant_dig) / 2);
80     /// Default tolerance in gradient
81     enum T tolGDefault = T(2) ^^ ((1 - T.mant_dig) * 3 / 4);
82     /// Default value for `maxGoodResidual`.
83     enum T maxGoodResidualDefault = T.epsilon;
84     /// Default epsilon for finite difference Jacobian approximation
85     enum T jacobianEpsilonDefault = T(2) ^^ ((1 - T.mant_dig) / 3);
86     /// Default `lambda` is multiplied by this factor after step below min quality
87     enum T lambdaIncreaseDefault = 4;
88     /// Default `lambda` is multiplied by this factor after good quality steps
89     enum T lambdaDecreaseDefault = 0.25 / 1.618;
90     /// Default scale such as for steps below this quality, the trust region is shrinked
91     enum T minStepQualityDefault = 0.1;
92     /// Default scale such as for steps above thsis quality, the trust region is expanded
93     enum T goodStepQualityDefault = 0.68;
94     /// Default maximum trust region radius
95     enum T maxLambdaDefault = 1 / T.epsilon;
96     /// Default maximum trust region radius
97     enum T minLambdaDefault = T.epsilon;
98 
99     /// Delegates for low level D API.
100     alias FunctionDelegate = void delegate(Slice!(const(T)*) x, Slice!(T*) y) @safe nothrow @nogc pure;
101     /// ditto
102     alias JacobianDelegate = void delegate(Slice!(const(T)*) x, Slice!(T*, 2) J) @safe nothrow @nogc pure;
103 
104     /// Delegates for low level C API.
105     alias FunctionFunction = extern(C) void function(void* context, size_t m, size_t n, const(T)* x, T* y) @system nothrow @nogc pure;
106     ///
107     alias JacobianFunction = extern(C) void function(void* context, size_t m, size_t n, const(T)* x, T* J) @system nothrow @nogc pure;
108 
109     private T* _lower_ptr;
110     private T* _upper_ptr;
111     private T* _x_ptr;
112     private T* _deltaX_ptr;
113     private T* _deltaXBase_ptr;
114     private T* _mJy_ptr;
115     private lapackint* _ipiv_ptr;
116     private T* _y_ptr;
117     private T* _mBuffer_ptr;
118     private T* _nBuffer_ptr;
119     private T* _JJ_ptr;
120     private T* _J_ptr;
121     private Slice!(T*) _work;
122 
123     /++
124     Y = f(X) dimension.
125     
126     Can be decresed after allocation to reuse existing data allocated in LM.
127     +/
128     size_t m;
129 
130     /++
131     X dimension.
132 
133     Can be decresed after allocation to reuse existing data allocated in LM.
134     +/
135     size_t n;
136 
137     /// maximum number of iterations
138     size_t maxIter;
139     /// tolerance in x
140     T tolX = 0;
141     /// tolerance in gradient
142     T tolG = 0;
143     /// the algorithm stops iteration when the residual value is less or equal to `maxGoodResidual`.
144     T maxGoodResidual = 0;
145     /// (inverse of) initial trust region radius
146     T lambda = 0;
147     /// `lambda` is multiplied by this factor after step below min quality
148     T lambdaIncrease = 0;
149     /// `lambda` is multiplied by this factor after good quality steps
150     T lambdaDecrease = 0;
151     /// for steps below this quality, the trust region is shrinked
152     T minStepQuality = 0;
153     /// for steps above this quality, the trust region is expanded
154     T goodStepQuality = 0;
155     /// minimum trust region radius
156     T maxLambda = 0;
157     /// maximum trust region radius
158     T minLambda = 0;
159     /// epsilon for finite difference Jacobian approximation
160     T jacobianEpsilon = 0;
161 
162     /++
163     Counters and state values.
164     +/
165     size_t iterCt;
166     /// ditto
167     size_t fCalls;
168     /// ditto
169     size_t gCalls;
170     /// ditto
171     T residual = 0;
172     /// ditto
173     uint maxAge;
174     /// ditto
175     LMStatus status;
176     /// ditto
177     bool xConverged;
178     /// ditto
179     bool gConverged;
180     /// ditto
181     /// `residual <= maxGoodResidual`
182     bool fConverged()() const @property
183     {
184         return residual <= maxGoodResidual;
185     }
186 
187     /++
188     Initialize iteration params and allocates vectors in GC and resets iteration counters, states a.
189     Params:
190         m = Y = f(X) dimension
191         n = X dimension
192         lowerBounds = flag to allocate lower bounds
193         upperBounds = flag to allocate upper bounds
194     +/
195     this()(size_t m, size_t n, Flag!"lowerBounds" lowerBounds = No.lowerBounds, Flag!"upperBounds" upperBounds = No.upperBounds)
196     {
197         initParams;
198         gcAlloc(m, n, lowerBounds, upperBounds);
199     }
200 
201     /++
202     Initialize default params and allocates vectors in GC.
203     `lowerBounds` and `upperBounds` are binded to lm struct.
204     +/
205     this()(size_t m, size_t n, T[] lowerBounds, T[] upperBounds) @trusted
206     {
207         initParams;
208         gcAlloc(m, n, false, false);
209         if (lowerBounds)
210         {
211             assert(lowerBounds.length == n);
212             _lower_ptr = lowerBounds.ptr;
213         }
214         if (upperBounds)
215         {
216             assert(upperBounds.length == n);
217             _upper_ptr = upperBounds.ptr;
218         }
219     }
220 
221     @trusted pure nothrow @nogc @property
222     {
223         /++
224         Returns: lower bounds if they were set or zero length vector otherwise.
225         +/
226         Slice!(T*) lower() { return Slice!(T*)([_lower_ptr ? n : 0], _lower_ptr); }
227         /++
228         Returns: upper bounds if they were set or zero length vector otherwise.
229         +/
230         Slice!(T*) upper() { return Slice!(T*)([_upper_ptr ? n : 0], _upper_ptr); }
231         /++
232         Returns: Current X vector.
233         +/
234         Slice!(T*) x() { return Slice!(T*)([n], _x_ptr); }
235         /++
236         Returns: The last success ΔX.
237         +/
238         Slice!(T*) deltaX() { return Slice!(T*)([n], _deltaX_ptr); }
239         /++
240         Returns: Current Y = f(X).
241         +/
242         Slice!(T*) y() { return Slice!(T*)([m], _y_ptr); }
243     private:
244         Slice!(T*) mJy() { return Slice!(T*)([n], _mJy_ptr); }
245         Slice!(T*) deltaXBase() { return Slice!(T*)([n], _deltaXBase_ptr); }
246         Slice!(lapackint*) ipiv() { return Slice!(lapackint*)([n], _ipiv_ptr); }
247         Slice!(T*) mBuffer() { return Slice!(T*)([m], _mBuffer_ptr); }
248         Slice!(T*) nBuffer() { return Slice!(T*)([n], _nBuffer_ptr); }
249         Slice!(T*, 2) JJ() { return Slice!(T*, 2)([n, n], _JJ_ptr); }
250         Slice!(T*, 2) J() { return Slice!(T*, 2)([m, n], _J_ptr); }
251     }
252 
253     /++
254     Resets all counters and flags, fills `x`, `y`, `upper`, `lower`, vecors with default values.
255     +/
256     pragma(inline, false)
257     void reset()() @safe pure nothrow @nogc
258     {
259         lambda = 0;
260         iterCt = 0;
261         fCalls = 0;
262         gCalls = 0;     
263         residual = T.infinity;
264         maxAge = 0;
265         status = LMStatus.initialized;
266         xConverged = false;
267         gConverged = false;    
268         fill(T.nan, x);
269         fill(T.nan, y);
270         fill(-T.infinity, lower);
271         fill(+T.infinity, upper);
272     }
273 
274     /++
275     Initialize LM data structure with default params for iteration.
276     +/
277     pragma(inline, false)
278     void initParams()() @safe pure nothrow @nogc
279     {
280         maxIter = 100;
281         tolX = tolXDefault;
282         tolG = tolGDefault;
283         maxGoodResidual = maxGoodResidualDefault;
284         lambda = 0;
285         lambdaIncrease = lambdaIncreaseDefault;
286         lambdaDecrease = lambdaDecreaseDefault;
287         minStepQuality = minStepQualityDefault;
288         goodStepQuality = goodStepQualityDefault;
289         maxLambda = maxLambdaDefault;
290         minLambda = minLambdaDefault;
291         jacobianEpsilon = jacobianEpsilonDefault;
292     }
293 
294     /++
295     Allocates data in GC.
296     +/
297     pragma(inline, false)
298     auto gcAlloc()(size_t m, size_t n, bool lowerBounds = false, bool upperBounds = false) nothrow @trusted pure
299     {
300         import mir.lapack: syev_wk;
301         import mir.ndslice.allocation: uninitSlice, uninitAlignedSlice;
302         import mir.ndslice.slice: sliced;
303         import mir.ndslice.topology: canonical;
304 
305         enum alignment = 64;
306 
307         this.m = m;
308         this.n = n;
309         _lower_ptr = lowerBounds ? [n].uninitSlice!T._iterator : null;
310         _upper_ptr = upperBounds ? [n].uninitSlice!T._iterator : null;
311         _ipiv_ptr = [n].uninitSlice!lapackint._iterator;
312         _x_ptr = [n].uninitAlignedSlice!T(alignment)._iterator;
313         _deltaX_ptr = [n].uninitAlignedSlice!T(alignment)._iterator;
314         _mJy_ptr = [n].uninitAlignedSlice!T(alignment)._iterator;
315         _deltaXBase_ptr = [n].uninitAlignedSlice!T(alignment)._iterator;
316         _y_ptr = [m].uninitAlignedSlice!T(alignment)._iterator;
317         _mBuffer_ptr = [m].uninitAlignedSlice!T(alignment)._iterator;
318         _nBuffer_ptr = [n].uninitAlignedSlice!T(alignment)._iterator;
319         _JJ_ptr = [n, n].uninitAlignedSlice!T(alignment)._iterator;
320         _J_ptr = [m, n].uninitAlignedSlice!T(alignment)._iterator;
321         _work = [syev_wk('V', 'L', JJ.canonical, nBuffer)].uninitAlignedSlice!T(alignment);
322         reset;
323     }
324 
325     /++
326     Allocates data using C Runtime.
327     +/
328     pragma(inline, false)
329     void stdcAlloc()(size_t m, size_t n, bool lowerBounds = false, bool upperBounds = false) nothrow @nogc @trusted
330     {
331         import mir.lapack: syev_wk;
332         import mir.ndslice.allocation: stdcUninitSlice, stdcUninitAlignedSlice;
333         import mir.ndslice.slice: sliced;
334         import mir.ndslice.topology: canonical;
335 
336         enum alignment = 64; // AVX512 compatible
337 
338         this.m = m;
339         this.n = n;
340         _lower_ptr = lowerBounds ? [n].stdcUninitSlice!T._iterator : null;
341         _upper_ptr = upperBounds ? [n].stdcUninitSlice!T._iterator : null;
342         _ipiv_ptr = [n].stdcUninitSlice!lapackint._iterator;
343         _x_ptr = [n].stdcUninitAlignedSlice!T(alignment)._iterator;
344         _deltaX_ptr = [n].stdcUninitAlignedSlice!T(alignment)._iterator;
345         _mJy_ptr = [n].stdcUninitAlignedSlice!T(alignment)._iterator;
346         _deltaXBase_ptr = [n].stdcUninitAlignedSlice!T(alignment)._iterator;
347         _y_ptr = [m].stdcUninitAlignedSlice!T(alignment)._iterator;
348         _mBuffer_ptr = [m].stdcUninitAlignedSlice!T(alignment)._iterator;
349         _nBuffer_ptr = [n].stdcUninitAlignedSlice!T(alignment)._iterator;
350         _JJ_ptr = [n, n].stdcUninitAlignedSlice!T(alignment)._iterator;
351         _J_ptr = [m, n].stdcUninitAlignedSlice!T(alignment)._iterator;
352         _work = [syev_wk('V', 'L', JJ.canonical, nBuffer)].stdcUninitAlignedSlice!T(alignment);
353         reset;
354     }
355 
356     /++
357     Frees vectors including `x`, `y`, `upper`, `lower`. Use in pair with `.stdcAlloc`.
358     +/
359     pragma(inline, false)
360     void stdcFree()() nothrow @nogc @trusted
361     {
362         import core.stdc.stdlib: free;
363         import mir.internal.memory: alignedFree;
364         if (_lower_ptr) _lower_ptr.free;
365         if (_upper_ptr) _upper_ptr.free;
366         _ipiv_ptr.free;
367         _x_ptr.alignedFree;
368         _deltaX_ptr.alignedFree;
369         _mJy_ptr.alignedFree;
370         _deltaXBase_ptr.alignedFree;
371         _y_ptr.alignedFree;
372         _mBuffer_ptr.alignedFree;
373         _nBuffer_ptr.alignedFree;
374         _JJ_ptr.alignedFree;
375         _J_ptr.alignedFree;
376         _work._iterator.alignedFree;
377     }
378 
379     // size_t toHash() @safe pure nothrow @nogc
380     // {
381     //     return size_t(0);
382     // }
383     // size_t __xtoHash() @safe pure nothrow @nogc
384     // {
385     //     return size_t(0);
386     // }
387 }
388 
389 /++
390 High level D API for Levenberg-Marquardt Algorithm.
391 
392 Computes the argmin over x of `sum_i(f(x_i)^2)` using the Modified Levenberg-Marquardt
393 algorithm, and an estimate of the Jacobian of `f` at x.
394 
395 The function `f` should take an input vector of length `n`, and fill an output
396 vector of length `m`.
397 
398 The function `g` is the Jacobian of `f`, and should fill a row-major `m x n` matrix. 
399 
400 Throws: $(LREF LeastSquaresLMException)
401 Params:
402     f = `n -> m` function
403     g = `m × n` Jacobian (optional)
404     tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
405     lm = Levenberg-Marquardt data structure
406     taskPool = task Pool with `.parallel` method for finite difference jacobian approximation in case of g is null (optional)
407 See_also: $(LREF optimizeImpl)
408 +/
409 void optimize(alias f, alias g = null, alias tm = null, T)(scope ref LeastSquaresLM!T lm)
410     if ((is(T == float) || is(T == double)) && __traits(compiles, optimizeImpl!(f, g, tm, T)))
411 {
412     if (auto err = optimizeImpl!(f, g, tm, T)(lm))
413         throw leastSquaresLMExceptions[err == 1 ? 0 : err + 33];
414 }
415 
416 /// ditto
417 void optimize(alias f, TaskPool, T)(scope ref LeastSquaresLM!T lm, TaskPool taskPool)
418     if (is(T == float) || is(T == double))
419 {
420     auto tm = delegate(size_t count, void* taskContext, scope LeastSquaresTask task)
421     {
422         version(all)
423         {
424             import mir.ndslice.topology: iota;
425             foreach(i; taskPool.parallel(count.iota))
426                 task(taskContext, taskPool.size, taskPool.size <= 1 ? 0 : taskPool.workerIndex - 1, i);
427         }
428         else // for debug
429         {
430             foreach(i; 0 .. count)
431                 task(taskContext, 1, 0, i);
432         }
433     };
434     if (auto err = optimizeImpl!(f, null, tm, T)(lm))
435         throw leastSquaresLMExceptions[err == 1 ? 0 : err + 33];
436 }
437 
438 /// With Jacobian
439 @safe unittest
440 {
441     import mir.ndslice.allocation: slice;
442     import mir.ndslice.slice: sliced;
443     import mir.blas: nrm2;
444 
445     auto lm = LeastSquaresLM!double(2, 2);
446     lm.x[] = [100, 100];
447     lm.optimize!(
448         (x, y)
449         {
450             y[0] = x[0];
451             y[1] = 2 - x[1];
452         },
453         (x, J)
454         {
455             J[0, 0] = 1;
456             J[0, 1] = 0;
457             J[1, 0] = 0;
458             J[1, 1] = -1;
459         },
460     );
461 
462     assert(nrm2((lm.x - [0, 2].sliced).slice) < 1e-8);
463 }
464 
465 /// Using Jacobian finite difference approximation computed using in multiple threads.
466 version(none) @safe unittest
467 {
468     import mir.ndslice.allocation: slice;
469     import mir.ndslice.slice: sliced;
470     import mir.blas: nrm2;
471     import std.parallelism: taskPool;
472 
473     auto lm = LeastSquaresLM!double(2, 2);
474     lm.x[] = [-1.2, 1];
475     lm.optimize!(
476         (x, y) // Rosenbrock function
477         {
478             y[0] = 10 * (x[1] - x[0]^^2);
479             y[1] = 1 - x[0];
480         },
481     )(taskPool);
482 
483     assert(nrm2((lm.x - [1, 1].sliced).slice) < 1e-6);
484 }
485 
486 /// Rosenbrock
487 @safe unittest
488 {
489     import mir.algorithm.iteration: all;
490     import mir.ndslice.allocation: slice;
491     import mir.ndslice.slice: sliced;
492     import mir.blas: nrm2;
493 
494     auto lm = LeastSquaresLM!double(2, 2, Yes.lowerBounds, Yes.upperBounds);
495     lm.x[] = [-1.2, 1];
496 
497     alias rosenbrockRes = (x, y)
498     {
499         y[0] = 10 * (x[1] - x[0]^^2);
500         y[1] = 1 - x[0];
501     };
502 
503     alias rosenbrockJac = (x, J)
504     {
505         J[0, 0] = -20 * x[0];
506         J[0, 1] = 10;
507         J[1, 0] = -1;
508         J[1, 1] = 0;
509     };
510 
511     static class FFF
512     {
513         static auto opCall(Slice!(const(double)*) x, Slice!(double*, 2) J)
514         {
515             rosenbrockJac(x, J);
516         }
517     }
518 
519     lm.optimize!(rosenbrockRes, FFF);
520 
521     // import std.stdio;
522 
523     // writeln(lm.iterCt, " ", lm.fCalls, " ", lm.gCalls);
524 
525     assert(nrm2((lm.x - [1, 1].sliced).slice) < 1e-8);
526 
527     /////
528 
529     lm.reset;
530     lm.lower[] = [10.0, 10.0];
531     lm.upper[] = [200.0, 200.0];
532     lm.x[] = [150.0, 150.0];
533 
534     lm.optimize!(rosenbrockRes, rosenbrockJac);
535 
536     // writeln(lm.iterCt, " ", lm.fCalls, " ", lm.gCalls);
537     assert(nrm2((lm.x - [10, 10].sliced).slice) < 1e-8);
538     assert(lm.x.all!"a >= 10");
539 }
540 
541 ///
542 @safe unittest
543 {
544     import mir.blas: nrm2;
545     import mir.math.common;
546     import mir.ndslice.allocation: slice;
547     import mir.ndslice.topology: linspace, map;
548     import mir.ndslice.slice: sliced;
549     import mir.random;
550     import mir.random.algorithm;
551     import mir.random.variable;
552     import std.parallelism: taskPool;
553 
554     alias model = (x, p) => p[0] * map!exp(-x * p[1]);
555 
556     auto p = [1.0, 2.0];
557 
558     auto xdata = [20].linspace([0.0, 10.0]);
559     auto rng = Random(12345);
560     auto ydata = slice(model(xdata, p) + 0.01 * rng.randomSlice(normalVar, xdata.shape));
561 
562     auto lm = LeastSquaresLM!double(xdata.length, 2);
563     lm.x[] = [0.5, 0.5];
564 
565     lm.optimize!((p, y) => y[] = model(xdata, p) - ydata)();
566 
567     assert((lm.x - [1.0, 2.0].sliced).slice.nrm2 < 0.05);
568 }
569 
570 ///
571 @safe pure unittest
572 {
573     import mir.algorithm.iteration: all;
574     import mir.ndslice.allocation: slice;
575     import mir.ndslice.topology: map, repeat, iota;
576     import mir.ndslice.slice: sliced;
577     import mir.random;
578     import mir.random.variable;
579     import mir.random.algorithm;
580     import mir.math.common;
581 
582     alias model = (x, p) => p[0] * map!exp(-x / p[1]) + p[2];
583 
584     auto xdata = iota([100], 1);
585     auto rng = Random(12345);
586     auto ydata = slice(model(xdata, [10.0, 10.0, 10.0]) + 0.1 * rng.randomSlice(normalVar, xdata.shape));
587 
588     auto lm = LeastSquaresLM!double(xdata.length, 3, [5.0, 11.0, 5.0], double.infinity.repeat(3).slice.field);
589 
590     lm.x[] = [15.0, 15.0, 15.0];
591     lm.optimize!((p, y) => 
592         y[] = model(xdata, p) - ydata);
593 
594     assert(all!"a >= b"(lm.x, lm.lower));
595 
596     // import std.stdio;
597 
598     // writeln(lm.x);
599     // writeln(lm.iterCt, " ", lm.fCalls, " ", lm.gCalls);
600 
601     lm.reset;
602     lm.x[] = [5.0, 5.0, 5.0];
603     lm.upper[] = [15.0, 9.0, 15.0];
604     lm.optimize!((p, y) => y[] = model(xdata, p) - ydata);
605 
606     assert(all!"a <= b"(lm.x, lm.upper));
607 
608     // writeln(lm.x);
609     // writeln(lm.iterCt, " ", lm.fCalls, " ", lm.gCalls);
610 }
611 
612 ///
613 @safe pure unittest
614 {
615     import mir.blas: nrm2;
616     import mir.math.common: sqrt;
617     import mir.ndslice.allocation: slice;
618     import mir.ndslice.slice: sliced;
619 
620     auto lm = LeastSquaresLM!double(1, 2, [-0.5, -0.5], [0.5, 0.5]);
621     lm.x[] = [0.001, 0.0001];
622     lm.optimize!(
623         (x, y)
624         {
625             y[0] = sqrt(1 - (x[0] ^^ 2 + x[1] ^^ 2));
626         },
627     );
628 
629     assert(nrm2((lm.x - lm.upper).slice) < 1e-8);
630 }
631 
632 /++
633 High level nothtow D API for Levenberg-Marquardt Algorithm.
634 
635 Computes the argmin over x of `sum_i(f(x_i)^2)` using the Modified Levenberg-Marquardt
636 algorithm, and an estimate of the Jacobian of `f` at x.
637 
638 The function `f` should take an input vector of length `n`, and fill an output
639 vector of length `m`.
640 
641 The function `g` is the Jacobian of `f`, and should fill a row-major `m x n` matrix. 
642 
643 Returns: optimization status.
644 Params:
645     f = `n -> m` function
646     g = `m × n` Jacobian (optional)
647     tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
648     lm = Levenberg-Marquardt data structure
649 See_also: $(LREF optimize)
650 +/
651 LMStatus optimizeImpl(alias f, alias g = null, alias tm = null, T)(scope ref LeastSquaresLM!T lm)
652 {
653     auto fInst = delegate(Slice!(const(T)*) x, Slice!(T*) y)
654     {
655         f(x, y);
656     };
657     if (false) with(lm)
658         fInst(x, y);
659     static if (is(typeof(g) == typeof(null)))
660         enum LeastSquaresLM!T.JacobianDelegate gInst = null;
661     else
662     {
663         auto gInst = delegate(Slice!(const(T)*) x, Slice!(T*, 2) J)
664         {
665             g(x, J);
666         };
667         static if (isNullableFunction!(g))
668             if (!g)
669                 gInst = null;
670         if (false) with(lm)
671             gInst(x, J);
672     }
673 
674     static if (is(typeof(tm) == typeof(null)))
675         enum LeastSquaresThreadManagerDelegate tmInst = null;
676     else
677     {
678         auto tmInst = delegate(
679             size_t count,
680             void* taskContext,
681             scope LeastSquaresTask task)
682         {
683             tm(count, taskContext, task);
684         };
685         // auto tmInst = &tmInstDec;
686         static if (isNullableFunction!(tm))
687             if (!tm)
688                 tmInst = null;
689         if (false) with(lm)
690             tmInst(0, null, null);
691     }
692     alias TM = typeof(tmInst);
693     return optimizeLeastSquaresLM!T(lm, fInst.trustedAllAttr, gInst.trustedAllAttr,  tmInst.trustedAllAttr);
694 }
695 
696 // extern (C) void delegate(ulong count, void* taskContext, extern (C) void function(void*, ulong, ulong, ulong) pure nothrow @nogc @safe task) @system 
697 // extern (C) void delegate(ulong count, void* taskContext, extern (C) void function(void*, ulong, ulong, ulong) pure nothrow @nogc @safe task) pure nothrow @nogc @safe
698 
699 // extern (C) void delegate(ulong count, void* taskContext, scope extern (C) void function(void*, ulong, ulong, ulong) pure nothrow @nogc @safe task) @system 
700 // extern (C) void delegate(ulong count, void* taskContext, extern (C) void function(void* context, ulong totalThreads, ulong treadId, ulong i) pure nothrow @nogc @safe task) pure nothrow @nogc @safe
701 // void delegate(ulong, void*, scope extern (C) void function(void*, ulong, ulong, ulong) pure nothrow @nogc @safe) pure nothrow @nogc @safe to parameter scope extern (C) void delegate(ulong count, void* taskContext, extern (C) void function(void* context, ulong totalThreads, ulong treadId, ulong i) pure nothrow @nogc @safe task) pure nothrow @nogc @safe tm= cast(extern (C) void delegate(ulong count, void* taskContext, extern (C) void function(void* context, ulong totalThreads, ulong treadId, ulong i) pure nothrow @nogc @safe task) pure nothrow @nogc @safe)null
702 
703 // optimizeLeastSquaresLMD(scope extern (C) void delegate(ulong count, void* taskContext, scope extern (C) void function(void* context, ulong totalThreads, ulong treadId, ulong i) pure nothrow @nogc @safe task) pure nothrow @nogc @safe tm = cast(extern (C) void delegate(ulong count, void* taskContext, scope extern (C) void function(void* context, ulong totalThreads,ulong treadId, ulong i) pure nothrow @nogc @safe task) pure nothrow @nogc @safe)null) is not callable using argument types (LeastSquaresLM!double, void delegate(Slice!(cast(SliceKind)2, [1LU], const(double)*), Slice!(cast(SliceKind)2, [1LU], double*)) pure nothrow @nogc @safe, void delegate(Slice!(cast(SliceKind)2, [1LU], const(double)*), Slice!(cast(SliceKind)2, [2LU], double*)) pure nothrow @nogc @safe, void delegate(ulong, void*, scope void function(void*, ulong, ulong, ulong) pure nothrow @nogc @safe) pure nothrow @nogc @safe)
704 // source/mir/least_squares.d(613,36):        cannot pass argument trustedAllAttr(tmInst) of type void delegate(ulong, void*, scope void function(void*, ulong, ulong, ulong) pure nothrow @nogc @safe) pure nothrow @nogc @safe to parameter scope extern (C) void delegate(ulong count, void* taskContext, scope extern (C) void function(void* context, ulong totalThreads, ulong treadId, ulong i) pure nothrow @nogc @safe task) pure nothrow @nogc @safe tm = cas
705 
706 // void delegate(
707 //     Slice!(cast(SliceKind)2, [1LU], const(double)*),
708 //     Slice!(cast(SliceKind)2, [1LU], double*)) pure nothrow @nogc @safe,
709 //     void delegate (
710 //         Slice!(cast(SliceKind)2, [1LU], const(double)*), Slice!(cast(SliceKind)2, [2LU], double*)) pure nothrow @nogc @safe, void delegate(ulong, void*, scope void function(void*, ulong, ulong, ulong) pure nothrow @nogc @safe) pure nothrow @nogc @safe)
711 
712 /++
713 Status string for low (extern) and middle (nothrow) levels D API.
714 Params:
715     st = optimization status
716 Returns: description for $(LMStatus)
717 +/
718 pragma(inline, false)
719 string lmStatusString(LMStatus st) @safe pure nothrow @nogc
720 {
721     final switch(st) with(LMStatus)
722     {
723         case success:
724             return "success";
725         case initialized:
726             return "data structure was initialized";
727         case badBounds:
728             return "Initial guess must be within bounds.";
729         case badGuess:
730             return "Initial guess must be an array of finite numbers.";
731         case badMinStepQuality:
732             return "0 <= minStepQuality < 1 must hold.";
733         case badGoodStepQuality:
734             return "0 < goodStepQuality <= 1 must hold.";
735         case badStepQuality:
736             return "minStepQuality < goodStepQuality must hold.";
737         case badLambdaParams:
738             return "1 <= lambdaIncrease && lambdaIncrease <= T.max.sqrt and T.min_normal.sqrt <= lambdaDecrease && lambdaDecrease <= 1 must hold.";
739         case numericError:
740             return "numeric error";
741     }
742 }
743 
744 ///
745 alias LeastSquaresTask = extern(C) void function(
746                 void* context,
747                 size_t totalThreads,
748                 size_t treadId,
749                 size_t i)
750             @safe nothrow @nogc pure;
751 
752 /// Thread manager delegate type for low level `extern(D)` API.
753 alias LeastSquaresThreadManagerDelegate = void delegate(
754         size_t count,
755         void* taskContext,
756         scope LeastSquaresTask task,
757         )@safe nothrow @nogc pure;
758 
759 /++
760 Low level `extern(D)` instatiation.
761 Params:
762     lm = Levenberg-Marquardt data structure
763     f = `n -> m` function
764     g = `m × n` Jacobian (optional)
765     tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
766 +/
767 pragma(inline, false)
768 LMStatus optimizeLeastSquaresLMD
769     (
770         scope ref LeastSquaresLM!double lm,
771         scope LeastSquaresLM!double.FunctionDelegate f,
772         scope LeastSquaresLM!double.JacobianDelegate g = null,
773         scope LeastSquaresThreadManagerDelegate tm = null,
774     ) @trusted nothrow @nogc pure
775 {
776     return optimizeLMImplGeneric!double(lm, f, g, tm);
777 }
778 
779 
780 /// ditto
781 pragma(inline, false)
782 LMStatus optimizeLeastSquaresLMS
783     (
784         scope ref LeastSquaresLM!float lm,
785         scope LeastSquaresLM!float.FunctionDelegate f,
786         scope LeastSquaresLM!float.JacobianDelegate g = null,
787         scope LeastSquaresThreadManagerDelegate tm = null,
788     ) @trusted nothrow @nogc pure
789 {
790     return optimizeLMImplGeneric!float(lm, f, g, tm);
791 }
792 
793 /// ditto
794 alias optimizeLeastSquaresLM(T : double) = optimizeLeastSquaresLMD;
795 /// ditto
796 alias optimizeLeastSquaresLM(T : float) = optimizeLeastSquaresLMS;
797 
798 
799 extern(C) @safe nothrow @nogc
800 {
801     /++
802     Status string for extern(C) API.
803     Params:
804         st = optimization status
805     Returns: description for $(LMStatus)
806     +/
807     extern(C)
808     pragma(inline, false)
809     immutable(char)* mir_least_squares_lm_status_string(LMStatus st) @trusted pure nothrow @nogc
810     {
811         return st.lmStatusString.ptr;
812     }
813 
814     /// Thread manager function type for low level `extern(C)` API.
815     alias LeastSquaresThreadManagerFunction =
816         extern(C) void function(
817             void* context,
818             size_t count,
819             void* taskContext,
820             scope LeastSquaresTask task)
821             @system nothrow @nogc pure;
822 
823     /++
824     Low level `extern(C)` wrapper instatiation.
825     Params:
826         lm = Levenberg-Marquardt data structure
827         fContext = context for the function
828         f = `n -> m` function
829         gContext = context for the Jacobian (optional)
830         g = `m × n` Jacobian (optional)
831         tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
832     +/
833     extern(C)
834     pragma(inline, false)
835     LMStatus mir_least_squares_lm_optimize_d
836         (
837             scope ref LeastSquaresLM!double lm,
838             scope void* fContext,
839             scope LeastSquaresLM!double.FunctionFunction f,
840             scope void* gContext = null,
841             scope LeastSquaresLM!double.JacobianFunction g = null,
842             scope void* tmContext = null,
843             scope LeastSquaresThreadManagerFunction tm = null,
844         ) @system nothrow @nogc pure
845     {
846         return optimizeLMImplGenericBetterC!double(lm, fContext, f, gContext, g, tmContext, tm);
847     }
848 
849     /// ditto
850     extern(C)
851     pragma(inline, false)
852     LMStatus mir_least_squares_lm_optimize_s
853         (
854             scope ref LeastSquaresLM!float lm,
855             scope void* fContext,
856             scope LeastSquaresLM!float.FunctionFunction f,
857             scope void* gContext = null,
858             scope LeastSquaresLM!float.JacobianFunction g = null,
859             scope void* tmContext = null,
860             scope LeastSquaresThreadManagerFunction tm = null,
861         ) @system nothrow @nogc pure
862     {
863         return optimizeLMImplGenericBetterC!float(lm, fContext, f, gContext, g, tmContext, tm);
864     }
865 
866     /// ditto
867     alias mir_least_squares_lm_optimize(T : double) = mir_least_squares_lm_optimize_d;
868 
869     /// ditto
870     alias mir_least_squares_lm_optimize(T : float) = mir_least_squares_lm_optimize_s;
871 
872     /++
873     Initialize LM data structure with default params for iteration.
874     Params:
875         lm = Levenberg-Marquart data structure
876     +/
877     void mir_least_squares_lm_init_params_d(ref LeastSquaresLM!double lm) pure
878     {
879         lm.initParams;
880     }
881 
882     /// ditto
883     void mir_least_squares_lm_init_params_s(ref LeastSquaresLM!float lm) pure
884     {
885         lm.initParams;
886     }
887 
888     /// ditto
889     alias mir_least_squares_lm_init_params(T : double) = mir_least_squares_lm_init_params_d;
890 
891     /// ditto
892     alias mir_least_squares_lm_init_params(T : float) = mir_least_squares_lm_init_params_s;
893 
894     /++
895     Resets all counters and flags, fills `x`, `y`, `upper`, `lower`, vecors with default values.
896     Params:
897         lm = Levenberg-Marquart data structure
898     +/
899     void mir_least_squares_lm_reset_d(ref LeastSquaresLM!double lm) pure
900     {
901         lm.reset;
902     }
903 
904     /// ditto
905     void mir_least_squares_lm_reset_s(ref LeastSquaresLM!float lm) pure
906     {
907         lm.reset;
908     }
909 
910     /// ditto
911     alias mir_least_squares_lm_reset(T : double) = mir_least_squares_lm_reset_d;
912 
913     /// ditto
914     alias mir_least_squares_lm_reset(T : float) = mir_least_squares_lm_reset_s;
915 
916     /++
917     Allocates data.
918     Params:
919         lm = Levenberg-Marquart data structure
920         m = Y = f(X) dimension
921         n = X dimension
922         lowerBounds = flag to allocate lower bounds
923         lowerBounds = flag to allocate upper bounds
924     +/
925     void mir_least_squares_lm_stdc_alloc_d(ref LeastSquaresLM!double lm, size_t m, size_t n, bool lowerBounds, bool upperBounds)
926     {
927         lm.stdcAlloc(m, n, lowerBounds, upperBounds);
928     }
929 
930     /// ditto
931     void mir_least_squares_lm_stdc_alloc_s(ref LeastSquaresLM!float lm, size_t m, size_t n, bool lowerBounds, bool upperBounds)
932     {
933         lm.stdcAlloc(m, n, lowerBounds, upperBounds);
934     }
935 
936     /// ditto
937     alias mir_least_squares_lm_stdc_alloc(T : double) = mir_least_squares_lm_stdc_alloc_d;
938 
939     /// ditto
940     alias mir_least_squares_lm_stdc_alloc(T : float) = mir_least_squares_lm_stdc_alloc_s;
941 
942     /++
943     Frees vectors including `x`, `y`, `upper`, `lower`.
944     Params:
945         lm = Levenberg-Marquart data structure
946     +/
947     void mir_least_squares_lm_stdc_free_d(ref LeastSquaresLM!double lm)
948     {
949         lm.stdcFree;
950     }
951 
952     /// ditto
953     void mir_least_squares_lm_stdc_free_s(ref LeastSquaresLM!float lm)
954     {
955         lm.stdcFree;
956     }
957 
958     /// ditto
959     alias mir_least_squares_lm_stdc_free(T : double) = mir_least_squares_lm_stdc_free_d;
960 
961     /// ditto
962     alias mir_least_squares_lm_stdc_free(T : float) = mir_least_squares_lm_stdc_free_s;
963 }
964 
965 private:
966 
967 LMStatus optimizeLMImplGenericBetterC(T)
968     (
969         scope ref LeastSquaresLM!T lm,
970         scope void* fContext,
971         scope LeastSquaresLM!T.FunctionFunction f,
972         scope void* gContext,
973         scope LeastSquaresLM!T.JacobianFunction g,
974         scope void* tmContext,
975         scope LeastSquaresThreadManagerFunction tm,
976     ) @system nothrow @nogc pure
977 {
978     version(LDC) pragma(inline, true);
979     if (g)
980         return optimizeLeastSquaresLM!T(
981             lm,
982             (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator),
983             (x, J) @trusted => g(gContext, J.length, x.length, x.iterator, J.iterator),
984             null
985         );
986     if (tm)
987         return optimizeLeastSquaresLM!T(
988             lm,
989             (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator),
990             null, (count, taskContext, scope task)  @trusted => tm(tmContext, count, taskContext, task)
991         );
992     return optimizeLeastSquaresLM!T(
993         lm,
994         (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator),
995         null,
996         null
997     );
998 }
999 
1000 extern(C) void defaultLMThreadManagerDelegate(T)(void* context, size_t totalThreads, size_t treadId, size_t j) @trusted pure nothrow @nogc
1001 {with(*cast(LeastSquaresLM!T*)((cast(void**)context)[0])){
1002     import mir.blas;
1003     import mir.math.common;
1004     auto f = *cast(LeastSquaresLM!T.FunctionDelegate*)((cast(void**)context)[1]);
1005     auto idx = totalThreads >= n ? j : treadId;
1006     auto p = JJ[idx];
1007     if(ipiv[idx]++ == 0)
1008     {
1009         copy(x, p);
1010     }
1011 
1012     auto save = p[j];
1013     auto xmh = save - jacobianEpsilon;
1014     auto xph = save + jacobianEpsilon;
1015     if (_lower_ptr)
1016         xmh = fmax(xmh, lower[j]);
1017     if (_upper_ptr)
1018         xph = fmin(xph, upper[j]);
1019     auto Jj = J[0 .. $, j];
1020     if (auto twh = xph - xmh)
1021     {
1022         p[j] = xph;
1023         f(p, mBuffer);
1024         copy(mBuffer, Jj);
1025 
1026         p[j] = xmh;
1027         f(p, mBuffer);
1028 
1029         p[j] = save;
1030 
1031         axpy(-1, mBuffer, Jj);
1032         scal(1 / twh, Jj);
1033     }
1034     else
1035     {
1036         import mir.ndslice.topology: flattened;
1037         fill(T(0), Jj);
1038     }
1039 }}
1040 
1041 private auto assumePure(T)(T t)
1042 if (isFunctionPointer!T || isDelegate!T)
1043 {
1044     enum attrs = functionAttributes!T | FunctionAttribute.pure_;
1045     return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t;
1046 }
1047 
1048 // version = mir_optim_debug;
1049 
1050 // LM algorithm
1051 LMStatus optimizeLMImplGeneric(T)
1052     (
1053         scope ref LeastSquaresLM!T lm,
1054         scope LeastSquaresLM!T.FunctionDelegate f,
1055         scope LeastSquaresLM!T.JacobianDelegate g = null,
1056         scope LeastSquaresThreadManagerDelegate tm = null,
1057     ) @trusted nothrow @nogc
1058 {with(lm){
1059     import mir.blas;
1060     import mir.lapack;
1061     import mir.math.common;
1062     import mir.math.sum: sum;
1063     import mir.algorithm.iteration: all;
1064     import mir.ndslice.dynamic: transposed;
1065     import mir.ndslice.topology: canonical, diagonal;
1066     import mir.utility: max;
1067     import mir.ndslice.slice: sliced;
1068 
1069     version(LDC) pragma(inline, true);
1070 
1071     version(mir_optim_debug)
1072     {
1073         import core.stdc.stdio;
1074         auto file = assumePure(&fopen)("x.txt", "a");
1075         scope(exit)
1076             assumePure(&fclose)(file);
1077     }
1078 
1079     if (m == 0 || n == 0 || !x.all!"-a.infinity < a && a < a.infinity")
1080         return lm.status = LMStatus.badGuess; 
1081     if (!(!_lower_ptr || allLessOrEqual(lower, x)) || !(!_upper_ptr || allLessOrEqual(x, upper)))
1082         return lm.status = LMStatus.badBounds; 
1083     if (!(0 <= minStepQuality && minStepQuality < 1))
1084         return lm.status = LMStatus.badMinStepQuality;
1085     if (!(0 <= goodStepQuality && goodStepQuality <= 1))
1086         return lm.status = LMStatus.badGoodStepQuality;
1087     if (!(minStepQuality < goodStepQuality))
1088         return lm.status = LMStatus.badStepQuality;
1089     if (!(1 <= lambdaIncrease && lambdaIncrease <= T.max.sqrt))
1090         return lm.status = LMStatus.badLambdaParams;
1091     if (!(T.min_normal.sqrt <= lambdaDecrease && lambdaDecrease <= 1))
1092         return lm.status = LMStatus.badLambdaParams;
1093 
1094     maxAge = maxAge ? maxAge : g ? 3 : cast(uint)(2 * n);
1095     uint age = maxAge;
1096 
1097     tm = tm ? tm : delegate(size_t count, void* taskContext, scope LeastSquaresTask task) pure @nogc nothrow @trusted
1098     {
1099         foreach(i; 0 .. count)
1100             task(taskContext, 1, 0, i);
1101     };
1102 
1103     bool needJacobian = true;
1104     f(x, y);
1105     ++fCalls;
1106     residual = dot(y, y);
1107 
1108     T nu = 2;
1109     // T mu = 1;
1110     T sigma = 0;
1111 
1112     do
1113     {
1114         if (!allLessOrEqual(x, x))
1115             return lm.status = LMStatus.numericError;
1116         T mJy_nrm2 = void;
1117         T deltaXBase_dot = void;
1118         if (needJacobian)
1119         {
1120             needJacobian = false;
1121             if (age < maxAge)
1122             {
1123                 age++;
1124                 auto d = 1 / deltaXBase_dot;
1125                 axpy(-1, y, mBuffer); // -deltaY
1126                 gemv(1, J, deltaXBase, 1, mBuffer); //-(f_new - f_old - J_old*h)
1127                 scal(-d, mBuffer);
1128                 ger(1, mBuffer, deltaXBase, J); //J_new = J_old + u*h'
1129             }
1130             else
1131             {
1132                 if (g)
1133                 {
1134                     age = 0;
1135                     g(x, J);
1136                     gCalls += 1;
1137                 }
1138                 else
1139                 {
1140                     age = 0;
1141                     fill(0, ipiv);
1142                     void*[2] context;
1143                     context[0] = &lm;
1144                     context[1] = &f;
1145                     tm(n, context.ptr, &defaultLMThreadManagerDelegate!T);
1146                     fCalls += ipiv.sum;
1147                 }
1148             }
1149             gemv(-1, J.transposed, y, 0, mJy);
1150             mJy_nrm2 = mJy.nrm2;
1151         }
1152 
1153         syrk(Uplo.Upper, 1, J.transposed, 0, JJ);
1154         if (syev('V', 'L', JJ.canonical, nBuffer, _work))
1155             return lm.status = LMStatus.numericError;
1156 
1157         if (!(lambda >= minLambda))
1158         {
1159             lambda = 0.0001 * nBuffer.back;
1160             if (!(lambda >= minLambda))
1161                 lambda = 1;
1162         }
1163 
1164         T sigmaInit = 0;
1165 
1166         if (nBuffer.front < 0)
1167             sigmaInit = nBuffer.front * -(1 + T.epsilon);
1168 
1169         if (nBuffer.front + sigmaInit < T.epsilon)
1170             sigmaInit += T.epsilon;
1171 
1172         if (!(mJy_nrm2 / ((nBuffer.front + sigmaInit) * (1 + lambda)) < T.max / 2))
1173             sigmaInit = mJy_nrm2 / ((T.max / 2) * (1 + lambda)) - nBuffer.front;
1174 
1175         if (sigmaInit == 0)
1176         {
1177             sigma = 0;
1178             nu = 2;
1179         }
1180         else
1181         {
1182             sigma = fmax(sigma, sigmaInit);
1183         }
1184 
1185         gemv(1, JJ, mJy, 0, deltaX);
1186 
1187         foreach(i; 0 .. n)
1188             nBuffer[i] = deltaX[i] / ((nBuffer[i] + sigma) * (1 + lambda));
1189 
1190         gemv(1, JJ.transposed, nBuffer, 0, deltaX);
1191 
1192         axpy(1, x, deltaX);
1193 
1194         if (_lower_ptr)
1195             applyLowerBound(deltaX, lower);
1196         if (_upper_ptr)
1197             applyUpperBound(deltaX, upper);
1198 
1199         axpy(-1, x, deltaX);
1200         copy(y, mBuffer);
1201         gemv(1, J, deltaX, 1, mBuffer); // (J * dx + y) * (J * dx + y)^T
1202         auto predictedResidual = dot(mBuffer, mBuffer);
1203         copy(x, nBuffer);
1204         axpy(1, deltaX, nBuffer);
1205 
1206         if (_lower_ptr)
1207             applyLowerBound(nBuffer, lower);
1208         if (_upper_ptr)
1209             applyUpperBound(nBuffer, upper);
1210 
1211         f(nBuffer, mBuffer);
1212 
1213         ++fCalls;
1214         ++iterCt;
1215         auto trialResidual = dot(mBuffer, mBuffer);
1216         if (trialResidual != trialResidual || trialResidual == T.infinity)
1217             return lm.status = LMStatus.numericError;
1218         auto improvement = residual - trialResidual;
1219         auto predictedImprovement = residual - predictedResidual;
1220         auto rho = improvement / predictedImprovement;
1221 
1222         version(mir_optim_debug)
1223         {
1224             assumePure(&fprintf)(file, "x = ");
1225             foreach (ref e; nBuffer)
1226             {
1227                 assumePure(&fprintf)(file, "%.4f ", e);
1228             }
1229             assumePure(&fprintf)(file, "\n");
1230             assumePure(&fprintf)(file, "lambda = %e\n", lambda);
1231             assumePure(&fprintf)(file, "sigma = %e\n", sigma);
1232             // assumePure(&fprintf)(file, "mu = %e\n", mu);
1233             assumePure(&fprintf)(file, "nu = %e\n", nu);
1234             assumePure(&fprintf)(file, "improvement = %e\n", improvement);
1235             assumePure(&fprintf)(file, "predictedImprovement = %e\n", predictedImprovement);
1236             assumePure(&fprintf)(file, "rho = %e\n", rho);
1237             assumePure(&fflush)(file);
1238         }
1239 
1240 
1241         if (rho > 0)
1242         {
1243             copy(deltaX, deltaXBase);
1244             deltaXBase_dot = dot(deltaXBase, deltaXBase);
1245             if (deltaXBase_dot != deltaXBase_dot || deltaXBase_dot == T.infinity)
1246                 return lm.status = LMStatus.numericError;
1247             copy(nBuffer, x);
1248             swap(y, mBuffer);
1249             residual = trialResidual;
1250             needJacobian = true;
1251         }
1252         if (rho > minStepQuality)
1253         {
1254             gemv(1, J.transposed, y, 0, nBuffer);
1255             gConverged = !(nBuffer.amax > tolG);
1256             xConverged = !(deltaXBase_dot.sqrt > tolX * (tolX + x.nrm2));
1257 
1258             if (gConverged || xConverged)
1259             {
1260                 if (age)
1261                 {
1262                     gConverged = false;
1263                     xConverged = false;
1264                     age = maxAge;
1265                 }
1266                 else
1267                 {
1268                     break;
1269                 }
1270             }
1271 
1272             if (fConverged)
1273                 break;
1274 
1275             if (rho > goodStepQuality)
1276             {
1277                 lambda = fmax(lambdaDecrease * lambda, minLambda);
1278                 sigma = sigma * 0.5;
1279                 nu = 2;
1280                 // mu = 1;
1281             }
1282         }
1283         else
1284         {
1285             if (fConverged)
1286                 break;
1287 
1288             auto newsigma = sigma * nu;
1289             // auto newlambda = lambdaIncrease * lambda * mu;
1290             auto newlambda = lambdaIncrease * lambda;
1291             if (newsigma > T.max / 8)
1292                 newsigma = sigma + sigma;
1293             if (newlambda > maxLambda)
1294                 newlambda = lambda + lambda;
1295             if (newlambda > maxLambda || newsigma > T.max / 8)
1296             {
1297                 if (age == 0)
1298                 {
1299                     break;
1300                 }
1301                 else
1302                 {
1303                     needJacobian = true;
1304                     age = maxAge;
1305                     continue;
1306                 }
1307             }
1308             nu += nu;
1309             // mu += mu;
1310             lambda = newlambda;
1311             sigma = newsigma;
1312         }
1313     }
1314     while (iterCt < maxIter);
1315 
1316     return lm.status = LMStatus.success;
1317 }}
1318 
1319 pragma(inline, false)
1320 void applyLowerBound(T)(Slice!(T*) x, Slice!(const(T)*) bound)
1321 {
1322     import mir.math.common: fmax;
1323     import mir.algorithm.iteration: each;
1324     each!((ref x, y) { x = x.fmax(y); } )(x, bound);
1325 }
1326 
1327 pragma(inline, false)
1328 void applyUpperBound(T)(Slice!(T*) x, Slice!(const(T)*) bound)
1329 {
1330     import mir.math.common: fmin;
1331     import mir.algorithm.iteration: each;
1332     each!((ref x, y) { x = x.fmin(y); } )(x, bound);
1333 }
1334 
1335 pragma(inline, false)
1336 T amax(T, SliceKind kind)(Slice!(const(T)*, 1, kind) x)
1337 {
1338     import mir.math.common: fmax, fabs;
1339     T ret = 0;
1340     foreach(ref e; x)
1341         ret = fmax(fabs(e), ret);
1342     return ret;
1343 }
1344 
1345 pragma(inline, false)
1346 void fill(T, SliceKind kind)(T value, Slice!(T*, 1, kind) x)
1347 {
1348     x[] = value;
1349 }
1350 
1351 pragma(inline, false)
1352 bool allLessOrEqual(T)(
1353     Slice!(const(T)*) a,
1354     Slice!(const(T)*) b,
1355     )
1356 {
1357     import mir.algorithm.iteration: all;
1358     return all!"a <= b"(a, b);
1359 }
1360 
1361 uint normalizeSafety()(uint attrs)
1362 {
1363     if (attrs & FunctionAttribute.system)
1364         attrs &= ~FunctionAttribute.safe;
1365     return attrs;
1366 }
1367 
1368 auto trustedAllAttr(T)(scope return T t) @trusted
1369     if (isFunctionPointer!T || isDelegate!T)
1370 {
1371     enum attrs = (functionAttributes!T & ~FunctionAttribute.system) 
1372         | FunctionAttribute.pure_
1373         | FunctionAttribute.safe
1374         | FunctionAttribute.nogc
1375         | FunctionAttribute.nothrow_;
1376     return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t;
1377 }
1378 
1379 template isNullableFunction(alias f)
1380 {
1381     enum isNullableFunction = __traits(compiles, { alias F = Unqual!(typeof(f)); auto r = function(ref F e) {e = null;};} );
1382 }