From b526f9a476c3df4d65b945130beebba9f578e76d Mon Sep 17 00:00:00 2001
From: Margarita Lashina
Date: Thu, 4 May 2023 20:05:32 +0300
Subject: [PATCH] added Levenberg-Marquardt algorithm + test

---
 .../tensors/api/LinearOpsTensorAlgebra.kt     |   8 +
 .../kmath/tensors/core/DoubleTensorAlgebra.kt | 369 +++++++++++++++++-
 .../tensors/core/TestDoubleTensorAlgebra.kt   |  85 +++-
 3 files changed, 455 insertions(+), 7 deletions(-)

diff --git a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/LinearOpsTensorAlgebra.kt b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/LinearOpsTensorAlgebra.kt
index 60865ecba..5cf0081fb 100644
--- a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/LinearOpsTensorAlgebra.kt
+++ b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/api/LinearOpsTensorAlgebra.kt
@@ -14,6 +14,8 @@ import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra.dot
 import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra.map
 import space.kscience.kmath.tensors.core.BroadcastDoubleTensorAlgebra.transposed
 import space.kscience.kmath.tensors.core.DoubleTensorAlgebra
+import space.kscience.kmath.tensors.core.internal.LMSettings
+import kotlin.reflect.KFunction3

 /**
  * Common linear algebra operations. Operates on [Tensor].
@@ -117,4 +119,10 @@ public interface LinearOpsTensorAlgebra<T> : TensorPartialDivision<T> {
      * @return the square matrix x which is the solution of the equation.
      */
     public fun solve(a: MutableStructure2D<Double>, b: MutableStructure2D<Double>): MutableStructure2D<Double>
+
+    /**
+     * Levenberg-Marquardt least-squares fitting: iteratively adjusts the parameters [p_input]
+     * so that the model [func] matches the data [y_dat_input], and returns the reduced
+     * chi-squared of the fit.
+     */
+    public fun lm(
+        func: KFunction3<MutableStructure2D<Double>, MutableStructure2D<Double>, LMSettings, MutableStructure2D<Double>>,
+        p_input: MutableStructure2D<Double>, t_input: MutableStructure2D<Double>, y_dat_input: MutableStructure2D<Double>,
+        weight_input: MutableStructure2D<Double>, dp_input: MutableStructure2D<Double>, p_min_input: MutableStructure2D<Double>, p_max_input: MutableStructure2D<Double>,
+        c_input: MutableStructure2D<Double>, opts_input: DoubleArray, nargin: Int, example_number: Int): Double
 }
diff --git a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/DoubleTensorAlgebra.kt b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/DoubleTensorAlgebra.kt
index 5325fe19e..e7e22afa9 100644
--- a/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/DoubleTensorAlgebra.kt
+++ b/kmath-tensors/src/commonMain/kotlin/space/kscience/kmath/tensors/core/DoubleTensorAlgebra.kt
@@ -9,6 +9,7 @@ package space.kscience.kmath.tensors.core

 import space.kscience.kmath.PerformancePitfall
+import space.kscience.kmath.linear.transpose
 import space.kscience.kmath.nd.*
 import space.kscience.kmath.operations.DoubleBufferOps
 import space.kscience.kmath.operations.DoubleField
@@ -17,10 +18,8 @@ import space.kscience.kmath.tensors.api.AnalyticTensorAlgebra
 import space.kscience.kmath.tensors.api.LinearOpsTensorAlgebra
 import space.kscience.kmath.tensors.api.Tensor
 import space.kscience.kmath.tensors.core.internal.*
-import kotlin.math.abs
-import kotlin.math.ceil
-import kotlin.math.floor
-import kotlin.math.sqrt
+import kotlin.math.*
+import kotlin.reflect.KFunction3

 /**
  * Implementation of basic operations over double tensors and basic algebra operations on them.
@@ -717,6 +716,368 @@ public open class DoubleTensorAlgebra :
         val aInverse = aSvd.third.dot(s).dot(aSvd.first.transposed())
         return aInverse.dot(b).as2D()
     }
+
+    override fun lm(
+        func: KFunction3<MutableStructure2D<Double>, MutableStructure2D<Double>, LMSettings, MutableStructure2D<Double>>,
+        p_input: MutableStructure2D<Double>, t_input: MutableStructure2D<Double>, y_dat_input: MutableStructure2D<Double>,
+        weight_input: MutableStructure2D<Double>, dp_input: MutableStructure2D<Double>, p_min_input: MutableStructure2D<Double>, p_max_input: MutableStructure2D<Double>,
+        c_input: MutableStructure2D<Double>, opts_input: DoubleArray, nargin: Int, example_number: Int): Double {
+
+        var result_chi_sq = 0.0
+
+        val tensor_parameter = 0
+        val eps: Double = 2.2204e-16
+
+        var settings = LMSettings(0, 0, example_number)
+        settings.func_calls = 0 // running count of function evaluations
+
+        var p = p_input
+        val y_dat = y_dat_input
+        val t = t_input
+
+        val Npar = length(p) // number of parameters
+        val Npnt = length(y_dat) // number of data points
+        var p_old = zeros(ShapeND(intArrayOf(Npar, 1))).as2D() // previous set of parameters
+        var y_old = zeros(ShapeND(intArrayOf(Npnt, 1))).as2D() // previous model, y_old = y_hat(t;p_old)
+        var X2 = 1e-3 / eps // a really big initial Chi-sq value
+        var X2_old = 1e-3 / eps // a really big initial Chi-sq value
+        var J = zeros(ShapeND(intArrayOf(Npnt, Npar))).as2D() // Jacobian matrix
+        val DoF = Npnt - Npar + 1 // statistical degrees of freedom
+
+        var corr_p = 0
+        var sigma_p = 0
+        var sigma_y = 0
+        var R_sq = 0
+        var cvg_hist = 0
+
+        if (length(t) != length(y_dat)) {
+            println("lm.m error: the length of t must equal the length of y_dat")
+            val length_t = length(t)
+            val length_y_dat = length(y_dat)
+            X2 = 0.0
+
+            corr_p = 0
+            sigma_p = 0
+            sigma_y = 0
+            R_sq = 0
+            cvg_hist = 0
+
+//            if (tensor_parameter != 0) { // Why is this check needed?
+//                return
+//            }
+        }
+
+        var weight = weight_input
+        if (nargin < 5) {
+            weight = fromArray(ShapeND(intArrayOf(1, 1)), doubleArrayOf((y_dat.transpose().dot(y_dat)).as1D()[0])).as2D()
+        }
+
+        var dp = dp_input
+        if (nargin < 6) {
+            dp = fromArray(ShapeND(intArrayOf(1, 1)), doubleArrayOf(0.001)).as2D()
+        }
+
+        var p_min = p_min_input
+        if (nargin < 7) {
+            p_min = p
+            p_min.abs()
+            p_min = p_min.div(-100.0).as2D()
+        }
+
+        var p_max = p_max_input
+        if (nargin < 8) {
+            p_max = p
+            p_max.abs()
+            p_max = p_max.div(100.0).as2D()
+        }
+
+        var c = c_input
+        if (nargin < 9) {
+            c = fromArray(ShapeND(intArrayOf(1, 1)), doubleArrayOf(1.0)).as2D()
+        }
+
+        var opts = opts_input
+        if (nargin < 10) {
+            opts = doubleArrayOf(3.0, 10.0 * Npar, 1e-3, 1e-3, 1e-1, 1e-1, 1e-2, 11.0, 9.0, 1.0)
+        }
+
+        val prnt = opts[0] // >1 intermediate results; >2 plots
+        val MaxIter = opts[1].toInt() // maximum number of iterations
+        val epsilon_1 = opts[2] // convergence tolerance for gradient
+        val epsilon_2 = opts[3] // convergence tolerance for parameters
+        val epsilon_3 = opts[4] // convergence tolerance for Chi-square
+        val epsilon_4 = opts[5] // determines acceptance of a L-M step
+        val lambda_0 = opts[6] // initial value of damping parameter, lambda
+        val lambda_UP_fac = opts[7] // factor for increasing lambda
+        val lambda_DN_fac = opts[8] // factor for decreasing lambda
+        val Update_Type = opts[9].toInt() // 1: Levenberg-Marquardt lambda update
+                                          // 2: Quadratic update
+                                          // 3: Nielsen's lambda update equations
+
+        val plotcmd = "figure(102); plot(t(:,1),y_init,''-k'',t(:,1),y_hat,''-b'',t(:,1),y_dat,''o'',''color'',[0,0.6,0],''MarkerSize'',4); title(sprintf(''\\chi^2_\\nu = %f'',X2/DoF)); drawnow"
+
+        p_min = make_column(p_min)
+        p_max = make_column(p_max)
+
+        if (length(make_column(dp)) == 1) {
+            dp = ones(ShapeND(intArrayOf(Npar, 1))).div(1 / dp[0, 0]).as2D()
+        }
+
+        val idx = get_zero_indices(dp) // indices of the parameters to be fit
+        val Nfit = idx?.shape?.component1() // number of parameters to fit
+        var stop = false // termination flag
+        val y_init = feval(func, t, p, settings) // model evaluated at the initial parameter guess
+
+        if (weight.shape.component1() == 1 || variance(weight) == 0.0) { // identical weights vector
+            weight = ones(ShapeND(intArrayOf(Npnt, 1))).div(1 / abs(weight[0, 0])).as2D() // !!! need to check
+            println("using uniform weights for error analysis")
+        }
+        else {
+            weight = make_column(weight)
+            weight.abs()
+        }
+
+        // initialize Jacobian with finite difference calculation
+        var lm_matx_ans = lm_matx(func, t, p_old, y_old, 1, J, p, y_dat, weight, dp, settings)
+        var JtWJ = lm_matx_ans[0]
+        var JtWdy = lm_matx_ans[1]
+        X2 = lm_matx_ans[2][0, 0]
+        var y_hat = lm_matx_ans[3]
+        J = lm_matx_ans[4]
+
+        if (abs(JtWdy).max()!! < epsilon_1) {
+            println(" *** Your Initial Guess is Extremely Close to Optimal ***")
+            println(" *** epsilon_1 = $epsilon_1")
+            stop = true
+        }
+
+        var lambda = 1.0
+        var nu = 1
+        when (Update_Type) {
+            1 -> lambda = lambda_0 // Marquardt: initial lambda
+            else -> { // Quadratic and Nielsen
+                lambda = lambda_0 * (diag(JtWJ)).max()!!
+                nu = 2
+            }
+        }
+
+        X2_old = X2 // previous value of X2
+        var cvg_hst = ones(ShapeND(intArrayOf(MaxIter, Npar + 3))) // initialize convergence history
+
+        var h = JtWJ.copyToTensor()
+        var dX2 = X2
+        while (!stop && settings.iteration <= MaxIter) { // --- Start Main Loop
+            settings.iteration += 1
+
+            // incremental change in parameters
+            h = when (Update_Type) {
+                1 -> { // Marquardt
+                    val solve = solve(JtWJ.plus(make_matrx_with_diagonal(diag(JtWJ)).div(1 / lambda)).as2D(), JtWdy)
+                    solve.asDoubleTensor()
+                }
+
+                else -> { // Quadratic and Nielsen
+                    val solve = solve(JtWJ.plus(lm_eye(Npar).div(1 / lambda)).as2D(), JtWdy)
+                    solve.asDoubleTensor()
+                }
+            }
+
+            // big = max(abs(h./p)) > 2; % this is a big step
+
+            // --- Are parameters [p+h] much better than [p] ?
+
+            var p_try = (p + h).as2D() // update the [idx] elements
+            p_try = smallest_element_comparison(largest_element_comparison(p_min, p_try.as2D()), p_max) // apply constraints
+
+            var delta_y = y_dat.minus(feval(func, t, p_try, settings)) // residual error using p_try
+
+            // TODO
+            // if ~all(isfinite(delta_y)) // floating point error; break
+            //     stop = 1;
+            //     break
+            // end
+
+            settings.func_calls += 1
+
+            val tmp = delta_y.times(weight)
+            var X2_try = delta_y.as2D().transpose().dot(tmp) // Chi-squared error criteria
+
+            val alpha = 1.0
+            if (Update_Type == 2) { // Quadratic
+                // One step of quadratic line update in the h direction for minimum X2
+
+                // TODO
+                // val alpha = JtWdy.transpose().dot(h) / ((X2_try.minus(X2)).div(2.0).plus(2 * JtWdy.transpose().dot(h)))
+                // alpha = JtWdy'*h / ( (X2_try - X2)/2 + 2*JtWdy'*h ) ;
+                // h = alpha * h;
+                //
+                // p_try = p + h(idx);                      % update only [idx] elements
+                // p_try = min(max(p_min,p_try),p_max);     % apply constraints
+                //
+                // delta_y = y_dat - feval(func,t,p_try,c); % residual error using p_try
+                // func_calls = func_calls + 1;
+                // X2_try = delta_y' * ( delta_y .* weight ); % Chi-squared error criteria
+            }
+
+            val rho = when (Update_Type) { // gain ratio: actual vs. predicted reduction in X2
+                1 -> {
+                    val tmp = h.transposed().dot(make_matrx_with_diagonal(diag(JtWJ)).div(1 / lambda).dot(h).plus(JtWdy))
+                    X2.minus(X2_try).as2D()[0, 0] / abs(tmp.as2D()).as2D()[0, 0]
+                }
+                else -> {
+                    val tmp = h.transposed().dot(h.div(1 / lambda).plus(JtWdy))
+                    X2.minus(X2_try).as2D()[0, 0] / abs(tmp.as2D()).as2D()[0, 0]
+                }
+            }
+
+            println()
+            println("rho = $rho")
+
+            if (rho > epsilon_4) { // it IS significantly better
+                val dX2 = X2.minus(X2_old)
+                X2_old = X2
+                p_old = p.copyToTensor().as2D()
+                y_old = y_hat.copyToTensor().as2D()
+                p = make_column(p_try) // accept p_try
+
+                lm_matx_ans = lm_matx(func, t, p_old, y_old, dX2.toInt(), J, p, y_dat, weight, dp, settings)
+                // decrease lambda ==> Gauss-Newton method
+
+                JtWJ = lm_matx_ans[0]
+                JtWdy = lm_matx_ans[1]
+                X2 = lm_matx_ans[2][0, 0]
+                y_hat = lm_matx_ans[3]
+                J = lm_matx_ans[4]
+
+                lambda = when (Update_Type) {
+                    1 -> { // Levenberg
+                        max(lambda / lambda_DN_fac, 1e-7)
+                    }
+                    2 -> { // Quadratic
+                        max(lambda / (1 + alpha), 1e-7)
+                    }
+                    else -> { // Nielsen
+                        nu = 2
+                        lambda * max(1.0 / 3, 1 - (2 * rho - 1).pow(3))
+                    }
+                }
+
+                // if (prnt > 2) {
+                //     eval(plotcmd)
+                // }
+            }
+            else { // it IS NOT better
+                X2 = X2_old // do not accept p_try
+                if (settings.iteration % (2 * Npar) == 0) { // rank-1 update of Jacobian
+                    lm_matx_ans = lm_matx(func, t, p_old, y_old, -1, J, p, y_dat, weight, dp, settings)
+                    JtWJ = lm_matx_ans[0]
+                    JtWdy = lm_matx_ans[1]
+                    dX2 = lm_matx_ans[2][0, 0]
+                    y_hat = lm_matx_ans[3]
+                    J = lm_matx_ans[4]
+                }
+
+                // increase lambda ==> gradient descent method
+                lambda = when (Update_Type) {
+                    1 -> { // Levenberg
+                        min(lambda * lambda_UP_fac, 1e7)
+                    }
+                    2 -> { // Quadratic
+                        lambda + abs(((X2_try.as2D()[0, 0] - X2) / 2) / alpha)
+                    }
+                    else -> { // Nielsen
+                        nu *= 2
+                        lambda * (nu / 2)
+                    }
+                }
+            }
+
+            if (prnt > 1) {
+                val chi_sq = X2 / DoF
+                println("Iteration ${settings.iteration}, func_calls ${settings.func_calls} | chi_sq=$chi_sq | lambda=$lambda")
+                print("param: ")
+                for (pn in 0 until Npar) {
+                    print(p[pn, 0].toString() + " ")
+                }
+                print("\ndp/p: ")
+                for (pn in 0 until Npar) {
+                    print((h.as2D()[pn, 0] / p[pn, 0]).toString() + " ")
+                }
+                result_chi_sq = chi_sq
+            }
+
+            // update convergence history ... save _reduced_ Chi-square
+            // cvg_hst(iteration,:) = [ func_calls p' X2/DoF lambda ];
+
+            if (abs(JtWdy).max()!! < epsilon_1 && settings.iteration > 2) {
+                println(" **** Convergence in r.h.s. (\"JtWdy\") ****")
+                println(" **** epsilon_1 = $epsilon_1")
+                stop = true
+            }
+            if ((abs(h.as2D()).div(abs(p) + 1e-12)).max() < epsilon_2 && settings.iteration > 2) {
+                println(" **** Convergence in Parameters ****")
+                println(" **** epsilon_2 = $epsilon_2")
+                stop = true
+            }
+            if (X2 / DoF < epsilon_3 && settings.iteration > 2) {
+                println(" **** Convergence in reduced Chi-square ****")
+                println(" **** epsilon_3 = $epsilon_3")
+                stop = true
+            }
+            if (settings.iteration == MaxIter) {
+                println(" !! Maximum Number of Iterations Reached Without Convergence !!")
+                stop = true
+            }
+        } // --- End of Main Loop
+
+        // --- convergence achieved, find covariance and confidence intervals
+
+        // ---- Error Analysis ----
+
+//        if (weight.shape.component1() == 1 || weight.variance() == 0.0) {
+//            weight = DoF / (delta_y.transpose().dot(delta_y)) * ones(intArrayOf(Npt, 1))
+//        }
+
+//        if (nargout > 1) {
+//            val redX2 = X2 / DoF
+//        }
+//
+//        lm_matx_ans = lm_matx(func, t, p_old, y_old, -1, J, p, y_dat, weight, dp)
+//        JtWJ = lm_matx_ans[0]
+//        JtWdy = lm_matx_ans[1]
+//        X2 = lm_matx_ans[2][0, 0]
+//        y_hat = lm_matx_ans[3]
+//        J = lm_matx_ans[4]
+//
+//        if (nargout > 2) { // standard error of parameters
+//            covar_p = inv(JtWJ);
+//            sigma_p = sqrt(diag(covar_p));
+//        }
+//
+//        if (nargout > 3) { // standard error of the fit
+//            // sigma_y = sqrt(diag(J * covar_p * J')); // slower version of below
+//            sigma_y = zeros(Npnt,1);
+//            for i=1:Npnt
+//                sigma_y(i) = J(i,:) * covar_p * J(i,:)';
+//            end
+//            sigma_y = sqrt(sigma_y);
+//        }
+//
+//        if (nargout > 4) { // parameter correlation matrix
+//            corr_p = covar_p ./ [sigma_p*sigma_p'];
+//        }
+//
+//        if (nargout > 5) { // coefficient of multiple determination
+//            R_sq = corr([y_dat y_hat]);
+//            R_sq = R_sq(1,2).^2;
+//        }
+//
+//        if (nargout > 6) { // convergence history
+//            cvg_hst = cvg_hst(1:iteration,:);
+//        }
+
+        return result_chi_sq
+    }
 }

 public val Double.Companion.tensorAlgebra: DoubleTensorAlgebra get() = DoubleTensorAlgebra
diff --git a/kmath-tensors/src/commonTest/kotlin/space/kscience/kmath/tensors/core/TestDoubleTensorAlgebra.kt b/kmath-tensors/src/commonTest/kotlin/space/kscience/kmath/tensors/core/TestDoubleTensorAlgebra.kt
index cae01bed8..2c3b3a231 100644
--- a/kmath-tensors/src/commonTest/kotlin/space/kscience/kmath/tensors/core/TestDoubleTensorAlgebra.kt
+++ b/kmath-tensors/src/commonTest/kotlin/space/kscience/kmath/tensors/core/TestDoubleTensorAlgebra.kt
@@ -6,11 +6,11 @@

 package space.kscience.kmath.tensors.core

-import space.kscience.kmath.nd.ShapeND
-import space.kscience.kmath.nd.contentEquals
-import space.kscience.kmath.nd.get
+import space.kscience.kmath.nd.*
 import space.kscience.kmath.operations.invoke
+import space.kscience.kmath.tensors.core.internal.LMSettings
 import space.kscience.kmath.testutils.assertBufferEquals
+import kotlin.math.roundToInt
 import kotlin.test.Test
 import kotlin.test.assertEquals
 import kotlin.test.assertFalse
@@ -207,4 +207,83 @@ internal class TestDoubleTensorAlgebra {
         assertTrue { ShapeND(5, 5) contentEquals res.shape }
         assertEquals(2.0, res[4, 4])
     }
+
+    @Test
+    fun testLM() = DoubleTensorAlgebra {
+        fun lm_func(t: MutableStructure2D<Double>, p: MutableStructure2D<Double>, settings: LMSettings): MutableStructure2D<Double> {
+            val m = t.shape.component1()
+            var y_hat = DoubleTensorAlgebra.zeros(ShapeND(intArrayOf(m, 1)))
+
+            if (settings.example_number == 1) {
+                y_hat = DoubleTensorAlgebra.exp((t.times(-1.0 / p[1, 0]))).times(p[0, 0]) + t.times(p[2, 0]).times(
+                    DoubleTensorAlgebra.exp((t.times(-1.0 / p[3, 0])))
+                )
+            }
+            else if (settings.example_number == 2) {
+                val mt = t.max()
+                y_hat = (t.times(1.0 / mt)).times(p[0, 0]) +
+                        (t.times(1.0 / mt)).pow(2).times(p[1, 0]) +
+                        (t.times(1.0 / mt)).pow(3).times(p[2, 0]) +
+                        (t.times(1.0 / mt)).pow(4).times(p[3, 0])
+            }
+            else if (settings.example_number == 3) {
+                y_hat = DoubleTensorAlgebra.exp((t.times(-1.0 / p[1, 0])))
+                    .times(p[0, 0]) + DoubleTensorAlgebra.sin((t.times(1.0 / p[3, 0]))).times(p[2, 0])
+            }
+
+            return y_hat.as2D()
+        }
+
+        val lm_matx_y_dat = doubleArrayOf(
+            19.6594, 18.6096, 17.6792, 17.2747, 16.3065, 17.1458, 16.0467, 16.7023, 15.7809, 15.9807,
+            14.7620, 15.1128, 16.0973, 15.1934, 15.8636, 15.4763, 15.6860, 15.1895, 15.3495, 16.6054,
+            16.2247, 15.9854, 16.1421, 17.0960, 16.7769, 17.1997, 17.2767, 17.5882, 17.5378, 16.7894,
+            17.7648, 18.2512, 18.1581, 16.7037, 17.8475, 17.9081, 18.3067, 17.9632, 18.2817, 19.1427,
+            18.8130, 18.5658, 18.0056, 18.4607, 18.5918, 18.2544, 18.3731, 18.7511, 19.3181, 17.3066,
+            17.9632, 19.0513, 18.7528, 18.2928, 18.5967, 17.8567, 17.7859, 18.4016, 18.9423, 18.4959,
+            17.8000, 18.4251, 17.7829, 17.4645, 17.5221, 17.3517, 17.4637, 17.7563, 16.8471, 17.4558,
+            17.7447, 17.1487, 17.3183, 16.8312, 17.7551, 17.0942, 15.6093, 16.4163, 15.3755, 16.6725,
+            16.2332, 16.2316, 16.2236, 16.5361, 15.3721, 15.3347, 15.5815, 15.6319, 14.4538, 14.6044,
+            14.7665, 13.3718, 15.0587, 13.8320, 14.7873, 13.6824, 14.2579, 14.2154, 13.5818, 13.8157
+        )
+
+        var example_number = 1
+        val p_init = BroadcastDoubleTensorAlgebra.fromArray(
+            ShapeND(intArrayOf(4, 1)), doubleArrayOf(5.0, 2.0, 0.2, 10.0)
+        ).as2D()
+
+        var t = ones(ShapeND(intArrayOf(100, 1))).as2D()
+        for (i in 0 until 100) {
+            t[i, 0] = t[i, 0] * (i + 1)
+        }
+
+        val y_dat = BroadcastDoubleTensorAlgebra.fromArray(
+            ShapeND(intArrayOf(100, 1)), lm_matx_y_dat
+        ).as2D()
+
+        val weight = BroadcastDoubleTensorAlgebra.fromArray(
+            ShapeND(intArrayOf(1, 1)), DoubleArray(1) { 4.0 }
+        ).as2D()
+
+        val dp = BroadcastDoubleTensorAlgebra.fromArray(
+            ShapeND(intArrayOf(1, 1)), DoubleArray(1) { -0.01 }
+        ).as2D()
+
+        val p_min = BroadcastDoubleTensorAlgebra.fromArray(
+            ShapeND(intArrayOf(4, 1)), doubleArrayOf(-50.0, -20.0, -2.0, -100.0)
+        ).as2D()
+
+        val p_max = BroadcastDoubleTensorAlgebra.fromArray(
+            ShapeND(intArrayOf(4, 1)), doubleArrayOf(50.0, 20.0, 2.0, 100.0)
+        ).as2D()
+
+        val consts = BroadcastDoubleTensorAlgebra.fromArray(
+            ShapeND(intArrayOf(1, 1)), doubleArrayOf(0.0)
+        ).as2D()
+
+        val opts = doubleArrayOf(3.0, 100.0, 1e-3, 1e-3, 1e-1, 1e-1, 1e-2, 11.0, 9.0, 1.0)
+
+        val chi_sq = lm(::lm_func, p_init, t, y_dat, weight, dp, p_min, p_max, consts, opts, 10, example_number)
+        assertEquals(0.9131, (chi_sq * 10000).roundToInt() / 10000.0)
+    }
 }
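Reviewer note, appended after the diff for context: the heart of every iteration in lm() above is the Marquardt-damped normal-equations step h = (JtWJ + lambda * diag(JtWJ))^-1 * JtWdy, which the patch computes as solve(JtWJ.plus(make_matrx_with_diagonal(diag(JtWJ)).div(1 / lambda)).as2D(), JtWdy). Below is a minimal self-contained Kotlin sketch of that single step on plain arrays, with Gaussian elimination standing in for DoubleTensorAlgebra.solve; the function name lmStep and the toy numbers are illustrative assumptions, not code from this patch.

import kotlin.math.abs

// Sketch (not from the patch): one Marquardt-damped step
//   h = (JtWJ + lambda * diag(JtWJ))^-1 * JtWdy
// solved by Gaussian elimination with partial pivoting.
fun lmStep(jtwj: Array<DoubleArray>, jtwdy: DoubleArray, lambda: Double): DoubleArray {
    val n = jtwdy.size
    // Augmented matrix [JtWJ + lambda * diag(JtWJ) | JtWdy].
    val a = Array(n) { i -> DoubleArray(n + 1) { j -> if (j < n) jtwj[i][j] else jtwdy[i] } }
    for (i in 0 until n) a[i][i] += lambda * jtwj[i][i]
    // Forward elimination with partial pivoting.
    for (col in 0 until n) {
        var pivot = col
        for (row in col + 1 until n) if (abs(a[row][col]) > abs(a[pivot][col])) pivot = row
        val swap = a[col]; a[col] = a[pivot]; a[pivot] = swap
        for (row in col + 1 until n) {
            val f = a[row][col] / a[col][col]
            for (j in col..n) a[row][j] -= f * a[col][j]
        }
    }
    // Back substitution.
    val h = DoubleArray(n)
    for (i in n - 1 downTo 0) {
        var s = a[i][n]
        for (j in i + 1 until n) s -= a[i][j] * h[j]
        h[i] = s / a[i][i]
    }
    return h
}

fun main() {
    // Toy 2x2 system: a larger lambda damps the step.
    val jtwj = arrayOf(doubleArrayOf(4.0, 1.0), doubleArrayOf(1.0, 3.0))
    val jtwdy = doubleArrayOf(1.0, 2.0)
    println(lmStep(jtwj, jtwdy, 0.0).toList())  // undamped Gauss-Newton step
    println(lmStep(jtwj, jtwdy, 10.0).toList()) // heavily damped, much shorter step
}

As lambda grows, the damped diagonal dominates and h shrinks toward a cautious gradient-descent direction; as lambda falls, h approaches the Gauss-Newton solution. This is exactly the trade-off that the lambda_UP_fac and lambda_DN_fac updates steer in the main loop above.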