4. Example
As the derivation above shows, gradient descent is actually much simpler than the Gauss-Newton method introduced in an earlier article, so here is an example that solves a nonlinear least-squares problem with gradient descent using nothing but Eigen. The model function in the example is $f(x; \theta) = a e^{bx}$:
#include <Eigen/Dense>
#include <cmath>
#include <iostream>
#include <random>
#include <vector>

using namespace std;
using namespace Eigen;

// Model function: y = a * exp(b * x)
double model(double x, const Vector2d& theta) {
  double a = theta(0);
  double b = theta(1);
  return a * exp(b * x);
}

// Residuals: r_i = y_i - f(x_i; a, b)
VectorXd computeResiduals(const vector<double>& x_data,
                          const vector<double>& y_data, const Vector2d& theta) {
  int N = x_data.size();
  VectorXd r(N);
  for (int i = 0; i < N; ++i) {
    r(i) = y_data[i] - model(x_data[i], theta);
  }
  return r;
}

// Jacobian matrix (N x 2): ∂r_i/∂a, ∂r_i/∂b
MatrixXd computeJacobian(const vector<double>& x_data, const Vector2d& theta) {
  int N = x_data.size();
  MatrixXd J(N, 2);
  double a = theta(0);
  double b = theta(1);
  for (int i = 0; i < N; ++i) {
    double x = x_data[i];
    double exp_bx = exp(b * x);  // exp(b*x)
    J(i, 0) = -exp_bx;           // ∂r/∂a = -exp(b*x)
    J(i, 1) = -a * exp_bx * x;   // ∂r/∂b = -a * exp(b*x) * x
  }
  return J;
}

int main() {
  // ========================
  // 1. Ground-truth parameters
  // ========================
  Vector2d true_params;
  true_params << 2.0, -0.3;  // a=2.0, b=-0.3 → y = 2 * exp(-0.3 * x)
  cout << "True parameters: a = " << true_params(0) << ", b = " << true_params(1)
       << endl;

  // ========================
  // 2. Generate noisy data
  // ========================
  int N = 20;
  vector<double> x_data(N), y_data(N);
  random_device rd;
  mt19937 gen(rd());
  normal_distribution<double> noise(0.0, 0.05);  // small Gaussian noise
  for (int i = 0; i < N; ++i) {
    x_data[i] = -2.0 + i * 0.4;  // x from -2 to 5.6
    double y_true = model(x_data[i], true_params);
    y_data[i] = y_true + noise(gen);
  }

  // ========================
  // 3. Initialize parameters
  // ========================
  Vector2d theta;
  theta << 1.0, 0.0;  // initial guess: a=1.0, b=0.0
  cout << "Initial guess: a = " << theta(0) << ", b = " << theta(1) << endl;

  // ========================
  // 4. Gradient descent
  // ========================
  int max_iter = 500;
  double alpha = 5e-3;  // learning rate
  double tol = 1e-6;
  cout << "\nStarting gradient descent...\n";
  cout << "Iter\tCost (SSR)\t\tParam a\t\tParam b\n";
  cout << "----\t----------\t\t-------\t\t-------\n";
  for (int iter = 0; iter < max_iter; ++iter) {
    // Compute residuals and the cost (sum of squared residuals)
    VectorXd r = computeResiduals(x_data, y_data, theta);
    double cost = r.squaredNorm();
    // Gradient of the cost: ∇ = 2 * J^T * r
    MatrixXd J = computeJacobian(x_data, theta);
    Vector2d gradient = 2.0 * J.transpose() * r;
    // Print current state (every 10 iterations)
    if (iter % 10 == 0) {
      cout << iter << "\t" << cost << "\t\t" << theta(0) << "\t\t" << theta(1)
           << endl;
    }
    // Stopping criterion
    if (gradient.norm() < tol) {
      cout << "Converged! Gradient norm: " << gradient.norm() << endl;
      break;
    }
    // Update parameters
    theta -= alpha * gradient;
  }

  // ========================
  // 5. Output the results
  // ========================
  cout << "\n--- Fitting complete ---" << endl;
  cout << "Estimated parameters: a = " << theta(0) << ", b = " << theta(1) << endl;
  cout << "True parameters: a = " << true_params(0) << ", b = " << true_params(1)
       << endl;
  return 0;
}
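The program depends only on Eigen. Assuming the headers are installed under /usr/include/eigen3 (the usual location on Debian/Ubuntu; adjust the path for other setups), it can be compiled with, for example, g++ -I/usr/include/eigen3 -O2 gd_fit.cpp -o gd_fit, where gd_fit.cpp is just a placeholder file name.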
The output is as follows:

True parameters: a = 2, b = -0.3
Initial guess: a = 1, b = 0

Starting gradient descent...
Iter	Cost (SSR)		Param a		Param b
----	----------		-------		-------
0	22.7591		1		0
10	1.11435		1.72284		-0.345
20	0.100641	1.93634		-0.301778
30	0.0326195	1.99193		-0.294493
40	0.0286004	2.00545		-0.292882
50	0.0283681	2.0087		-0.292503
60	0.0283548	2.00948		-0.292413
70	0.028354	2.00967		-0.292391
80	0.0283539	2.00971		-0.292386
90	0.0283539	2.00972		-0.292385
100	0.0283539	2.00972		-0.292384
110	0.0283539	2.00973		-0.292384
120	0.0283539	2.00973		-0.292384
Converged! Gradient norm: 9.36104e-07

--- Fitting complete ---
Estimated parameters: a = 2.00973, b = -0.292384
True parameters: a = 2, b = -0.3
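For comparison with the Gauss-Newton method mentioned at the start of this section, the loop body changes very little. The sketch below is illustrative only and is not part of the program above; it reuses computeResiduals and computeJacobian, and replaces the fixed-step gradient update with a Gauss-Newton step obtained by solving the 2x2 normal equations.

// Illustrative sketch: a Gauss-Newton iteration reusing the helpers above.
// Instead of stepping along -alpha * gradient, solve the linearized
// least-squares problem (J^T J) * delta = -J^T r and apply theta += delta.
for (int iter = 0; iter < max_iter; ++iter) {
  VectorXd r = computeResiduals(x_data, y_data, theta);
  MatrixXd J = computeJacobian(x_data, theta);
  Vector2d delta = (J.transpose() * J).ldlt().solve(-J.transpose() * r);
  theta += delta;
  if (delta.norm() < tol) {  // step is tiny: consider it converged
    break;
  }
}

On a well-conditioned problem like this one, the Gauss-Newton step typically converges in far fewer iterations than the fixed-step gradient update, at the cost of a small linear solve per iteration.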
Either way, the key to the solution still lies in computing the Jacobian matrix. For the model function $f(x; \theta) = a e^{bx}$, the Jacobian should be:
$$
J(\theta) =
\begin{bmatrix}
\dfrac{\partial (y_1 - a e^{b x_1})}{\partial a} & \dfrac{\partial (y_1 - a e^{b x_1})}{\partial b} \\
\dfrac{\partial (y_2 - a e^{b x_2})}{\partial a} & \dfrac{\partial (y_2 - a e^{b x_2})}{\partial b} \\
\vdots & \vdots \\
\dfrac{\partial (y_m - a e^{b x_m})}{\partial a} & \dfrac{\partial (y_m - a e^{b x_m})}{\partial b}
\end{bmatrix}
= -
\begin{bmatrix}
e^{b x_1} & a e^{b x_1} x_1 \\
e^{b x_2} & a e^{b x_2} x_2 \\
\vdots & \vdots \\
e^{b x_m} & a e^{b x_m} x_m
\end{bmatrix}
$$
Compare this with the implementation in the code (the m in the formula corresponds to N in the code):
// Jacobian matrix (N x 2): ∂r_i/∂a, ∂r_i/∂b
MatrixXd computeJacobian(const vector<double>& x_data, const Vector2d& theta) {
  int N = x_data.size();
  MatrixXd J(N, 2);
  double a = theta(0);
  double b = theta(1);
  for (int i = 0; i < N; ++i) {
    double x = x_data[i];
    double exp_bx = exp(b * x);  // exp(b*x)
    J(i, 0) = -exp_bx;           // ∂r/∂a = -exp(b*x)
    J(i, 1) = -a * exp_bx * x;   // ∂r/∂b = -a * exp(b*x) * x
  }
  return J;
}
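Analytic Jacobians are easy to get wrong, especially the signs, since the residual is $r_i = y_i - f(x_i; \theta)$ rather than the model itself. A quick sanity check, sketched below for illustration and not part of the original program, is to compare computeJacobian against central finite differences.

// Sketch: numerical Jacobian via central differences, for checking
// computeJacobian. A noticeable mismatch usually means a sign or index error.
MatrixXd numericalJacobian(const vector<double>& x_data,
                           const vector<double>& y_data,
                           const Vector2d& theta, double h = 1e-6) {
  int N = x_data.size();
  MatrixXd J(N, 2);
  for (int j = 0; j < 2; ++j) {
    Vector2d theta_plus = theta, theta_minus = theta;
    theta_plus(j) += h;   // perturb parameter j upward
    theta_minus(j) -= h;  // and downward
    J.col(j) = (computeResiduals(x_data, y_data, theta_plus) -
                computeResiduals(x_data, y_data, theta_minus)) / (2.0 * h);
  }
  return J;
}

If (numericalJacobian(x_data, y_data, theta) - computeJacobian(x_data, theta)).norm() is close to zero, the analytic derivatives are almost certainly correct.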
Finally, besides the initial guess and the stopping criterion, the learning rate that controls the step size also deserves attention. If it is set too small, many iterations are needed and convergence is slow; if it is set too large, the update easily overshoots the minimum, and the iteration can oscillate or even diverge.
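A common safeguard is a simple backtracking step: if a tentative step increases the cost, halve it and try again. The sketch below is illustrative, under the assumption that it replaces the plain update theta -= alpha * gradient inside the loop of the program above (cost and gradient are the values already computed in that loop).

// Sketch: backtracking on the step size (replaces "theta -= alpha * gradient").
// If the tentative step increases the cost, shrink it until the cost drops.
double step = alpha;
Vector2d theta_new = theta - step * gradient;
double new_cost = computeResiduals(x_data, y_data, theta_new).squaredNorm();
while (new_cost > cost && step > 1e-12) {
  step *= 0.5;  // halve the step and retry
  theta_new = theta - step * gradient;
  new_cost = computeResiduals(x_data, y_data, theta_new).squaredNorm();
}
theta = theta_new;

This keeps the iteration stable even with an aggressive initial alpha, while still allowing large steps when they actually reduce the cost.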