Example of Analytic Derivatives
考虑通过样本数据对如下问题 (Rat43 ) 进行曲线拟合
y = b 1 ( 1 + e b 2 − b 3 x ) 1 / b 4 y=\frac{b_{1}}{\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}}} y = ( 1 + e b 2 − b 3 x ) 1/ b 4 b 1 给定一些样本数据 { x i , y i } , ∀ i = 1 , … , n \left\{x_{i}, y_{i}\right\}, \forall i=1, \ldots, n { x i , y i } , ∀ i = 1 , … , n ,估计参数 b 1 , b 2 , b 3 , b 4 b_{1}, b_{2}, b_{3},b_{4} b 1 , b 2 , b 3 , b 4 。可以建模成寻找使得如下目标函数取得最小值时, b 1 , b 2 , b 3 , b 4 b_{1}, b_{2}, b_{3},b_{4} b 1 , b 2 , b 3 , b 4 的值:
E ( b 1 , b 2 , b 3 , b 4 ) = ∑ i f 2 ( b 1 , b 2 , b 3 , b 4 ; x i , y i ) = ∑ i ( b 1 ( 1 + e b 2 − b 3 x i ) 1 / b 4 − y i ) 2 \begin{aligned} E\left(b_{1}, b_{2}, b_{3}, b_{4}\right) & =\sum_{i} f^{2}\left(b_{1}, b_{2}, b_{3}, b_{4} ; x_{i}, y_{i}\right) \\ & =\sum_{i}\left(\frac{b_{1}}{\left(1+e^{b_{2}-b_{3} x_{i}}\right)^{1 / b_{4}}}-y_{i}\right)^{2} \end{aligned} E ( b 1 , b 2 , b 3 , b 4 ) = i ∑ f 2 ( b 1 , b 2 , b 3 , b 4 ; x i , y i ) = i ∑ ( ( 1 + e b 2 − b 3 x i ) 1/ b 4 b 1 − y i ) 2 使用 Ceres 解决该问题,我们需要定义一个 costFunction,用于计算给定 x x x 和 y y y 的残差 f f f 及其相对于 b 1 、 b 2 、 b 3 b_{1}、b_{2}、b_{3} b 1 、 b 2 、 b 3 和 b 4 b_{4} b 4 的导数。
D 1 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = 1 ( 1 + e b 2 − b 3 x ) 1 / b 4 D 2 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = − b 1 e b 2 − b 3 x b 4 ( 1 + e b 2 − b 3 x ) 1 / b 4 + 1 D 3 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 1 x e b 2 − b 3 x b 4 ( 1 + e b 2 − b 3 x ) 1 / b 4 + 1 D 4 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 1 log ( 1 + e b 2 − b 3 x ) b 4 2 ( 1 + e b 2 − b 3 x ) 1 / b 4 \begin{array}{l} D_{1} f\left(b_{1}, b_{2}, b_{3}, b_{4} ; x, y\right)=\frac{1}{\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}}} \\ D_{2} f\left(b_{1}, b_{2}, b_{3}, b_{4} ; x, y\right)=\frac{-b_{1} e^{b_{2}-b_{3} x}}{b_{4}\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}+1}} \\ D_{3} f\left(b_{1}, b_{2}, b_{3}, b_{4} ; x, y\right)=\frac{b_{1} x e^{b_{2}-b_{3} x}}{b_{4}\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}+1}} \\ D_{4} f\left(b_{1}, b_{2}, b_{3}, b_{4} ; x, y\right)=\frac{b_{1} \log \left(1+e^{b_{2}-b_{3} x}\right)}{b_{4}^{2}\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}}} \end{array} D 1 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = ( 1 + e b 2 − b 3 x ) 1/ b 4 1 D 2 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 4 ( 1 + e b 2 − b 3 x ) 1/ b 4 + 1 − b 1 e b 2 − b 3 x D 3 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 4 ( 1 + e b 2 − b 3 x ) 1/ b 4 + 1 b 1 x e b 2 − b 3 x D 4 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 4 2 ( 1 + e b 2 − b 3 x ) 1/ b 4 b 1 l o g ( 1 + e b 2 − b 3 x ) 有了这些导数,我们现在可以将 CostFunction 实现为:
Copy class Rat43Analytic : public SizedCostFunction<1,4> {
public:
Rat43Analytic(const double x, const double y) : x_(x), y_(y) {}
virtual ~Rat43Analytic() {}
virtual bool Evaluate(double const* const* parameters,
double* residuals,
double** jacobians) const {
const double b1 = parameters[0][0];
const double b2 = parameters[0][1];
const double b3 = parameters[0][2];
const double b4 = parameters[0][3];
residuals[0] = b1 * pow(1 + exp(b2 - b3 * x_), -1.0 / b4) - y_;
if (!jacobians) return true;
double* jacobian = jacobians[0];
if (!jacobian) return true;
jacobian[0] = pow(1 + exp(b2 - b3 * x_), -1.0 / b4);
jacobian[1] = -b1 * exp(b2 - b3 * x_) *
pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
jacobian[2] = x_ * b1 * exp(b2 - b3 * x_) *
pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
jacobian[3] = b1 * log(1 + exp(b2 - b3 * x_)) *
pow(1 + exp(b2 - b3 * x_), -1.0 / b4) / (b4 * b4);
return true;
}
private:
const double x_;
const double y_;
};
这些难以阅读并且有很多冗余。因此在实践中我们会用一些子表达式来提高其效率,这会给我们带来类似的结果:
Copy class Rat43AnalyticOptimized : public SizedCostFunction<1,4> {
public:
Rat43AnalyticOptimized(const double x, const double y) : x_(x), y_(y) {}
virtual ~Rat43AnalyticOptimized() {}
virtual bool Evaluate(double const* const* parameters,
double* residuals,
double** jacobians) const {
const double b1 = parameters[0][0];
const double b2 = parameters[0][1];
const double b3 = parameters[0][2];
const double b4 = parameters[0][3];
const double t1 = exp(b2 - b3 * x_);
const double t2 = 1 + t1;
const double t3 = pow(t2, -1.0 / b4);
residuals[0] = b1 * t3 - y_;
if (!jacobians) return true;
double* jacobian = jacobians[0];
if (!jacobian) return true;
const double t4 = pow(t2, -1.0 / b4 - 1);
jacobian[0] = t3;
jacobian[1] = -b1 * t1 * t4 / b4;
jacobian[2] = -x_ * jacobian[1];
jacobian[3] = b1 * log(t2) * t3 / (b4 * b4);
return true;
}
private:
const double x_;
const double y_;
};
两种实现的区别如下:
When should you use analytical derivatives?
性能是最先考虑的,可以利用计算中的代数结构来获得比自动微分更好的性能。尽管如此,要想从 analytical derivatives 中获得最佳性能,还需要做大量的工作。在采用这种方法之前,最好先估算一下求解 jacobian 所花费的时间占总求解时间的比例,使用 Amdahl’s Law 即可。