Example of Analytic Derivatives
考虑通过样本数据对如下问题 (Rat43 ) 进行曲线拟合
y = b 1 ( 1 + e b 2 − b 3 x ) 1 / b 4 y=\frac{b_{1}}{\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}}} y = ( 1 + e b 2 − b 3 x ) 1/ b 4 b 1 给定一些样本数据 { x i , y i } , ∀ i = 1 , … , n \left\{x_{i}, y_{i}\right\}, \forall i=1, \ldots, n { x i , y i } , ∀ i = 1 , … , n ,估计参数 b 1 , b 2 , b 3 , b 4 b_{1}, b_{2}, b_{3},b_{4} b 1 , b 2 , b 3 , b 4 。可以建模成寻找使得如下目标函数取得最小值时, b 1 , b 2 , b 3 , b 4 b_{1}, b_{2}, b_{3},b_{4} b 1 , b 2 , b 3 , b 4 的值:
E ( b 1 , b 2 , b 3 , b 4 ) = ∑ i f 2 ( b 1 , b 2 , b 3 , b 4 ; x i , y i ) = ∑ i ( b 1 ( 1 + e b 2 − b 3 x i ) 1 / b 4 − y i ) 2 \begin{aligned} E\left(b_{1}, b_{2}, b_{3}, b_{4}\right) & =\sum_{i} f^{2}\left(b_{1}, b_{2}, b_{3}, b_{4} ; x_{i}, y_{i}\right) \\ & =\sum_{i}\left(\frac{b_{1}}{\left(1+e^{b_{2}-b_{3} x_{i}}\right)^{1 / b_{4}}}-y_{i}\right)^{2} \end{aligned} E ( b 1 , b 2 , b 3 , b 4 ) = i ∑ f 2 ( b 1 , b 2 , b 3 , b 4 ; x i , y i ) = i ∑ ( ( 1 + e b 2 − b 3 x i ) 1/ b 4 b 1 − y i ) 2 使用 Ceres 解决该问题,我们需要定义一个 costFunction,用于计算给定 x x x 和 y y y 的残差 f f f 及其相对于 b 1 、 b 2 、 b 3 b_{1}、b_{2}、b_{3} b 1 、 b 2 、 b 3 和 b 4 b_{4} b 4 的导数。
D 1 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = 1 ( 1 + e b 2 − b 3 x ) 1 / b 4 D 2 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = − b 1 e b 2 − b 3 x b 4 ( 1 + e b 2 − b 3 x ) 1 / b 4 + 1 D 3 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 1 x e b 2 − b 3 x b 4 ( 1 + e b 2 − b 3 x ) 1 / b 4 + 1 D 4 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 1 log ( 1 + e b 2 − b 3 x ) b 4 2 ( 1 + e b 2 − b 3 x ) 1 / b 4 \begin{array}{l} D_{1} f\left(b_{1}, b_{2}, b_{3}, b_{4} ; x, y\right)=\frac{1}{\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}}} \\ D_{2} f\left(b_{1}, b_{2}, b_{3}, b_{4} ; x, y\right)=\frac{-b_{1} e^{b_{2}-b_{3} x}}{b_{4}\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}+1}} \\ D_{3} f\left(b_{1}, b_{2}, b_{3}, b_{4} ; x, y\right)=\frac{b_{1} x e^{b_{2}-b_{3} x}}{b_{4}\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}+1}} \\ D_{4} f\left(b_{1}, b_{2}, b_{3}, b_{4} ; x, y\right)=\frac{b_{1} \log \left(1+e^{b_{2}-b_{3} x}\right)}{b_{4}^{2}\left(1+e^{b_{2}-b_{3} x}\right)^{1 / b_{4}}} \end{array} D 1 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = ( 1 + e b 2 − b 3 x ) 1/ b 4 1 D 2 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 4 ( 1 + e b 2 − b 3 x ) 1/ b 4 + 1 − b 1 e b 2 − b 3 x D 3 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 4 ( 1 + e b 2 − b 3 x ) 1/ b 4 + 1 b 1 x e b 2 − b 3 x D 4 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 4 2 ( 1 + e b 2 − b 3 x ) 1/ b 4 b 1 l o g ( 1 + e b 2 − b 3 x ) 有了这些导数,我们现在可以将 CostFunction 实现为:
Copy class Rat43Analytic : public SizedCostFunction<1,4> {
public:
Rat43Analytic(const double x, const double y) : x_(x), y_(y) {}
virtual ~Rat43Analytic() {}
virtual bool Evaluate(double const* const* parameters,
double* residuals,
double** jacobians) const {
const double b1 = parameters[0][0];
const double b2 = parameters[0][1];
const double b3 = parameters[0][2];
const double b4 = parameters[0][3];
residuals[0] = b1 * pow(1 + exp(b2 - b3 * x_), -1.0 / b4) - y_;
if (!jacobians) return true;
double* jacobian = jacobians[0];
if (!jacobian) return true;
jacobian[0] = pow(1 + exp(b2 - b3 * x_), -1.0 / b4);
jacobian[1] = -b1 * exp(b2 - b3 * x_) *
pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
jacobian[2] = x_ * b1 * exp(b2 - b3 * x_) *
pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
jacobian[3] = b1 * log(1 + exp(b2 - b3 * x_)) *
pow(1 + exp(b2 - b3 * x_), -1.0 / b4) / (b4 * b4);
return true;
}
private:
const double x_;
const double y_;
};
这些难以阅读并且有很多冗余。因此在实践中我们会用一些子表达式来提高其效率,这会给我们带来类似的结果:
Copy class Rat43AnalyticOptimized : public SizedCostFunction<1,4> {
public:
Rat43AnalyticOptimized(const double x, const double y) : x_(x), y_(y) {}
virtual ~Rat43AnalyticOptimized() {}
virtual bool Evaluate(double const* const* parameters,
double* residuals,
double** jacobians) const {
const double b1 = parameters[0][0];
const double b2 = parameters[0][1];
const double b3 = parameters[0][2];
const double b4 = parameters[0][3];
const double t1 = exp(b2 - b3 * x_);
const double t2 = 1 + t1;
const double t3 = pow(t2, -1.0 / b4);
residuals[0] = b1 * t3 - y_;
if (!jacobians) return true;
double* jacobian = jacobians[0];
if (!jacobian) return true;
const double t4 = pow(t2, -1.0 / b4 - 1);
jacobian[0] = t3;
jacobian[1] = -b1 * t1 * t4 / b4;
jacobian[2] = -x_ * jacobian[1];
jacobian[3] = b1 * log(t2) * t3 / (b4 * b4);
return true;
}
private:
const double x_;
const double y_;
};
两种实现的区别如下:
When should you use analytical derivatives?
性能是最先考虑的,可以利用计算中的代数结构来获得比自动微分更好的性能。尽管如此,要想从 analytical derivatives 中获得最佳性能,还需要做大量的工作。在采用这种方法之前,最好先估算一下求解 jacobian 所花费的时间占总求解时间的比例,使用 Amdahl’s Law 即可。
There is no other way to compute the derivatives, e.g. you wish to compute the derivative of the root of a polynomial, a 3 ( x , y ) z 3 + a 2 ( x , y ) z 2 + a 1 ( x , y ) z + a 0 ( x , y ) = 0 a_{3}(x, y) z^{3}+a_{2}(x, y) z^{2}+a_{1}(x, y) z+a_{0}(x, y)=0 a 3 ( x , y ) z 3 + a 2 ( x , y ) z 2 + a 1 ( x , y ) z + a 0 ( x , y ) = 0 , with respect to x , y x,y x , y , This requires the use of the Inverse Function Theorem .
Last updated 10 months ago