視覺十四講:第八講_直接法
阿新 • • 發佈:2020-08-05
1.直接法的推導:
考慮某個空間點P和兩個時刻的相機,P的世界座標為[X,Y,Z],它在兩個相機上成像,記非齊次畫素座標為\(p_{1},p_{2}\),目標是求第一個相機到第二個相機的相對位姿變換。
直接法的思路是根據當前相機的位姿估計值,來尋找\(p_{2}\)的位置,誤差項為光度誤差
\(e = I_{1}(p_{1}) - I_{2}(p_{2})\)
考慮多個空間點P,那個整個相機位姿估計問題會變成:
主要問題是求誤差e相對於位姿的導數關係。使用李代數的擾動模型,給\(exp(\xi)\)左乘一個小擾動\(exp(\delta \xi)\)得:
記:\(q = \delta \xi^{\Lambda} exp(\xi^{\Lambda}) P\)
\(u = \frac{1}{Z_{2}}Kq\)
q為P在擾動之後,位於第二個相機座標系下的座標,而u為它的畫素座標,利用一階泰勒展開,有:
故誤差相對於李代數的雅可比矩陣為:
2.直接法分類:
1.稀疏關鍵點:通常使用數百個至上千個關鍵點,像LK光流那樣,這種稀疏直接法不必計算描述子,因此速度最快,但只能計算稀疏的重構。
2.半稠密直接法:考慮影象內帶有梯度的畫素點,捨棄畫素梯度不明顯的地方,重構一個半稠密結構。
3.稠密直接法:考慮所有畫素點,需要GPU加速。
3.實踐直接法:
1.單層稀疏直接法:
class JacobianAccumulator { public: JacobianAccumulator( const cv::Mat &img1_, const cv::Mat &img2_, const VecVector2d &px_ref_, const vector<double> depth_ref_, Sophus::SE3d &T21_) : img1(img1_), img2(img2_), px_ref(px_ref_), depth_ref(depth_ref_), T21(T21_) { projection = VecVector2d(px_ref.size(), Eigen::Vector2d(0, 0)); } /// accumulate jacobians in a range void accumulate_jacobian(const cv::Range &range); /// get hessian matrix Matrix6d hessian() const { return H; } /// get bias Vector6d bias() const { return b; } /// get total cost double cost_func() const { return cost; } /// get projected points VecVector2d projected_points() const { return projection; } /// reset h, b, cost to zero void reset() { H = Matrix6d::Zero(); b = Vector6d::Zero(); cost = 0; } private: const cv::Mat &img1; const cv::Mat &img2; const VecVector2d &px_ref; const vector<double> depth_ref; Sophus::SE3d &T21; VecVector2d projection; // projected points std::mutex hessian_mutex; Matrix6d H = Matrix6d::Zero(); Vector6d b = Vector6d::Zero(); double cost = 0; }; void DirectPoseEstimationSingleLayer( const cv::Mat &img1, const cv::Mat &img2, const VecVector2d &px_ref, const vector<double> depth_ref, Sophus::SE3d &T21) { const int iterations = 10; double cost = 0, lastCost = 0; auto t1 = chrono::steady_clock::now(); JacobianAccumulator jaco_accu(img1, img2, px_ref, depth_ref, T21); for (int iter = 0; iter < iterations; iter++) { //10次迭代次數 jaco_accu.reset(); cv::parallel_for_(cv::Range(0, px_ref.size()), //迴圈2000個點 std::bind(&JacobianAccumulator::accumulate_jacobian, &jaco_accu, std::placeholders::_1)); Matrix6d H = jaco_accu.hessian(); Vector6d b = jaco_accu.bias(); //10次迭代獲取更新值 Vector6d update = H.ldlt().solve(b); //更新姿態 T21 = Sophus::SE3d::exp(update) * T21; cost = jaco_accu.cost_func(); if (std::isnan(update[0])) { // sometimes occurred when we have a black or white patch and H is irreversible cout << "update is nan" << endl; break; } if (iter > 0 && cost > lastCost) { cout << "cost increased: " << cost << ", " << lastCost << endl; break; } if (update.norm() < 1e-3) { // converge break; } lastCost = cost; cout << "iteration: " << iter << ", cost: " << cost << endl; } cout << "T21 = \n" << T21.matrix() << endl; auto t2 = chrono::steady_clock::now(); auto time_used = chrono::duration_cast<chrono::duration<double>>(t2 - t1); cout << "direct method for single layer: " << time_used.count() << endl; // plot the projected pixels here cv::Mat img2_show; cv::cvtColor(img2, img2_show, CV_GRAY2BGR); VecVector2d projection = jaco_accu.projected_points(); for (size_t i = 0; i < px_ref.size(); ++i) { auto p_ref = px_ref[i]; auto p_cur = projection[i]; if (p_cur[0] > 0 && p_cur[1] > 0) { cv::circle(img2_show, cv::Point2f(p_cur[0], p_cur[1]), 2, cv::Scalar(0, 250, 0), 2); cv::line(img2_show, cv::Point2f(p_ref[0], p_ref[1]), cv::Point2f(p_cur[0], p_cur[1]), cv::Scalar(0, 250, 0)); } } cv::imshow("current", img2_show); cv::waitKey(); } void JacobianAccumulator::accumulate_jacobian(const cv::Range &range) { // parameters const int half_patch_size = 1; //區塊大小 int cnt_good = 0; Matrix6d hessian = Matrix6d::Zero(); Vector6d bias = Vector6d::Zero(); double cost_tmp = 0; for (size_t i = range.start; i < range.end; i++) { //第一張圖的畫素座標轉相機歸一化座標,乘深度轉為相機3D點 Eigen::Vector3d point_ref = depth_ref[i] * Eigen::Vector3d((px_ref[i][0] - cx) / fx, (px_ref[i][1] - cy) / fy, 1); //將第一張圖的點根據估計的位姿,轉到第二張圖的座標系下 Eigen::Vector3d point_cur = T21 * point_ref; if (point_cur[2] < 0) // depth invalid continue; //將相機座標系轉為畫素座標系 float u = fx * point_cur[0] / point_cur[2] + cx, v = fy * point_cur[1] / point_cur[2] + cy; //去除邊界點 if (u < half_patch_size || u > img2.cols - half_patch_size || v < half_patch_size || v > img2.rows - half_patch_size) continue; projection[i] = Eigen::Vector2d(u, v); double X = point_cur[0], Y = point_cur[1], Z = point_cur[2], //深度平方,用於雅可比矩陣的計算 Z2 = Z * Z, Z_inv = 1.0 / Z, Z2_inv = Z_inv * Z_inv; cnt_good++; //2個for迴圈是為了計算 for (int x = -half_patch_size; x <= half_patch_size; x++) for (int y = -half_patch_size; y <= half_patch_size; y++) { //重投影光度誤差, double error = GetPixelValue(img1, px_ref[i][0] + x, px_ref[i][1] + y) - GetPixelValue(img2, u + x, v + y); Matrix26d J_pixel_xi; Eigen::Vector2d J_img_pixel; //計算李代數雅可比矩陣 J_pixel_xi(0, 0) = fx * Z_inv; J_pixel_xi(0, 1) = 0; J_pixel_xi(0, 2) = -fx * X * Z2_inv; J_pixel_xi(0, 3) = -fx * X * Y * Z2_inv; J_pixel_xi(0, 4) = fx + fx * X * X * Z2_inv; J_pixel_xi(0, 5) = -fx * Y * Z_inv; J_pixel_xi(1, 0) = 0; J_pixel_xi(1, 1) = fy * Z_inv; J_pixel_xi(1, 2) = -fy * Y * Z2_inv; J_pixel_xi(1, 3) = -fy - fy * Y * Y * Z2_inv; J_pixel_xi(1, 4) = fy * X * Y * Z2_inv; J_pixel_xi(1, 5) = fy * X * Z_inv; //影象梯度 J_img_pixel = Eigen::Vector2d( 0.5 * (GetPixelValue(img2, u + 1 + x, v + y) - GetPixelValue(img2, u - 1 + x, v + y)), 0.5 * (GetPixelValue(img2, u + x, v + 1 + y) - GetPixelValue(img2, u + x, v - 1 + y)) ); // 計算誤差對於李代數的雅可比矩陣 Vector6d J = -1.0 * (J_img_pixel.transpose() * J_pixel_xi).transpose(); hessian += J * J.transpose(); bias += -error * J; cost_tmp += error * error; } } if (cnt_good) { // set hessian, bias and cost unique_lock<mutex> lck(hessian_mutex); //多執行緒互斥量,自動釋放 H += hessian; b += bias; cost += cost_tmp / cnt_good; } }
2.多層稀疏直接法:
類似於光流,把單層直接法擴充套件到金字塔式的多層直接法上,用Coarse-to-fine的過程計算相對運動。
void DirectPoseEstimationMultiLayer( const cv::Mat &img1, const cv::Mat &img2, const VecVector2d &px_ref, const vector<double> depth_ref, Sophus::SE3d &T21) { // parameters int pyramids = 4; double pyramid_scale = 0.5; double scales[] = {1.0, 0.5, 0.25, 0.125}; // create pyramids vector<cv::Mat> pyr1, pyr2; // image pyramids for (int i = 0; i < pyramids; i++) { if (i == 0) { pyr1.push_back(img1); pyr2.push_back(img2); } else { cv::Mat img1_pyr, img2_pyr; cv::resize(pyr1[i - 1], img1_pyr, cv::Size(pyr1[i - 1].cols * pyramid_scale, pyr1[i - 1].rows * pyramid_scale)); cv::resize(pyr2[i - 1], img2_pyr, cv::Size(pyr2[i - 1].cols * pyramid_scale, pyr2[i - 1].rows * pyramid_scale)); pyr1.push_back(img1_pyr); pyr2.push_back(img2_pyr); } } double fxG = fx, fyG = fy, cxG = cx, cyG = cy; // backup the old values for (int level = pyramids - 1; level >= 0; level--) { VecVector2d px_ref_pyr; // set the keypoints in this pyramid level for (auto &px: px_ref) { px_ref_pyr.push_back(scales[level] * px); } // scale fx, fy, cx, cy in different pyramid levels fx = fxG * scales[level]; fy = fyG * scales[level]; cx = cxG * scales[level]; cy = cyG * scales[level]; DirectPoseEstimationSingleLayer(pyr1[level], pyr2[level], px_ref_pyr, depth_ref, T21); } }