
rgbd-slam: SLAM Algorithm Explained


Key points:

About rgbd-slam

Algorithm input: the last frame's depth and RGB images, plus the current frame's RGB image

Core algorithm: solvePnP

Algorithm output: the pose change from the last frame to the current frame
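Before the full implementation, here is a minimal, self-contained sketch of this 3D-2D step on synthetic data (the point values and variable names are illustrative, not from the original program): given 3D points in the last frame's camera coordinates and their 2D projections in the current frame, cv2.solvePnP returns the rvec/tvec describing the inter-frame motion.

import numpy as np
import cv2

# rounded TUM freiburg1 intrinsics (illustrative)
fx, fy, cx, cy = 517.3, 516.5, 318.6, 255.3
K = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]])

# synthetic 3D points in the last frame's camera coordinates
pts3d_last = np.array([[0.1, 0.0, 1.0], [-0.2, 0.1, 1.5], [0.3, -0.1, 2.0],
                       [0.0, 0.2, 1.2], [-0.1, -0.2, 1.8], [0.2, 0.3, 2.5]])

# ground-truth motion: a small rotation about y plus a small translation
rvec_gt = np.array([0.0, 0.05, 0.0])
tvec_gt = np.array([0.02, 0.0, 0.01])
pts2d_curr, _ = cv2.projectPoints(pts3d_last, rvec_gt, tvec_gt, K, None)

# PnP recovers the motion: rvec/tvec map last-frame 3D points into the current frame
ok, rvec, tvec = cv2.solvePnP(pts3d_last, pts2d_curr, K, None)
print(ok, rvec.ravel(), tvec.ravel())  # should reproduce rvec_gt / tvec_gt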

Implementation

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import numpy as np
import time
import math
import cv2
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

rgb_paths = []
depth_paths = []
ground_datas = []
# TUM freiburg1 intrinsics, depth scale, and distortion coefficients
Camera_fx = 517.306408
Camera_fy = 516.469215
Camera_cx = 318.643040
Camera_cy = 255.313989
Camera_depth_scale = 5000.000000
Camera_k1 = 0.262383
Camera_k2 = -0.953104
Camera_p1 = -0.005358
Camera_p2 = 0.002628
Camera_k3 = 1.163314

def d_main():
    fig = plt.figure()
    ax1 = fig.add_subplot(111, projection='3d')
    ts_in_world = []
    # @1 load the rgb/depth image paths and the ground-truth poses
    image_dir = "/home/tum/rgbd_dataset_freiburg1_desk/"
    filename = image_dir + "associate_with_ground.txt"
    with open(filename, 'r') as file_to_read:
        while True:
            line = file_to_read.readline()
            if not line:
                break
            (timestamp_rgb, rgb_path, timestamp_depth, depth_path, timestamp_ground,
             g_tx, g_ty, g_tz, g_qx, g_qy, g_qz, g_qw) = line.split()
            rgb_paths.append(rgb_path)
            g_data = np.array([g_tx, g_ty, g_tz, g_qw, g_qx, g_qy, g_qz], dtype=np.double)
            ground_datas.append(g_data)
            depth_paths.append(depth_path)
    print(len(rgb_paths))
    print(rgb_paths[0])
    # set up the last frame; each loop iteration supplies the current frame
    orb = cv2.ORB_create(500)
    last_rgb = cv2.imread(image_dir + rgb_paths[0], 0)
    last_depth = cv2.imread(image_dir + depth_paths[0], -1)
    bf = cv2.BFMatcher(cv2.NORM_HAMMING)
    # TODO: image pyramid
    # @2 process the remaining frames
    t_in_world = np.array([0.0, 0.0, 0.0]).reshape(3, 1)
    for i in range(1, len(rgb_paths)):
        # extract ORB keypoints and descriptors from the last frame
        kp1, des1 = orb.detectAndCompute(last_rgb, None)
        im_rgb = cv2.imread(image_dir + rgb_paths[i], 0)
        im_depth = cv2.imread(image_dir + depth_paths[i], -1)
        kp2, des2 = orb.detectAndCompute(im_rgb, None)
        matches = bf.knnMatch(des1, trainDescriptors=des2, k=2)
        # ratio test: keep a match only if it is clearly better than the runner-up
        good = []
        last_mkpId = []
        im_mkpId = []
        for m, n in matches:
            if m.distance < 0.55 * n.distance:
                # m.queryIdx: keypoint id in the last frame
                # m.trainIdx: keypoint id in the current frame
                # m.imgIdx: always 0 (single train image)
                good.append([m])
                last_mkpId.append(m.queryIdx)
                im_mkpId.append(m.trainIdx)
        # walk last_mkpId/im_mkpId, rebuild the matched pairs,
        # and compute a 3D camera-frame point for each keypoint
        last_pts_in_camera = []
        last_uv_in_camera = []
        im_pts_in_camera = []
        im_uv_in_camera = []
        # 3D points plus matching uv pixels for both frames, used to estimate the pose
        for j in range(len(last_mkpId)):
            uv_last = kp1[last_mkpId[j]].pt  # keypoint pixel coordinates, converted to 3D below
            uv_im = kp2[im_mkpId[j]].pt
            uv_last_ar = np.array([int(uv_last[0]), int(uv_last[1])])
            uv_im_ar = np.array([int(uv_im[0]), int(uv_im[1])])
            last_pt_in_camera = np.array([0.0, 0.0, 0.0])
            im_pt_in_camera = np.array([0.0, 0.0, 0.0])
            # TODO: the ORB features are not evenly distributed over the image
            # read the depth; item() is indexed (row, col), i.e. (v, u)
            depth_last = last_depth.item(uv_last_ar[1], uv_last_ar[0])
            depth_im = im_depth.item(uv_im_ar[1], uv_im_ar[0])
            if depth_im == 0 or depth_last == 0:
                continue
            # back-project the last-frame keypoint with the pinhole model
            rgb_depth = depth_last / Camera_depth_scale
            rgb_x = (uv_last_ar[0] - Camera_cx) * rgb_depth / Camera_fx
            rgb_y = (uv_last_ar[1] - Camera_cy) * rgb_depth / Camera_fy
            last_pt_in_camera[0] = rgb_x
            last_pt_in_camera[1] = rgb_y
            last_pt_in_camera[2] = rgb_depth
            last_uv_in_camera.append(uv_last_ar)
            last_pts_in_camera.append(last_pt_in_camera)
            # back-project the current-frame keypoint
            rgb_depth = depth_im / Camera_depth_scale
            rgb_x = (uv_im_ar[0] - Camera_cx) * rgb_depth / Camera_fx
            rgb_y = (uv_im_ar[1] - Camera_cy) * rgb_depth / Camera_fy
            im_pt_in_camera[0] = rgb_x
            im_pt_in_camera[1] = rgb_y
            im_pt_in_camera[2] = rgb_depth
            im_uv_in_camera.append(uv_im_ar)
            im_pts_in_camera.append(im_pt_in_camera)
        # skip the frame if too few depth-valid matches survive
        # (the epipolar-geometry calls below need at least 8 points)
        if len(last_pts_in_camera) < 8:
            last_rgb = im_rgb
            last_depth = im_depth
            continue
        # convert to np.array
        last_uv_in_camera_ar = np.array(last_uv_in_camera, dtype=np.double).reshape(len(last_uv_in_camera), 2)
        last_pts_in_camera_ar = np.array(last_pts_in_camera, dtype=np.double).reshape(len(last_uv_in_camera), 3)
        im_pts_in_camera_ar = np.array(im_pts_in_camera, dtype=np.double).reshape(len(last_uv_in_camera), 3)
        im_uv_in_camera_ar = np.array(im_uv_in_camera, dtype=np.double).reshape(len(last_uv_in_camera), 2)
        camera_matrix = np.array(([Camera_fx, 0.0, Camera_cx],
                                  [0.0, Camera_fy, Camera_cy],
                                  [0.0, 0.0, 1.0]))
        dist_coeffs = np.array([Camera_k1, Camera_k2, Camera_p1, Camera_p2, Camera_k3]).reshape(5, 1)  # lens distortion k1, k2, p1, p2, k3
        # 2d-2d: recover the camera pose from matched features via epipolar geometry
        # fundamental matrix -> how do we recover R, t from it?
        fundamental_matrix, mask = cv2.findFundamentalMat(last_uv_in_camera_ar, im_uv_in_camera_ar)
        # essential matrix
        essential_matrix, mask = cv2.findEssentialMat(last_uv_in_camera_ar, im_uv_in_camera_ar, camera_matrix)
        points, RR, t_in_camera, mask = cv2.recoverPose(essential_matrix, last_uv_in_camera_ar, im_uv_in_camera_ar, camera_matrix)
        # 3d-2d: solve PnP with the last frame's 3D points and the current frame's pixels
        found, rvec, tvec = cv2.solvePnP(last_pts_in_camera_ar, im_uv_in_camera_ar, camera_matrix, dist_coeffs)
        rotM = cv2.Rodrigues(rvec)[0]
        t_in_world = t_in_world + tvec  # accumulate the translation estimated by PnP
        ts_in_world.append(t_in_world)
        # visualization
        # cv2.drawMatchesKnn() expects the match set good with shape (N, 1); it draws lines between matched pairs
        im_rgb_kp = cv2.drawKeypoints(last_rgb, kp1, None, color=(0, 255, 0), flags=0)
        # cv2.imshow("1", im_rgb_kp); cv2.waitKey(10)
        im_rgb_kp = cv2.drawKeypoints(im_rgb, kp2, None, color=(0, 255, 0), flags=0)
        # cv2.imshow("2", im_rgb_kp); cv2.waitKey(10)
        # img_match = cv2.drawMatchesKnn(last_rgb, kp1, im_rgb, kp2, good, None, flags=2)
        # cv2.imshow("3", img_match); cv2.waitKey(1)
        # update: the current frame becomes the last frame
        last_rgb = im_rgb
        last_depth = im_depth
        # (to plot live, move the plotting block below into this loop and call plt.pause(0.1))
    # plot the estimated trajectory against the ground truth
    plot_ts_in_world = np.array(ts_in_world).reshape(len(ts_in_world), 3)
    plot_gt_in_word = np.array(ground_datas).reshape(len(ground_datas), 7)
    print(plot_gt_in_word)
    ax1.plot(plot_ts_in_world[:, 0], plot_ts_in_world[:, 1], plot_ts_in_world[:, 2], 'red')
    ax1.plot(plot_ts_in_world[:, 0], plot_ts_in_world[:, 1], plot_ts_in_world[:, 2], 'ob', markersize=2)
    ax1.plot(plot_gt_in_word[:, 0], plot_gt_in_word[:, 1], plot_gt_in_word[:, 2], 'og', markersize=4)
    plt.show()

if __name__ == '__main__':
    d_main()
    cv2.destroyAllWindows()

Results using PnP

3D-2D: recovering camera motion by solving PnP from feature matches

For comparison, the result when only 2D-2D feature matching is used to solve for the essential matrix:

2D-2D: recovering camera motion via the essential matrix from feature matches
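As a reference for this comparison, a minimal 2D-2D sketch on synthetic data (names and values here are illustrative): note that recoverPose returns a unit-norm translation, so the scale of each inter-frame motion is unobservable from 2D-2D matches alone, which is one reason naively accumulating it gives a worse trajectory than PnP.

import numpy as np
import cv2

fx, fy, cx, cy = 517.3, 516.5, 318.6, 255.3  # rounded TUM fr1 intrinsics
K = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]])

# synthetic 3D scene points and a known relative motion
rng = np.random.default_rng(0)
pts3d = np.column_stack([rng.uniform(-0.5, 0.5, 20),
                         rng.uniform(-0.5, 0.5, 20),
                         rng.uniform(1.0, 3.0, 20)])
rvec_gt = np.array([0.0, 0.05, 0.0])
tvec_gt = np.array([0.10, 0.0, 0.02])
pts_last, _ = cv2.projectPoints(pts3d, np.zeros(3), np.zeros(3), K, None)
pts_curr, _ = cv2.projectPoints(pts3d, rvec_gt, tvec_gt, K, None)

E, mask = cv2.findEssentialMat(pts_last, pts_curr, K, method=cv2.RANSAC)
# recoverPose picks the (R, t) decomposition of E that puts points in front
# of both cameras; t comes back with unit norm, so the motion's scale is lost
n_inliers, R, t, mask = cv2.recoverPose(E, pts_last, pts_curr, K)
print(cv2.Rodrigues(R)[0].ravel(), t.ravel())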

Summary: with both methods, the recovered R and t contain sudden jumps, so neither result is good, though PnP does better than the essential-matrix solution. While looking through related material I found gx's rgbd-slam, whose normofTransform function, i.e. Δt + min(2π − r, r), can be borrowed as a gating condition to suppress these jumps.
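A minimal sketch of that gate (following the normofTransform idea; the threshold value is illustrative): treat Δt + min(2π − r, r) as the motion magnitude, where r is the norm of the Rodrigues rotation vector, and drop frames whose inter-frame motion is implausibly large.

import numpy as np

def norm_of_transform(rvec, tvec):
    # motion magnitude: translation norm plus the (wrapped) rotation angle
    r = np.linalg.norm(rvec)
    return np.linalg.norm(tvec) + min(2 * np.pi - r, r)

# example: accept the PnP result only if the inter-frame motion is plausible
rvec = np.array([0.02, -0.01, 0.03])
tvec = np.array([0.01, 0.00, 0.02])
if norm_of_transform(rvec, tvec) < 0.3:  # threshold is illustrative
    print("accept frame, motion =", norm_of_transform(rvec, tvec))
else:
    print("reject frame as an outlier jump")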

On rgbd-slam: the original code was based on the OpenCV 2.4.x series and has now been ported to OpenCV 3.x; links below:

Code repository: https://gitee.com/davidhan008/rgbd-slam

Related write-up: http://blog.sina.com.cn/s/blog_161aed33e0102ymkm.html
