Now that we have point correspondences, we can estimate the camera pose. The underlying idea is to use the geometry of camera projection to solve for the camera's rotation matrix and translation vector; we will go through the theory in detail in later posts. Here we demonstrate the process directly with OpenCV, mainly through two functions: cv2.findEssentialMat and cv2.recoverPose.
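For reference, and as a preview of the theory covered later in the series: the essential matrix E relates corresponding points expressed in normalized camera coordinates, and it factors into the relative motion between the two views, which is why decomposing it yields R and t:

$$\mathbf{x}_2^\top E\,\mathbf{x}_1 = 0, \qquad E = [\mathbf{t}]_\times R$$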

First, we add the camera intrinsics provided by TUM-RGBD to our existing Dataset class:

class TUMRGBD:
    ...

    def intrinsic_matrix(self):
        # Check: https://cvg.cit.tum.de/data/datasets/rgbd-dataset/file_formats#intrinsic_camera_calibration_of_the_kinect
        fx = 535.4
        fy = 539.2
        cx = 320.1
        cy = 247.6
        return np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
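As a quick illustration of what this intrinsic matrix does (a small sketch, not part of the original code; the 3D point below is made up), a point in the camera frame is projected to pixel coordinates by multiplying with K and dividing by depth:

import numpy as np

K = np.array([[535.4, 0.0, 320.1],
              [0.0, 539.2, 247.6],
              [0.0, 0.0, 1.0]])

# Hypothetical 3D point in the camera frame (X, Y, Z), with Z pointing forward
point_cam = np.array([0.2, -0.1, 2.0])

uvw = K @ point_cam        # homogeneous pixel coordinates
u, v = uvw[:2] / uvw[2]    # perspective division by depth
print(u, v)                # roughly (373.6, 220.6)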

Next, we continue from the code in the previous post:

def main():
    dataset = TUMRGBD("data/rgbd_dataset_freiburg2_desk")

    frames = []    
    # Get the first two valid frames
    for i in range(0, len(dataset), 100):
        x = dataset[i]
        if x is None:
            continue
        frames.append(x)
        if len(frames) == 2:
            break
        
    rgb1 = cv2.imread(frames[0]["rgb_path"])
    gray1 = cv2.cvtColor(rgb1, cv2.COLOR_BGR2GRAY)
    
    rgb2 = cv2.imread(frames[1]["rgb_path"])
    gray2 = cv2.cvtColor(rgb2, cv2.COLOR_BGR2GRAY)

    keypoints1, descriptors1 = detect_features(gray1, type="orb", nfeatures=5000)
    keypoints2, descriptors2 = detect_features(gray2, type="orb", nfeatures=5000)

    print(f"Detected {len(keypoints1)} keypoints in frame 1")
    print(f"Detected {len(keypoints2)} keypoints in frame 2")
    
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = bf.match(descriptors1, descriptors2)
    
    points1 = np.array([keypoints1[m.queryIdx].pt for m in matches])
    points2 = np.array([keypoints2[m.trainIdx].pt for m in matches])

    intrinsic = dataset.intrinsic_matrix()
    print("Dataset intrinsic matrix:", intrinsic)
    fx = float(intrinsic[0, 0])
    cx = float(intrinsic[0, 2])
    cy = float(intrinsic[1, 2])

    E, mask = cv2.findEssentialMat(
        points1, points2, focal=fx, pp=(cx, cy), method=cv2.RANSAC, prob=0.999, threshold=1.0
    )
    points1 = points1[mask.ravel() == 1]
    points2 = points2[mask.ravel() == 1]
    
    _, R, t, mask = cv2.recoverPose(E, points1, points2, focal=fx, pp=(cx, cy))
    R = R.T
    t = -R @ t
    print("Rotation:")
    print(R)
    print("Translation:")
    print(t.ravel())
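One thing worth noting: the translation recovered from an essential matrix is only defined up to scale, and OpenCV's recoverPose returns a unit-length vector. A quick check (a small sketch using the variables from main() above):

    # The translation from recoverPose is only determined up to scale (unit norm)
    print("||t|| =", np.linalg.norm(t))    # prints approximately 1.0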

The mask returned by cv2.findEssentialMat can be used to filter out incorrect correspondences:

    points1 = points1[mask.ravel() == 1]
    points2 = points2[mask.ravel() == 1]
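As a quick sanity check (a small sketch, not in the original code), we can also print how many matches survive the RANSAC filtering:

    # Hypothetical sanity check: how many matches RANSAC kept as inliers
    num_inliers = int(mask.sum())
    print(f"Inliers: {num_inliers}/{len(mask)} ({num_inliers / len(mask):.1%})")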

If we visualize the filtered feature matches, we get the result below.

[Figure: feature matches between the two frames after filtering with the inlier mask]
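For reference, one way to produce this kind of visualization (a sketch, not necessarily how the figure above was generated) is to filter the match objects with the same mask and pass them to cv2.drawMatches:

    # Keep only the matches that survived RANSAC and draw them side by side
    inlier_matches = [m for m, keep in zip(matches, mask.ravel()) if keep == 1]
    vis = cv2.drawMatches(rgb1, keypoints1, rgb2, keypoints2, inlier_matches, None)
    cv2.imwrite("matches.png", vis)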

Then, together with the 3D visualization function from the earlier posts:

def create_frustum_with_image(image, camera_to_world=np.eye(4), color="red", axis=False):
    height, width = image.shape[:2]
    aspect_ratio = width * 1.0 / height
    objects = []    # Record all the objects created in this function
    center = np.array([0, 0, 0])
    points = np.array([
        [0.5, 0.5, 1],
        [0.5, -0.5, 1],
        [-0.5, -0.5, 1],
        [-0.5, 0.5, 1],
    ])
    points[:, 0] *= aspect_ratio

    for i in range(4):
        line = scene.visuals.Line(pos=np.array([center, points[i]]), color=color, antialias=True, width=2, parent=view.scene)
        objects.append(line)
        line = scene.visuals.Line(pos=np.array([points[i], points[(i + 1) % 4]]), color=color, antialias=True, width=2, parent=view.scene)
        objects.append(line)

    if axis:
        camera_axis = scene.visuals.XYZAxis(parent=view.scene, width=2, antialias=True)
        objects.append(camera_axis)
    
    # Create the image visual
    image_visual = scene.visuals.Image(image, parent=view.scene)
    image_scaling = 1.0 / height
    image_translate = (-width / 2.0 * image_scaling, -height / 2.0 * image_scaling)
    image_visual.transform = scene.transforms.STTransform(scale=(image_scaling, image_scaling), translate=image_translate)
    z_transform = scene.transforms.MatrixTransform()
    z_transform.translate([0, 0, 1.0])
    image_visual.transform = z_transform * image_visual.transform
    objects.append(image_visual)

    new_transform = scene.transforms.MatrixTransform()
    new_transform.matrix = camera_to_world.T    # NOTE: vispy's MatrixTransform uses the transposed (row-vector) convention
    for obj in objects:
        obj.transform = new_transform * obj.transform
    return objects

Since we only know the relative pose between the two cameras, we can set the first camera's pose to the identity matrix and use the estimated result as the second camera's pose, which gives the result below.

    camera_to_world_est = np.eye(4)
    camera_to_world_est[:3, :3] = R
    camera_to_world_est[:3, 3] = t.ravel()
    
    create_frustum_with_image(rgb1, np.eye(4), color="blue")
    create_frustum_with_image(rgb2, camera_to_world_est, color="blue")
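To actually display the scene, the vispy event loop from the earlier visualization posts has to run. Assuming the SceneCanvas created in that setup is named canvas (an assumed name, since the setup code is not repeated here), something like the following at the end of the script shows the window:

    from vispy import app    # vispy is the visualization backend used in this series
    canvas.show()             # `canvas` is the SceneCanvas from the earlier setup (assumed name)
    app.run()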

[Figure: estimated camera poses, with the first frustum at the origin and the second at the estimated relative pose]

