1. Introduction
Image features play a central role in computer vision. SIFT (Scale-Invariant Feature Transform) is a powerful feature extraction algorithm, widely used for image registration, stitching, and object recognition. In this article, we implement SIFT feature extraction and matching in Python using the OpenCV library.
2. Principles of the SIFT Algorithm
2.1 Scale Space
SIFT detects keypoints and extracts features at multiple image scales, so it first builds a scale-space representation of the image. The scale space consists of versions of the same image at different scales, commonly organized as a Gaussian pyramid: each level is produced by smoothing the previous one with a Gaussian filter (a low-pass filter that blurs the image) and downsampling it.
import cv2
import numpy as np

img = cv2.imread('image.jpg', 0)  # read as grayscale

# Build the Gaussian pyramid: each level is a blurred,
# half-resolution copy of the previous one
gaussian_pyramid = [img]
for i in range(5):
    gaussian_pyramid.append(cv2.pyrDown(gaussian_pyramid[-1]))
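Note that cv2.pyrDown both smooths and halves the image: it convolves with a fixed 5x5 Gaussian kernel and then drops every second row and column. A rough equivalent for intuition only (the blur parameters here are an approximation, not pyrDown's exact kernel):

blurred = cv2.GaussianBlur(img, (5, 5), 0)  # smooth first to avoid aliasing
half = blurred[::2, ::2]                    # then drop every second row/column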
2.2 Keypoint Detection
SIFT detects keypoints at extrema of the scale space: pixels that are a local maximum or minimum both within their own scale and across adjacent scales. The ideal detector is the Laplacian of Gaussian (LoG), i.e. the Laplacian operator applied to a Gaussian-smoothed image; in practice SIFT approximates it with the Difference of Gaussians (DoG), obtained by subtracting adjacent levels of the Gaussian pyramid, which is what the code below does.
# Build the "Laplacian" pyramid: level 0 is a true Laplacian response,
# levels 1-5 are differences of adjacent Gaussian levels (DoG)
laplacian_pyramid = [cv2.Laplacian(gaussian_pyramid[0], cv2.CV_32F)]
for i in range(1, 6):
    up = cv2.pyrUp(gaussian_pyramid[i],
                   dstsize=(gaussian_pyramid[i - 1].shape[1], gaussian_pyramid[i - 1].shape[0]))
    laplacian_pyramid.append(cv2.subtract(gaussian_pyramid[i - 1], up))

# Keep pixels that are the extremum of their 3x3 neighborhood
# (index 4 is the center of the flattened patch)
keypoints = []
for i in range(1, 5):
    scale = 2 ** (i - 1)  # level i has the resolution of gaussian_pyramid[i - 1]
    for j in range(1, laplacian_pyramid[i].shape[0] - 1):
        for k in range(1, laplacian_pyramid[i].shape[1] - 1):
            patch = laplacian_pyramid[i][j - 1:j + 2, k - 1:k + 2]
            if np.argmax(patch) == 4 or np.argmin(patch) == 4:
                keypoints.append(cv2.KeyPoint(float(k * scale), float(j * scale), float(scale)))
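As a sanity check, the result can be compared against OpenCV's full SIFT implementation, which ships in the main opencv-python package since version 4.4.0 (earlier versions need opencv-contrib-python):

sift_cv = cv2.SIFT_create()
kps_cv = sift_cv.detect(img, None)
print(len(keypoints), 'simplified vs.', len(kps_cv), 'OpenCV keypoints')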
2.3 Keypoint Localization and Orientation Assignment
Locating each keypoint precisely in position and scale is essential in SIFT. To make keypoints robust to rotation, the algorithm also assigns each one a dominant orientation: it computes gradient magnitudes and directions in the region around the keypoint, accumulates them into an orientation histogram, and takes the direction with the largest accumulated magnitude as the dominant orientation.
# Assign a dominant orientation from a 36-bin histogram
# (10 degrees per bin) of gradient directions around each keypoint
keypoints_with_orientation = []
for kp in keypoints:
    xs, ys = int(kp.pt[0]), int(kp.pt[1])
    size = int(kp.size)
    # skip keypoints whose patch would fall outside the image
    if ys - size < 0 or ys + size + 1 > img.shape[0] or xs - size < 0 or xs + size + 1 > img.shape[1]:
        continue
    patch = cv2.GaussianBlur(img[ys - size:ys + size + 1, xs - size:xs + size + 1], (3, 3), 0)
    dx = cv2.Sobel(patch, cv2.CV_32F, 1, 0, ksize=3)
    dy = cv2.Sobel(patch, cv2.CV_32F, 0, 1, ksize=3)
    magnitude, angle = cv2.cartToPolar(dx, dy, angleInDegrees=True)
    binning = np.zeros(36)
    for x in range(-size, size + 1):
        for y in range(-size, size + 1):
            _magnitude = magnitude[y + size, x + size]
            _angle = angle[y + size, x + size] % 360
            binning[int(_angle / 10)] += _magnitude
    max_idx = np.argmax(binning)
    kp.angle = (max_idx + 0.5) * 10  # bin center as the dominant orientation
    keypoints_with_orientation.append(kp)
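The original SIFT paper additionally creates a separate keypoint for every histogram peak within 80% of the strongest one, which helps on patches with two dominant gradient directions. A minimal sketch of that refinement, to be placed inside the loop right after kp.angle is set:

for idx in np.where(binning >= 0.8 * binning[max_idx])[0]:
    if idx != max_idx:
        # duplicate the keypoint with the secondary orientation
        kp2 = cv2.KeyPoint(kp.pt[0], kp.pt[1], kp.size, (idx + 0.5) * 10.0)
        keypoints_with_orientation.append(kp2)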
2.4 Feature Description
SIFT describes each keypoint with a 128-dimensional vector. The region around the keypoint is divided into 4x4 = 16 sub-regions, and within each sub-region the gradient directions (weighted by magnitude) are accumulated into an 8-bin orientation histogram. The 16 histograms of 8 bins each are concatenated into the 128-dimensional descriptor.
# Build a 128-dimensional descriptor per keypoint: a simplified
# stand-in for the real SIFT layout (16 spatial cells x 8 bins)
descriptors = []
for kp in keypoints_with_orientation:  # these already passed the border check above
    xs, ys = int(kp.pt[0]), int(kp.pt[1])
    size = int(kp.size)
    patch = cv2.GaussianBlur(img[ys - size:ys + size + 1, xs - size:xs + size + 1], (3, 3), 0)
    dx = cv2.Sobel(patch, cv2.CV_32F, 1, 0, ksize=3)
    dy = cv2.Sobel(patch, cv2.CV_32F, 0, 1, ksize=3)
    magnitude, angle = cv2.cartToPolar(dx, dy, angleInDegrees=True)
    binning = np.zeros(128)
    for x in range(-size, size + 1):
        for y in range(-size, size + 1):
            _magnitude = magnitude[y + size, x + size]
            # measure angles relative to the dominant orientation for rotation invariance
            _angle = (angle[y + size, x + size] - kp.angle) % 360
            bin_idx = int(_angle / 360 * 16)  # one of 16 orientation groups
            binning[bin_idx * 8:(bin_idx + 1) * 8] += _magnitude
    descriptors.append(binning)
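The real SIFT descriptor is additionally normalized for illumination invariance: it is scaled to unit length, clamped at 0.2 to damp the influence of single large gradients, and renormalized. A minimal sketch of that post-processing, which would replace the descriptors.append(binning) line above:

desc = binning.astype(np.float32)
desc /= np.linalg.norm(desc) + 1e-7  # unit length: invariance to contrast changes
desc = np.minimum(desc, 0.2)         # clamp large components (threshold from the SIFT paper)
desc /= np.linalg.norm(desc) + 1e-7  # renormalize after clamping
descriptors.append(desc)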
3. Feature Matching
To match features between two images, we compute the Euclidean distance between their 128-dimensional descriptors. For each descriptor in the first image, SIFT finds the two nearest descriptors in the second image and applies Lowe's ratio test: the match is accepted only if the nearest distance is clearly smaller than the second-nearest distance (here, less than 0.8 times it).
# Match with Lowe's ratio test: accept a match only if the nearest
# neighbor is clearly closer than the second nearest
matches = []
des2 = np.array(descriptors2)
for i in range(len(descriptors1)):
    distances = np.linalg.norm(des2 - descriptors1[i], axis=1)
    nearest, second_nearest = np.argsort(distances)[:2]
    if distances[nearest] < 0.8 * distances[second_nearest]:
        matches.append(cv2.DMatch(i, int(nearest), float(distances[nearest])))
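In practice the same ratio test is usually done with OpenCV's brute-force matcher; a sketch, assuming the descriptors have been converted to float32 arrays:

des1 = np.array(descriptors1, dtype=np.float32)
des2 = np.array(descriptors2, dtype=np.float32)
bf = cv2.BFMatcher(cv2.NORM_L2)
good = [m for m, n in bf.knnMatch(des1, des2, k=2)
        if m.distance < 0.8 * n.distance]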
4. Example Code
The following complete example implements SIFT feature extraction and matching end to end.
import cv2
import numpy as np
def sift(img):
    # Build the Gaussian pyramid
    gaussian_pyramid = [img]
    for i in range(5):
        gaussian_pyramid.append(cv2.pyrDown(gaussian_pyramid[-1]))
    # Build the DoG ("Laplacian") pyramid
    laplacian_pyramid = [cv2.Laplacian(gaussian_pyramid[0], cv2.CV_32F)]
    for i in range(1, 6):
        up = cv2.pyrUp(gaussian_pyramid[i],
                       dstsize=(gaussian_pyramid[i - 1].shape[1], gaussian_pyramid[i - 1].shape[0]))
        laplacian_pyramid.append(cv2.subtract(gaussian_pyramid[i - 1], up))
    # Detect keypoints as 3x3 neighborhood extrema
    keypoints = []
    for i in range(1, 5):
        scale = 2 ** (i - 1)  # level i has the resolution of gaussian_pyramid[i - 1]
        for j in range(1, laplacian_pyramid[i].shape[0] - 1):
            for k in range(1, laplacian_pyramid[i].shape[1] - 1):
                patch = laplacian_pyramid[i][j - 1:j + 2, k - 1:k + 2]
                if np.argmax(patch) == 4 or np.argmin(patch) == 4:
                    keypoints.append(cv2.KeyPoint(float(k * scale), float(j * scale), float(scale)))
    # Assign a dominant orientation to each keypoint
    keypoints_with_orientation = []
    for kp in keypoints:
        xs, ys = int(kp.pt[0]), int(kp.pt[1])
        size = int(kp.size)
        # skip keypoints whose patch would fall outside the image
        if ys - size < 0 or ys + size + 1 > img.shape[0] or xs - size < 0 or xs + size + 1 > img.shape[1]:
            continue
        patch = cv2.GaussianBlur(img[ys - size:ys + size + 1, xs - size:xs + size + 1], (3, 3), 0)
        dx = cv2.Sobel(patch, cv2.CV_32F, 1, 0, ksize=3)
        dy = cv2.Sobel(patch, cv2.CV_32F, 0, 1, ksize=3)
        magnitude, angle = cv2.cartToPolar(dx, dy, angleInDegrees=True)
        binning = np.zeros(36)
        for x in range(-size, size + 1):
            for y in range(-size, size + 1):
                binning[int((angle[y + size, x + size] % 360) / 10)] += magnitude[y + size, x + size]
        kp.angle = (np.argmax(binning) + 0.5) * 10
        keypoints_with_orientation.append(kp)
    # Compute 128-dimensional descriptors (simplified 16 cells x 8 bins layout)
    descriptors = []
    for kp in keypoints_with_orientation:  # these already passed the border check
        xs, ys = int(kp.pt[0]), int(kp.pt[1])
        size = int(kp.size)
        patch = cv2.GaussianBlur(img[ys - size:ys + size + 1, xs - size:xs + size + 1], (3, 3), 0)
        dx = cv2.Sobel(patch, cv2.CV_32F, 1, 0, ksize=3)
        dy = cv2.Sobel(patch, cv2.CV_32F, 0, 1, ksize=3)
        magnitude, angle = cv2.cartToPolar(dx, dy, angleInDegrees=True)
        binning = np.zeros(128)
        for x in range(-size, size + 1):
            for y in range(-size, size + 1):
                # measure angles relative to the dominant orientation
                _angle = (angle[y + size, x + size] - kp.angle) % 360
                bin_idx = int(_angle / 360 * 16)
                binning[bin_idx * 8:(bin_idx + 1) * 8] += magnitude[y + size, x + size]
        descriptors.append(binning)
    # return the oriented keypoints so indices line up with the descriptors
    return keypoints_with_orientation, descriptors
def match(descriptors1, descriptors2):
    # Lowe's ratio test: keep a match only if the nearest descriptor is
    # clearly closer than the second nearest
    des2 = np.array(descriptors2)
    matches = []
    for i in range(len(descriptors1)):
        distances = np.linalg.norm(des2 - descriptors1[i], axis=1)
        nearest, second_nearest = np.argsort(distances)[:2]
        if distances[nearest] < 0.8 * distances[second_nearest]:
            matches.append(cv2.DMatch(i, int(nearest), float(distances[nearest])))
    return matches
def draw_matches(img1, img2, kps1, kps2, matches):
    # place the two grayscale images side by side on a black canvas
    h = max(img1.shape[0], img2.shape[0])
    out_img = np.zeros((h, img1.shape[1] + img2.shape[1], 3), dtype=np.uint8)
    out_img[:img1.shape[0], :img1.shape[1], :] = img1[..., None]
    out_img[:img2.shape[0], img1.shape[1]:, :] = img2[..., None]
    for match in matches:
        pt1 = (int(kps1[match.queryIdx].pt[0]), int(kps1[match.queryIdx].pt[1]))
        pt2 = (int(kps2[match.trainIdx].pt[0]) + img1.shape[1], int(kps2[match.trainIdx].pt[1]))
        cv2.circle(out_img, pt1, 4, (0, 0, 255), 1)
        cv2.circle(out_img, pt2, 4, (0, 0, 255), 1)
        cv2.line(out_img, pt1, pt2, (0, 255, 0), 1)
    return out_img
if __name__ == '__main__':
    img1 = cv2.imread('image1.jpg', 0)
    img2 = cv2.imread('image2.jpg', 0)
    kps1, descriptors1 = sift(img1)
    kps2, descriptors2 = sift(img2)
    matches = match(descriptors1, descriptors2)
    out_img = draw_matches(img1, img2, kps1, kps2, matches)
    cv2.imwrite('matches.jpg', out_img)
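For comparison, OpenCV's built-in helper draws the same kind of visualization; assuming kps1, kps2, and matches from the main block above:

vis = cv2.drawMatches(img1, kps1, img2, kps2, matches, None,
                      flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
cv2.imwrite('matches_cv.jpg', vis)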