文章目录[隐藏]
CV — 目标检测:letterbox
一、相关概念
-
letterbox:
-
概念:
在大多数目标检测算法中,由于 卷积核为方形(不排除卷积核有矩形的情况),所以模型输入图片的尺寸也需要为方形。然而大多数数据集的图片基本上为 矩形,直接将图片 resize 到正方形,会导致图片失真,比如细长图片中的物体会变畸形。
letterbox操作:在对图片进行resize时,保持原图的长宽比进行等比例缩放,当长边 resize 到需要的长度时,短边剩下的部分采用灰色填充。
-
补充点:
- 在目标检测领域,对数据集图片进行了 letterbox 操作,同时标注框也需要进行 letterbox 操作。
-
相关算法:
在 yolo,ssd 等算法的图片预处理过程中,皆使用了 letterbox 处理。
-
二、代码实现
(一) python代码
-
样例说明:
-
直接 resize:
我们从下图观察,左侧为原图,右侧为直接 resize 之后的图片,明显感觉右侧图片汽车形变失真了
-
letterbox 操作:
右侧图图,我们在进行 resize 时保持了原图的长度比,上下部分不足的部分采用灰色进行填充。
同时左侧绿色的 标注框 是在原图尺寸下,右侧蓝色是 letterbox 之后的标注框,标注框的坐标也要跟着变换。
-
-
完整代码:
#!/usr/bin/env python # encoding: utf-8 ''' @Author : pentiumCM @Email : 842679178@qq.com @Software: PyCharm @File : util.py @Time : 2021/7/17 1:58 @desc : 目标检测工具类 ''' import cv2 import torch import numpy as np def letterbox_image(image_src, dst_size, pad_color=(114, 114, 114)): """ 缩放图片,保持长宽比。 :param image_src: 原图(numpy) :param dst_size: (h,w) :param pad_color: 填充颜色,默认是灰色 :return: """ src_h, src_w = image_src.shape[:2] dst_h, dst_w = dst_size scale = min(dst_h / src_h, dst_w / src_w) pad_h, pad_w = int(round(src_h * scale)), int(round(src_w * scale)) if image_src.shape[0:2] != (pad_w, pad_h): image_dst = cv2.resize(image_src, (pad_w, pad_h), interpolation=cv2.INTER_LINEAR) else: image_dst = image_src top = int((dst_h - pad_h) / 2) down = int((dst_h - pad_h + 1) / 2) left = int((dst_w - pad_w) / 2) right = int((dst_w - pad_w + 1) / 2) # add border image_dst = cv2.copyMakeBorder(image_dst, top, down, left, right, cv2.BORDER_CONSTANT, value=pad_color) x_offset, y_offset = max(left, right) / dst_w, max(top, down) / dst_h return image_dst, x_offset, y_offset def letterbox_label(bounding_box, dst_size=(640, 640), x_offset=0, y_offset=0, normalize=False, src_size=None): """ 缩放图片,调整 bounding_box 的坐标 :param bounding_box: (numpy,(-1,4))标注框,采用归一化的形式,x / w :param dst_size: (tuple)填充之后图片的尺寸,(h,w) :param x_offset: (float)上下填充的大小,归一化形式 :param y_offset: (float)左右填充的大小,归一化形式 :param normalize: (bool)传入的 bounding_box 是否归一化 :param src_size: (tuple)原图的尺寸,(h,w),归一化时候需要 :return: """ if not normalize: assert src_size, 'src_size is None' h = src_size[0] w = src_size[1] bounding_box = bounding_box.astype(np.float) bounding_box[:, 0] = bounding_box[:, 0] / w # top left x bounding_box[:, 1] = bounding_box[:, 1] / h # top left y bounding_box[:, 2] = bounding_box[:, 2] / w # bottom right x bounding_box[:, 3] = bounding_box[:, 3] / h # bottom right y y = bounding_box.clone() if isinstance(bounding_box, torch.Tensor) else np.copy(bounding_box) # 整体图片尺寸 pad_h = dst_size[0] pad_w = dst_size[1] # 内部(除去填充部分)图片尺寸 inner_w = pad_w * (1 - 2 * x_offset) inner_h = pad_h * (1 - 2 * y_offset) y[:, 0] = inner_w * bounding_box[:, 0] + pad_w * x_offset # top left x y[:, 1] = inner_h * bounding_box[:, 1] + pad_h * y_offset # top left y y[:, 2] = inner_w * bounding_box[:, 2] + pad_w * x_offset # bottom right x y[:, 3] = inner_h * bounding_box[:, 3] + pad_h * y_offset # bottom right y return y def plot_one_box(box, image, label=None, color=(0, 255, 0), line_thickness=3): """ Plots one bounding box on image using OpenCV :param box: bounding_box,xyxy。类型:list :param image: :param color: :param label: :param line_thickness: :return: """ assert image.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.' tl = line_thickness or round(0.002 * (image.shape[0] + image.shape[1]) / 2) + 1 # line/font thickness # 左上,右下 c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) cv2.rectangle(image, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) if label: tf = max(tl - 1, 1) # font thickness t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA) # filled cv2.putText(image, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) def letterbox_test(): """ letterbox 变换测试 :return: """ # h,w dst_size = (640, 640) image_path = 'F:/develop_code/python/ssd-pytorch/VOCdevkit/VOC2007/JPEGImages/000012.jpg' labels = [156, 97, 351, 270] image = cv2.imread(image_path) # box:xyxy box = np.array(labels, dtype=np.float) box = np.reshape(box, (-1, 4)) cv2.imshow('org_image', image) image_directresize = image image_directresize = cv2.resize(image_directresize, dst_size) cv2.imshow('image_directresize', image_directresize) # 可视化标注框 for i in range(box.shape[0]): plot_one_box(box=box[i], image=image, line_thickness=2) letter_image, x_offset, y_offset = letterbox_image(image, dst_size) letter_box = letterbox_label(box, dst_size, x_offset, y_offset, False, image.shape[:-1]) for i in range(letter_box.shape[0]): plot_one_box(box=letter_box[i], image=letter_image, line_thickness=2, color=(255, 0, 0)) cv2.imshow('org_label', image) cv2.imshow('letter_image', letter_image) cv2.waitKey() if __name__ == '__main__': letterbox_test()
版权声明:本文为CSDN博主「pentiumCM」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/pentiumCM/article/details/118887321
暂无评论