参考:https://github.com/ultralytics/yolov5
数据集格式: voc
1. 标注所需图像。
2. 数据组织成 VOC的格式。
3. 分割数据集:
# coding:utf-8
import os
import random
import argparse
def split_dataset(xml_path, txt_path, trainval_percent=1.0, train_percent=0.9):
    """Randomly split the samples in *xml_path* into VOC image-set list files.

    Every entry of ``xml_path`` is treated as one sample and identified by its
    file name without extension. Four files are written to ``txt_path``
    (created if missing): ``trainval.txt``, ``train.txt``, ``val.txt`` and
    ``test.txt``, one sample name per line.

    Args:
        xml_path: Directory holding one file per sample. Only the file stems
            are used, so either the annotation directory or the image
            directory works, provided both contain the same set of stems.
        txt_path: Output directory for the list files.
        trainval_percent: Fraction of all samples placed in trainval; the
            remainder goes to test.
        train_percent: Fraction of trainval placed in train; the remainder
            goes to val.
    """
    # Sorted so the order of names inside each list file does not depend on
    # the arbitrary order returned by os.listdir.
    stems = [os.path.splitext(entry)[0] for entry in sorted(os.listdir(xml_path))]
    os.makedirs(txt_path, exist_ok=True)

    num = len(stems)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval = random.sample(range(num), tv)
    # train is drawn from trainval, so train is always a subset of trainval.
    train = set(random.sample(trainval, tr))
    trainval = set(trainval)  # sets give O(1) membership tests in the loop below

    with open(os.path.join(txt_path, 'trainval.txt'), 'w') as file_trainval, \
            open(os.path.join(txt_path, 'test.txt'), 'w') as file_test, \
            open(os.path.join(txt_path, 'train.txt'), 'w') as file_train, \
            open(os.path.join(txt_path, 'val.txt'), 'w') as file_val:
        for i, stem in enumerate(stems):
            name = stem + '\n'
            if i not in trainval:
                file_test.write(name)
                continue
            file_trainval.write(name)
            if i in train:
                file_train.write(name)
            else:
                file_val.write(name)


def main():
    """Parse the command-line options and run the split."""
    parser = argparse.ArgumentParser()
    # Directory listing the samples; adjust to your own data. XML files normally
    # live under Annotations, but only the file stems are used here.
    # NOTE: the conversion script in VOClogo.yaml reads VOCdevkit/TVlogo<year>/...;
    # make sure the dataset directory name used here matches the one used there.
    parser.add_argument('--xml_path', default='./VOCdevkit/logo2021/JPEGImages', type=str, help='input xml label path')
    # Where the split lists are written: ImageSets/Main of your own dataset.
    parser.add_argument('--txt_path', default='./VOCdevkit/logo2021/ImageSets/Main', type=str, help='output txt label path')
    opt = parser.parse_args()
    split_dataset(opt.xml_path, opt.txt_path)


if __name__ == '__main__':
    main()
4. 修改 yaml 文件
复制 VOC.yaml 为 VOClogo.yaml,修改其内容:
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Custom dataset in PASCAL VOC layout, adapted from the stock VOC.yaml
# (PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford)
# Example usage: python train.py --data VOClogo.yaml
# Directory layout used by the paths in this file and by the conversion script below:
# <path>
# ├── VOCdevkit
# │   └── TVlogo2021   ← VOC-format source (Annotations, ImageSets/Main, JPEGImages)
# └── yolo             ← YOLO-format output (images/ and labels/, each with train2021 and val2021)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: xx/data # parent directory of VOCdevkit (placeholder: set this to your own location)
train: # train images (relative to 'path')
- yolo/images/train2021
# Entries left over from the stock VOC.yaml, unused here:
# - images/train2007
# - images/val2012
# - images/val2007
val: # val images (relative to 'path')
- yolo/images/val2021
test: # test images (optional); reuses the val split
- yolo/images/val2021
# Classes
nc: 17 # number of classes
names: [...] # class names (placeholder: replace with your own classes)
# Download script/URL (optional) ---------------------------------------------------------------------------------------
# NOTE: unlike the stock VOC.yaml this script downloads nothing. It converts a local VOC-format dataset under
# <path>/VOCdevkit/TVlogo<year> into YOLO format under <path>/yolo.
download: |
  import shutil
  import xml.etree.ElementTree as ET

  from tqdm import tqdm
  from utils.general import download, Path


  def convert_label(path, lb_path, year, image_id):
      """Convert one VOC XML annotation into a YOLO label file.

      Reads path/TVlogo{year}/Annotations/{image_id}.xml and writes one line
      "cls_id x_center y_center width height" per kept object to lb_path.
      Objects whose class is not in yaml['names'] or that are flagged
      difficult are skipped.
      """

      def convert_box(size, box):
          # size = (width, height); box = (xmin, xmax, ymin, ymax) in pixels.
          # Returns (x_center, y_center, width, height) divided by the image size.
          # The "- 1" is kept from the stock VOC.yaml; presumably it shifts
          # 1-based VOC pixel coordinates to 0-based.
          dw, dh = 1. / size[0], 1. / size[1]
          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
          return x * dw, y * dh, w * dw, h * dh

      # Passing a file name lets ElementTree open and close the file itself.
      tree = ET.parse(str(path / f'TVlogo{year}/Annotations/{image_id}.xml'))
      root = tree.getroot()
      # Boxes are normalized by the <size> stored in the XML, not by the actual
      # image, so a wrong <size> produces out-of-range label coordinates.
      size = root.find('size')
      w = int(size.find('width').text)
      h = int(size.find('height').text)

      with open(lb_path, 'w') as out_file:
          for obj in root.iter('object'):
              cls = obj.find('name').text
              # A missing or empty <difficult> tag is treated as "not difficult".
              difficult = int(obj.findtext('difficult') or 0)
              if cls in yaml['names'] and difficult != 1:
                  xmlbox = obj.find('bndbox')
                  bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                  cls_id = yaml['names'].index(cls)  # class id
                  out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')


  # Download (disabled: local data is used instead of the public VOC archives)
  dataset_root = Path(yaml['path'])  # dataset root dir
  # url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  # urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
  #         url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
  #         url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
  # download(urls, dir=dataset_root / 'images', delete=False)

  # Convert
  path = dataset_root / 'VOCdevkit'
  for year, image_set in ('2021', 'train'), ('2021', 'val'):
      imgs_path = dataset_root / 'yolo/images' / f'{image_set}{year}'
      lbs_path = dataset_root / 'yolo/labels' / f'{image_set}{year}'
      imgs_path.mkdir(exist_ok=True, parents=True)
      lbs_path.mkdir(exist_ok=True, parents=True)

      with open(path / f'TVlogo{year}/ImageSets/Main/{image_set}.txt') as list_file:
          image_ids = list_file.read().strip().split()
      for image_id in tqdm(image_ids, desc=f'{image_set}{year}'):
          src = path / f'TVlogo{year}/JPEGImages/{image_id}.jpg'  # old img path
          lb_path = (lbs_path / src.name).with_suffix('.txt')  # new label path
          shutil.copy(src, imgs_path / src.name)  # copy, so the VOC tree keeps its images
          convert_label(path, lb_path, year, image_id)  # convert labels to YOLO format
与官方的 VOC.yaml 相比,这里修改了部分路径,并且使用 shutil.copy 复制图像(而不是移动图像);同时不再下载 VOC 数据,而是使用我们自己的数据。
5. 训练:
python train.py --data VOClogo.yaml --weights yolov5s.pt --img 640 --epochs 20 --batch-size 64 --device 0 --hyp yolov5/data/hyps/hyp.finetune.yaml
首次训练时,如果 VOClogo.yaml 中 val 指向的目录还不存在,train.py 会执行其中的 download 脚本,将 VOC 格式的数据转为 yolo 格式,生成的目录结构如下。
└── yolo
├── images
│ ├── train2021
│ └── val2021
└── labels
├── train2021
└── val2021
遇到的问题:
train: WARNING: xxx: ignoring corrupt image/label: non-normalized or out of bounds coordinates xxx
原因是数据标注有问题:我遇到的情况是 xml 中 size 节点的 width、height 与图像实际尺寸不一致,导致归一化后的坐标越界。根据图像的实际大小修正 xml 中的 width、height 即可。
小技巧:
利用vscode的端口转发查看 训练曲线。
cd 到 events.out.tfevents... 目录,然后 tensorboard --logdir='./'
在本机浏览器 http://localhost:6006 就可以看到 训练曲线。
6. 推理
python detect.py --data data/VOClogo.yaml --weights runs/train/exp12/weights/best.pt --source testDataDir --imgsz 640 --device 0
版权声明:本文为CSDN博主「星魂非梦」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/hymn1993/article/details/123007661
暂无评论