使用 YOLOv5 训练自定义数据集

参考：https://github.com/ultralytics/yolov5

数据集格式： voc

1. 标注所需图像。

2. 数据组织成 VOC的格式。

3. 分割数据集：

# coding:utf-8
#
# Split a VOC-style dataset into image-set lists.
#
# Lists every file in --xml_path, strips the file extension to obtain the
# sample id, and randomly distributes the ids over four text files written
# to --txt_path: trainval.txt, train.txt, val.txt and test.txt (one id per
# line). With trainval_percent = 1.0 every sample goes to trainval and
# test.txt is left empty.

import os
import random
import argparse

parser = argparse.ArgumentParser()
# Directory whose file names define the sample ids. Only the file-name stems
# are used, so either the Annotations (xml) or the JPEGImages directory works
# as long as both contain the same stems.
parser.add_argument('--xml_path', default='./VOCdevkit/logo2021/JPEGImages', type=str, help='input xml label path')
# Output directory for the split lists, normally <dataset>/ImageSets/Main.
# NOTE(review): the label-conversion script in the dataset YAML reads
# ImageSets/Main under VOCdevkit/TVlogo{year}; pass --xml_path/--txt_path
# accordingly if your dataset directory is named TVlogo2021.
parser.add_argument('--txt_path', default='./VOCdevkit/logo2021/ImageSets/Main', type=str, help='output txt label path')
opt = parser.parse_args()

trainval_percent = 1.0  # fraction of all samples assigned to trainval (the rest go to test)
train_percent = 0.9  # fraction of trainval assigned to train (the rest go to val)
xmlfilepath = opt.xml_path
txtsavepath = opt.txt_path
total_xml = os.listdir(xmlfilepath)
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(txtsavepath, exist_ok=True)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)
# Sets give O(1) membership tests in the loop below; the sampled lists would
# make the loop quadratic in the number of files.
trainval_set = set(trainval)
train_set = set(train)

# The context manager closes all four files even if writing fails part-way.
with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as file_trainval, \
        open(os.path.join(txtsavepath, 'test.txt'), 'w') as file_test, \
        open(os.path.join(txtsavepath, 'train.txt'), 'w') as file_train, \
        open(os.path.join(txtsavepath, 'val.txt'), 'w') as file_val:
    for i, filename in enumerate(total_xml):
        # splitext handles extensions of any length (.xml, .jpg, .jpeg, ...),
        # unlike slicing off a fixed four characters.
        name = os.path.splitext(filename)[0] + '\n'
        if i in trainval_set:
            file_trainval.write(name)
            if i in train_set:
                file_train.write(name)
            else:
                file_val.write(name)
        else:
            file_test.write(name)

4. 修改 yaml 文件

复制 VOC.yaml 为 VOClogo.yaml，修改其内容：

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOClogo.yaml
# parent
# ├── yolov5
# └── datasets
#     └── VOC  ← downloads here


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: xx/data # VOCdevkit的上级路径
train: # train images (relative to 'path')  16551 images
  - yolo/images/train2021
  # - images/train2007
  # - images/val2012
  # - images/val2007
val: # val images (relative to 'path')  4952 images
  - yolo/images/val2021
test: # test images (optional)
  - yolo/images/val2021

# Classes
nc: 17  # number of classes
names: [...]  # class names 自己的类别

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import xml.etree.ElementTree as ET

  from tqdm import tqdm
  from utils.general import download, Path
  import shutil

  def convert_label(path, lb_path, year, image_id):
      """Convert one VOC XML annotation into a YOLO-format label file.

      Reads ``path / TVlogo{year}/Annotations/{image_id}.xml`` and writes one
      line per kept object to ``lb_path`` in the form
      ``class_id x_center y_center width height`` (box values normalized by the
      image width/height). Objects whose class is not in ``yaml['names']`` or
      that are marked ``difficult == 1`` are skipped. The label file is created
      (and truncated) even when no object is kept.

      Args:
          path: Directory containing the ``TVlogo{year}`` dataset folder.
          lb_path: Destination label file path.
          year: Dataset suffix used in the ``TVlogo{year}`` folder name.
          image_id: Annotation file stem (without the ``.xml`` extension).
      """
      def convert_box(size, box):
          # size is (width, height); box is (xmin, xmax, ymin, ymax) in pixels.
          # Returns (x_center, y_center, width, height) scaled by 1/width, 1/height.
          dw, dh = 1. / size[0], 1. / size[1]
          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
          return x * dw, y * dh, w * dw, h * dh

      # Binary mode lets the XML parser apply the document's own encoding
      # instead of the platform locale; 'with' closes the handle after parsing.
      with open(path / f'TVlogo{year}/Annotations/{image_id}.xml', 'rb') as in_file:
          root = ET.parse(in_file).getroot()
      size = root.find('size')
      w = int(size.find('width').text)
      h = int(size.find('height').text)

      with open(lb_path, 'w') as out_file:
          for obj in root.iter('object'):
              cls = obj.find('name').text
              # A missing <difficult> tag is treated as "not difficult"
              # rather than raising AttributeError on None.
              difficult = obj.find('difficult')
              is_difficult = difficult is not None and int(difficult.text) == 1
              if cls in yaml['names'] and not is_difficult:
                  xmlbox = obj.find('bndbox')
                  bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                  cls_id = yaml['names'].index(cls)  # class id
                  out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')


  # Dataset root directory, taken from the 'path' key of this YAML.
  root_dir = Path(yaml['path'])

  # Download (disabled: the official VOC archives are not fetched)
  # url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  # urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
  #         url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
  #         url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
  # download(urls, dir=root_dir / 'images', delete=False)

  # Convert: for each image set, copy the listed images into
  # yolo/images/<set><year> and write matching YOLO labels into
  # yolo/labels/<set><year>.
  path = root_dir / 'VOCdevkit'
  for year, image_set in ('2021', 'train'), ('2021', 'val'):
      imgs_path = root_dir / 'yolo/images' / f'{image_set}{year}'
      lbs_path = root_dir / 'yolo/labels' / f'{image_set}{year}'
      imgs_path.mkdir(exist_ok=True, parents=True)
      lbs_path.mkdir(exist_ok=True, parents=True)

      # read_text() opens and closes the list file itself, so no handle is leaked.
      image_ids = (path / f'TVlogo{year}/ImageSets/Main/{image_set}.txt').read_text().strip().split()
      for image_id in tqdm(image_ids, desc=f'{image_set}{year}'):
          f = path / f'TVlogo{year}/JPEGImages/{image_id}.jpg'  # source image path
          lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
          shutil.copy(f, imgs_path / f.name)  # copy image (the source file is left in place)
          convert_label(path, lb_path, year, image_id)  # convert labels to YOLO format

这里相对官方修改了部分路径，并且使用 shutil.copy 复制图像。这里不下载VOC data，使用我们自己的数据。

5. 训练：