

本文介绍数据准备完成之后,进行模型训练的全流程。本文直接从 Jupyter Notebook 转换而来,代码都经过验证,但格式可能会有点问题。



1.1 划分训练和验证集

import os

def create_train_val(datasets=[("VOC2007_DEST",1.0),("VOC2012_DEST",1.0),("COCO_VOC_DEST",0.95)]):

    wd = os.getcwd()
    for dataset,percent in datasets:
        img_files = os.listdir('%s/VOCdevkit/%s/JPEGImages' %(wd,dataset))
        split = int(len(img_files) * percent)
        train_img_files,val_img_files = img_files[:split],img_files[split:]
        with open('%s/VOCdevkit/%s/ImageSets/Main/train.txt' %(wd,dataset),'w') as f1:
            for img_file in train_img_files:
        print('train.txt done')
        with open('%s/VOCdevkit/%s/ImageSets/Main/val.txt' %(wd,dataset),'w') as f2:
            for img_file in val_img_files:
        print('val.txt done')


import os
from sklearn.model_selection import train_test_split

def create_train_val(datasets=[("VOC2007_DEST",1.0),("VOC2012_DEST",1.0),("COCO_VOC_DEST",0.95)]):

    wd = os.getcwd()
    for dataset,percent in datasets:
        img_files = os.listdir('%s/VOCdevkit/%s/JPEGImages' %(wd,dataset))
        if percent<1.0:
            train_img_files,val_img_files = train_test_split(img_files,train_size=percent)
        with open('%s/VOCdevkit/%s/ImageSets/Main/train.txt' %(wd,dataset),'w') as f1:
            for img_file in train_img_files:
        print('train.txt done')
        with open('%s/VOCdevkit/%s/ImageSets/Main/val.txt' %(wd,dataset),'w') as f2:
            for img_file in val_img_files:
        print('val.txt done')

1.2 将数据标注格式转换为YOLO格式

import os
import shutil
from tqdm import tqdm
import xml.etree.ElementTree as ET
def convert(size, box):
    """Turn a pixel-space box into a normalized centre/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` -- the x pair comes first.

    Returns:
        ``(x_center, y_center, box_width, box_height)``; the x values are
        multiplied by ``1/size[0]`` and the y values by ``1/size[1]``.
    """
    x_scale = 1. / size[0]
    y_scale = 1. / size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]

    center_x = (x_min + x_max) / 2.0
    center_y = (y_min + y_max) / 2.0
    span_x = x_max - x_min
    span_y = y_max - y_min

    return (
        center_x * x_scale,
        center_y * y_scale,
        span_x * x_scale,
        span_y * y_scale,
    )

def convert_yolo(datasets=[('VOC2007_DEST','train'),('VOC2007_DEST','val')],classes=['bicycle','bus','car','motorbike','truck']):
    wd = os.getcwd()
    for i,(dataset, image_set) in enumerate(datasets):
        dest_path = 'VOCdevkit/%s/labels/'%(dataset)

        if os.path.exists(dest_path):
    for i,(dataset, image_set) in enumerate(datasets):
        dest_path = 'VOCdevkit/%s/labels/'%(dataset)
        if not os.path.exists(dest_path):
    for i,(dataset, image_set) in tqdm(enumerate(datasets)):
        image_ids = open('VOCdevkit/%s/ImageSets/Main/%s.txt'%(dataset, image_set)).read().strip().split()
        list_file = open('%s_%s.txt'%(dataset, image_set), 'w')
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/%s/JPEGImages/%s.jpg\n'%(wd, dataset, image_id))
            try: #对于有标记文件的数据可以进行转换
                in_file = open('VOCdevkit/%s/Annotations/%s.xml'%(dataset, image_id))
                out_file = open('VOCdevkit/%s/labels/%s.txt'%(dataset, image_id), 'w')
                root = tree.getroot()
                size = root.find('size')
                # print image_id
                w = int(size.find('width').text)
                h = int(size.find('height').text)

                for obj in root.iter('object'):
                    difficult = obj.find('difficult').text
                    cls = obj.find('name').text
                    if cls not in classes or int(difficult) == 1:
                    cls_id = classes.index(cls)
                    xmlbox = obj.find('bndbox')
                    b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
                    bb = convert((w,h),b)
                    out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
                out_file = open('VOCdevkit/%s/labels/%s.txt'%(dataset, image_id), 'w')


    train_txt=[a+'_'+b+'.txt' for a,b in datasets if b=='train']
    if len(train_txt):
        strs_train = 'cat '+ ' '.join(train_txt) +' > cfg/train.txt'
    val_txt = [a+'_'+b+'.txt' for a,b in datasets if b=='val']
    if len(val_txt):
        strs_val = 'cat '+ ' '.join(val_txt) +'> cfg/val.txt'    
    train_val_txt = [a+'_'+b+'.txt' for a,b in datasets]
    if len(train_val_txt):
        rm_txt = 'rm '+ ' '.join(train_val_txt)
    print("all Done!")

# convert_yolo()
6it [00:04,  1.24it/s]

all Done!
! ls VOCdevkit/VOC2012_DEST/labels




2.1 新建cfg/vechcle.names


names = ['bicycle','bus','car','motorbike','truck']
with open('cfg/vechcle.names','w') as f:
    for i,name in enumerate(names):
        if i != len(names)-1:
with open('cfg/vechcle.names') as f:

2.2 新建cfg/vechcle.data

可以复制darknet的cfg/voc.data,再根据自己的情况进行修改

datas=["classes= 5",
"train  = /workspace/yolo_demo/cfg/train.txt",
"valid  = /workspace/yolo_demo/cfg/val.txt",
"names = /workspace/yolo_demo/cfg/vechcle.names",
"backup = /workspace/yolo_demo/backup"
with open('cfg/vechcle.data','w') as f:
    for i,data in enumerate(datas):
        if i != len(datas)-1:
with open('cfg/vechcle.data') as f:
classes= 5
train  = /workspace/yolo_demo/cfg/train.txt
valid  = /workspace/yolo_demo/cfg/val.txt
names = /workspace/yolo_demo/cfg/vechcle.names
backup = /workspace/yolo_demo/backup

2.3 根据所选模型的不同,设置不同的配置文件


上图为不同模型的对比结果。下面将列出不同模型在MSCOCO数据集上的预训练模型及相关说明,FPS是在 RTX 2070 (R) 或 Tesla V100 (V) 上的结果

配置文件 输入大小 mAP@0.5 mAP@0.5:0.95 FPS 大小 预训练模型
yolov4-p6.cfg 1280x1280 72.1% 54.0% 32(V) 487MB yolov4-p6.conv.289
yolov4-p5.cfg 896x896 70.0% 51.6% 43(V) 271MB yolov4-p5.conv.232
yolov4-csp-x-swish.cfg 640x640 69.9% 51.5% 23(R)/50(V) 381MB yolov4-csp-x-swish.conv.192
yolov4-csp-swish.cfg 640x640 68.7% 50% 70(V) 202M yolov4-csp-swish.conv.164
yolov4x-mish.cfg 640x640 68.5% 50.1% 23(R)/50(V) 381M yolov4x-mish.conv.166
yolov4-csp.cfg 640x640 67.4% 48.7% 70(V) 202M yolov4-csp.conv.142
yolov4-csp.cfg 512x512 64.8% 46.2% 93(V) 202M yolov4-csp.conv.142
yolov4.cfg 608x608 65.7% 43.5% 34(R)/64(V) 245M yolov4.conv.137
yolov4.cfg 512x512 64.9% 43.0% 45(R)/83(V) 245M yolov4.conv.137
yolov4.cfg 416x416 62.8% 41.2% 55(R)/96(V) 245M yolov4.conv.137
yolov4.cfg 320x320 60% 38% 63(R)/123(V) 245M yolov4.conv.137
yolov4-tiny.cfg 416x416 40% -% 330(R)/371(1080Ti) 23.1M yolov4-tiny.conv.29
yolov3.cfg 416x416 55.3% -% 66(R) 236M darknet53.conv.74
yolov3-tiny.cfg 416x416 33.1% -% 370(R) 33.7M yolov3-tiny.conv.11
enet-coco.cfg(yolov3 efficientnetb0) None 45.5% -% 55(R) 18.3M enetb0-coco.conv.132



2.3.1 新建cfg/yolov4_vechcle.cfg


  • 第8,9行:width,height 可以设置为416,608,832等其它32的倍数,这里设置608
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第744行:stopbackward的数值表示迭代这么多次后,前边的层将停止更新,可以注释掉,用来做迁移学习
  • 第970,1058,1146行:修改classes为我们的具体分类数
  • 第963,1051,1139行:修改filters为(classes+5)*3

2.3.2 新建cfg/yolov4_tiny_vechcle.cfg


  • 第8,9行:width,height 可以设置为416,608,832等其它32的倍数,这里设置608
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第220,269行:修改classes为我们的具体分类数
  • 第212,263行:修改filters为(classes+5)*3

2.3.3 新建cfg/yolov3_vechcle.cfg


  • 第8,9行:width,height 可以设置为416,608,832等其它32的倍数,这里设置608
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第611,695,779行:修改classes为我们的具体分类数
  • 第605,689,773行:修改filters为(classes+5)*3

2.3.4 新建cfg/yolov3_vechcle_mosaic.cfg


  • 第8,9行:width,height 可以设置为416,608,832等其它32的倍数,这里设置608
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第25行:加mosaic=1
  • 第612,696,780行:修改classes为我们的具体分类数
  • 第606,690,774行:修改filters为(classes+5)*3

2.3.5 新建cfg/yolov3_tiny_vechcle.cfg


  • 第8,9行:width,height 可以设置为416,608,832等其它32的倍数,这里设置608
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第135,177行:修改classes为我们的具体分类数
  • 第127,171行:修改filters为(classes+5)*3

2.3.6 新建cfg/enetb0_vechcle.cfg

模型大小不到yolov4的十分之一,精度与416x416相当,速度也相当,所以用EfficientNet-B0做backbone


  • 第8,9行:width,height 可以设置为416,608,832等其它32的倍数
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第1015,1066行:修改classes为我们的具体分类数
  • 第1007,1060行:修改filters为(classes+5)*3

2.3.7 新建cfg/yolov4_p6_vechcle.cfg


  • 第8,9行:width,height 可以设置为1280
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第2144,2189,2234,2279行:修改classes为我们的具体分类数
  • 第2136,2181,2226,2271行:修改filters为(classes+5)*4

2.3.8 新建cfg/yolov4_p5_vechcle.cfg


  • 第8,9行:width,height 可以设置为896
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第1728,1773,1818行:修改classes为我们的具体分类数
  • 第1720,1765,1810行:修改filters为(classes+5)*4

2.3.9 新建cfg/yolov4_csp_x_swish_vechcle.cfg


  • 第8,9行:width,height 可以设置为640
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第1453,1496,1539行:修改classes为我们的具体分类数
  • 第1447,1490,1533行:修改filters为(classes+5)*3

2.3.10 新建cfg/yolov4_csp_swish_vechcle.cfg


  • 第8,9行:width,height 可以设置为640
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第1252,1295,1338行:修改classes为我们的具体分类数
  • 第1246,1289,1332行:修改filters为(classes+5)*3

2.3.11 新建cfg/yolov4x_mish_vechcle.cfg


  • 第8,9行:width,height 可以设置为640
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第1159,1289,1419行:修改classes为我们的具体分类数
  • 第1152,1282,1412行:修改filters为(classes+5)*3

2.3.12 新建cfg/yolov4_csp_vechcle.cfg


  • 第8,9行:width,height 可以设置为640
  • 第20行:max_batches 可以设置为分类数乘2000,但不少于图片数,所以建议设大一点,图片数乘5,每张图可以见到5次
  • 第22行:steps分别是max_batches的0.8和0.9
  • 第1034,1148,1262行:修改classes为我们的具体分类数
  • 第1027,1141,1255行:修改filters为(classes+5)*3


3.1 下载预训练权重

  • yolov4 已下载好yolov4.conv.137,放到cfg目录下
  • yolov4-tiny 已下载好yolov4-tiny.conv.29,放到cfg目录下
  • yolov3 已下载好darknet53.conv.74,放到cfg目录下
  • yolov3-tiny 已下载好yolov3-tiny.conv.11,放到cfg目录下


3.2 开始模型的训练


CUDA_VISIBLE_DEVICES=0,1,2,3 nohup darknet detector train cfg/vechle.data cfg/vechle.cfg cfg/yolov4.conv.137 -gpus 0,1,2,3 -dont_show -map 2>&1 >logs/train20210713.log &

3.2.1 Yolo V4训练

with open('train_yolov4.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov4_vechcle.cfg cfg/yolov4.conv.137 -gpus 6,7 -dont_show -map 2>&1 >logs/yolov4.log &")
with open('train_yolov4.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov4_vechcle.cfg cfg/yolov4.conv.137 -gpus 6,7 -dont_show -map 2>&1 >logs/yolov4.log &
#建议在shell 中执行该命令
import os 
os.system("sh train_yolov4.sh")

3.2.2 Yolo V4 tiny训练

with open('train_yolov4_tiny.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov4_tiny_vechcle.cfg cfg/yolov4-tiny.conv.29 -gpus 5 -dont_show -map 2>&1 >logs/yolov4_tiny.log &")
with open('train_yolov4_tiny.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov4_tiny_vechcle.cfg cfg/yolov4-tiny.conv.29 -gpus 5 -dont_show -map 2>&1 >logs/yolov4_tiny.log &
#建议在shell 中执行该命令
import os 
os.system("sh train_yolov4_tiny.sh")

3.2.3 Yolo V3 训练

with open('train_yolov3.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov3_vechcle.cfg cfg/darknet53.conv.74 -gpus 3,4 -dont_show -map 2>&1 >logs/yolov3.log &")
with open('train_yolov3.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov3_vechcle.cfg cfg/darknet53.conv.74 -gpus 3,4 -dont_show -map 2>&1 >logs/yolov3.log &

3.2.4 Yolov3 加mosaic数据增强训练

with open('train_yolov3_mosaic.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov3_vechcle_mosaic.cfg cfg/darknet53.conv.74 -gpus 2,5 -dont_show -map 2>&1 >logs/yolov3_mosaic.log &")
with open('train_yolov3_mosaic.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov3_vechcle_mosaic.cfg cfg/darknet53.conv.74 -gpus 2,5 -dont_show -map 2>&1 >logs/yolov3_mosaic.log &

3.2.5 Yolo V3 tiny训练

with open('train_yolov3_tiny.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov3_tiny_vechcle.cfg cfg/yolov3-tiny.conv.11 -gpus 2 -dont_show -map 2>&1 >logs/yolov3_tiny.log &")
with open('train_yolov3_tiny.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov3_tiny_vechcle.cfg cfg/yolov3-tiny.conv.11 -gpus 2 -dont_show -map 2>&1 >logs/yolov3_tiny.log &

3.2.6 Yolo V3 efficientb0训练

with open('train_yolov3_enet.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/enetb0_vechcle.cfg cfg/enetb0-coco.conv.132 -gpus 1 -dont_show -map 2>&1 >logs/yolov3_enet.log &")
with open('train_yolov3_enet.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/enetb0_vechcle.cfg cfg/enetb0-coco.conv.132 -gpus 1 -dont_show -map 2>&1 >logs/yolov3_enet.log &

3.2.7 Yolo V4 p6训练

with open('train_yolov4_p6.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov4_p6_vechcle.cfg cfg/yolov4-p6.conv.289 -gpus 6,7 -dont_show -map 2>&1 >logs/yolov4_p6.log &")
with open('train_yolov4_p6.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov4_p6_vechcle.cfg cfg/yolov4-p6.conv.289 -gpus 6,7 -dont_show -map 2>&1 >logs/yolov4_p6.log &

3.2.8 Yolo V4 p5训练

with open('train_yolov4_p5.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov4_p5_vechcle.cfg cfg/yolov4-p5.conv.232 -gpus 3,4 -dont_show -map 2>&1 >logs/yolov4_p5.log &")
with open('train_yolov4_p5.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov4_p5_vechcle.cfg cfg/yolov4-p5.conv.232 -gpus 3,4 -dont_show -map 2>&1 >logs/yolov4_p5.log &

3.2.9 Yolo V4 csp x swish训练

with open('train_yolov4_csp_x_swish.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov4_csp_x_swish_vechcle.cfg cfg/yolov4-csp-x-swish.conv.192 -gpus 0,1 -dont_show -map 2>&1 >logs/yolov4_csp_x_swish.log &")
with open('train_yolov4_csp_x_swish.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov4_csp_x_swish_vechcle.cfg cfg/yolov4-csp-x-swish.conv.192 -gpus 0,1 -dont_show -map 2>&1 >logs/yolov4_csp_x_swish.log &

3.2.10 Yolo V4 csp swish训练

with open('train_yolov4_csp_swish.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov4_csp_swish_vechcle.cfg cfg/yolov4-csp-swish.conv.164 -gpus 0,1 -dont_show -map 2>&1 >logs/yolov4_csp_swish.log &")
with open('train_yolov4_csp_swish.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov4_csp_swish_vechcle.cfg cfg/yolov4-csp-swish.conv.164 -gpus 0,1 -dont_show -map 2>&1 >logs/yolov4_csp_swish.log &

3.2.11 Yolo V4 x mish训练

with open('train_yolov4x_mish.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov4x_mish_vechcle.cfg cfg/yolov4x-mish.conv.166 -gpus 2,3 -dont_show -map 2>&1 >logs/yolov4x_mish.log &")
with open('train_yolov4x_mish.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov4x_mish_vechcle.cfg cfg/yolov4x-mish.conv.166 -gpus 2,3 -dont_show -map 2>&1 >logs/yolov4x_mish.log &

3.2.12 Yolo V4 csp 训练

with open('train_yolov4_csp.sh','w') as f:
    f.write("nohup darknet detector train cfg/vechcle.data cfg/yolov4_csp_vechcle.cfg cfg/yolov4-csp.conv.142 -gpus 6,7 -dont_show -map 2>&1 >logs/yolov4_csp.log &")
with open('train_yolov4x_mish.sh') as f:
nohup darknet detector train cfg/vechcle.data cfg/yolov4x_mish_vechcle.cfg cfg/yolov4x-mish.conv.166 -gpus 2,3 -dont_show -map 2>&1 >logs/yolov4x_mish.log &


在模型使用时,修改配置文件batch=1 subdivisions=1用于推理。另外,可以通过修改.cfg文件中的height,width来修改模型输入,可以是608x608,832x832或其它32的倍数。改大输入会对小目标有提升,这时模型无须再次训练,还用原来的模型即可。当然要从根本上提升精度,还是要用大输入尺寸来训练模型。

4.1 命令行对图片和视频模型的推理

#指定GPU 6
darknet detector test cfg/vechle.data cfg/vechle.cfg backup/vechle_best.weights -i 6 /workspace/vechcle/VOCdevkit/VOC2007_DEST/JPEGImages/000004.jpg -thresh 0.25
net.optimized_memory = 0 
mini_batch = 1, batch = 16, time_steps = 1, train = 0 
Create CUDA-stream - 6 
 Create cudnn-handle 6 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 

 seen 64, trained: 918 K-images (14 Kilo-batches_64) 
 Detection layer: 139 - type = 28 
 Detection layer: 150 - type = 28 
 Detection layer: 161 - type = 28 
/workspace/vechcle/VOCdevkit/VOC2007_DEST/JPEGImages/000004.jpg: Predicted in 19.047000 milli-seconds.
car: 89%
car: 77%
truck: 49%
truck: 28%
car: 33%
car: 97%
car: 98%
car: 96%
car: 98%
car: 99%
car: 99%

darknet detector test cfg/vechle.data cfg/vechle.cfg backup/vechle_best.weights -ext_output /workspace/vechcle/VOCdevkit/VOC2007_DEST/JPEGImages/000004.jpg -thresh 0.25
net.optimized_memory = 0 
mini_batch = 1, batch = 16, time_steps = 1, train = 0 
Create CUDA-stream - 0 
 Create cudnn-handle 0 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 

 seen 64, trained: 918 K-images (14 Kilo-batches_64) 
 Detection layer: 139 - type = 28 
 Detection layer: 150 - type = 28 
 Detection layer: 161 - type = 28 
/workspace/vechcle/VOCdevkit/VOC2007_DEST/JPEGImages/000004.jpg: Predicted in 19.375000 milli-seconds.
car: 89%	(left_x:   -0   top_y:  323   width:   17   height:   20)
car: 77%	(left_x:   12   top_y:  309   width:   75   height:   56)
truck: 49%	(left_x:   15   top_y:  310   width:   68   height:   53)
truck: 28%	(left_x:   54   top_y:  307   width:   29   height:   22)
car: 33%	(left_x:   78   top_y:  327   width:   13   height:   15)
car: 97%	(left_x:   85   top_y:  326   width:   29   height:   23)
car: 98%	(left_x:  108   top_y:  327   width:   35   height:   25)
car: 96%	(left_x:  137   top_y:  322   width:   50   height:   36)
car: 98%	(left_x:  173   top_y:  326   width:   78   height:   37)
car: 99%	(left_x:  229   top_y:  329   width:  103   height:   45)
car: 99%	(left_x:  362   top_y:  327   width:  134   height:   62)

darknet detector demo cfg/vechle.data cfg/vechle.cfg backup/vechle_best.weights test.mp4 -out_filename result.avi
darknet detector demo cfg/vechle.data cfg/vechle.cfg backup/vechle_best.weights -c 0 -out_filename result.avi
darknet detector demo cfg/vechle.data cfg/vechle.cfg backup/vechle_best.weights -dont_show -ext_output < cfg/val.txt > result.txt
Process is interrupted.
这个工作暂时也不做了,不难,主要是应用darknet进行图片推理,然后获得结果,最后利用coco转voc中的代码转成xml即可
UsageError: Line magic function `%ºsh` not found.

4.2 python对图片和视频的推理

4.2.1 图片推理

import os
import cv2
import numpy as np
import random
import sys
import infer
# sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import darknet
class DetectImage:
    def __init__(self, dataPath, configPath, weightPath, namesPath, gpu_id=0):
        :param metaPath:   ***.data 存储各种参数
        :param configPath: ***.cfg  网络结构文件
        :param weightPath: ***.weights yolo的权重
        :param namesPath:  ***.names中的names路径,这里是便于读取使用
        if not os.path.exists(configPath):
            raise(ValueError("Invalid config path {}".format(os.path.abspath(configPath))))
        if not os.path.exists(weightPath):
            raise(ValueError("Invalid weight path {}".format(os.path.abspath(weightPath))))
        if not os.path.exists(dataPath):
            raise(ValueError("Invalid data file path {}".format(os.path.abspath(dataPath))))
        if not isinstance(gpu_id,int):
            raise(ValueError("Invalid gpu id {}".format(gpu_id)))
        # 设置gpu_id
        # 网络,各种参数,分类名称,分类对应的颜色,batch_size=1,单张图片进行处理
        self.network, self.class_names, self.class_colors = darknet.load_network(
    def predict_image(self, image, thresh=0.25, is_show=False, save_path=''):
        :param image:    cv2.imread 图像, darknet自己会对图像进行预处理
        :param thresh:   置信度阈值, 其它阈值不变
        :param is_show:  是否将画框之后的图像返回
        :param save_path: 画框后的保存路径
        :return:         返回1个矩阵
        # bgr->rgb
        rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # 获取图片大小,网络输入大小
        height, width = rgb_img.shape[:2]
        network_width = darknet.network_width(self.network)
        network_height = darknet.network_height(self.network)

        darknet_image = darknet.make_image(network_width, network_height, 3)
        image_resized = cv2.resize(rgb_img, (network_width, network_height),
        darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
        detections = darknet.detect_image(self.network, self.class_names, darknet_image, thresh=thresh)
#         image = darknet.draw_boxes(detections, image_resized, class_colors)
        for label, confidence, bbox in detections:
            x,y,w,h = bbox
            # 获取在原图中坐标
            x *= width / network_width
            w *= width / network_width
            y *= height / network_height
            h *= height / network_height
        if is_show:
            image = darknet.draw_boxes(origin_image_detections, rgb_img, self.class_colors)
            bgr_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            # 保存图像
            if save_path:
                cv2.imwrite(save_path, bgr_img)
            return image  #返回画框的rgb图像
        return origin_image_detections
# if __name__=="__main__":

#     detect = DetectImage(dataPath=r'./cfg/vechcle.data',
#                configPath=r'./cfg/vechcle.cfg',
#                weightPath=r'./backup/vechcle_best.weights',
#                namesPath=r'./cfg/vechcle.names',
#                gpu_id=1)

#     detections = detect.predict_image(image)
#     darknet.print_detections(detections,True)
#     image = cv2.imread(r'./data/car.jpg', -1)
#     new_image=detect.predict_image(image, is_show=True,save_path='./data/pred.jpg')
# # detections = detect.predict_image(image)
# # darknet.print_detections(detections,True)

import os
import cv2
import numpy as np
import random
import sys
import infer
# sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import darknet
import threading

class DetectImage:
    _instance_lock = threading.Lock()
    def __init__(self, dataPath, configPath, weightPath, namesPath, gpu_id=0):
        :param metaPath:   ***.data 存储各种参数
        :param configPath: ***.cfg  网络结构文件
        :param weightPath: ***.weights yolo的权重
        :param namesPath:  ***.names中的names路径,这里是便于读取使用
        if not os.path.exists(configPath):
            raise(ValueError("Invalid config path {}".format(os.path.abspath(configPath))))
        if not os.path.exists(weightPath):
            raise(ValueError("Invalid weight path {}".format(os.path.abspath(weightPath))))
        if not os.path.exists(dataPath):
            raise(ValueError("Invalid data file path {}".format(os.path.abspath(dataPath))))
        if not isinstance(gpu_id,int):
            raise(ValueError("Invalid gpu id {}".format(gpu_id)))
        # 设置gpu_id
        # 网络,各种参数,分类名称,分类对应的颜色,batch_size=1,单张图片进行处理
        self.network, self.class_names, self.class_colors = darknet.load_network(
    def __new__(cls, *args, **kwargs):
        if not hasattr(cls, "__instance"):
            with DetectImage._instance_lock:
                if not hasattr(cls, "_instance"):
                    DetectImage._instance = super().__new__(cls)
        return DetectImage._instance
    def predict_image(self, image, thresh=0.25, is_show=False, save_path=''):
        :param image:    cv2.imread 图像, darknet自己会对图像进行预处理
        :param thresh:   置信度阈值, 其它阈值不变
        :param is_show:  是否将画框之后的图像返回
        :param save_path: 画框后的保存路径
        :return:         返回1个矩阵
        # bgr->rgb
        rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # 获取图片大小,网络输入大小
        height, width = rgb_img.shape[:2]
        network_width = darknet.network_width(self.network)
        network_height = darknet.network_height(self.network)

        darknet_image = darknet.make_image(network_width, network_height, 3)
        image_resized = cv2.resize(rgb_img, (network_width, network_height),
        darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
        detections = darknet.detect_image(self.network, self.class_names, darknet_image, thresh=thresh)
#         image = darknet.draw_boxes(detections, image_resized, class_colors)
        for label, confidence, bbox in detections:
            x,y,w,h = bbox
            # 获取在原图中坐标
            x *= width / network_width
            w *= width / network_width
            y *= height / network_height
            h *= height / network_height
        if is_show:
            image = darknet.draw_boxes(origin_image_detections, rgb_img, self.class_colors)
            bgr_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            # 保存图像
            if save_path:
                cv2.imwrite(save_path, bgr_img)
            return image  #返回画框的rgb图像
        return origin_image_detections

detect = DetectImage(dataPath=r'./cfg/vechcle.data',
image = cv2.imread(r'./data/car.jpg', -1)
new_image=detect.predict_image(image, is_show=True,save_path='./data/pred2.jpg')
# detections = detect.predict_image(image)
# darknet.print_detections(detections,True)

import matplotlib.pyplot as plt


4.2.2 视频推理

import infer
import cv2
import darknet
import os
import time
import random

class DetectVideo:
    def __init__(self,args):
        if not  isinstance(self.args,edict):
            print("""args is not a valid edict,please check:\n
            args.input type:str default=0\n
            args.out_filename type:str default= \n
            args.weights,default=yolov4.weights \n
            args.dont_show windown inference display. For headless systems \n
            args.ext_output display bbox coordinates of detected objects \n
            args.config_file "./cfg/yolov4.cfg path to config file \n
            args.data_file ./cfg/coco.data path to data file \n
            args.thresh 0.25 remove detections with confidence below this value\n
            args.gpu_id 1 int select use which gpu do inferece""")
            # 设置gpu_id
            self.network, self.class_names, self.class_colors = darknet.load_network(
            self.width = darknet.network_width(self.network)
            self.height = darknet.network_height(self.network)
            input_path = self.str2int(self.args.input)
            self.cap = cv2.VideoCapture(input_path)
    def str2int(self,video_path):
        """Cast *video_path* to int when possible, else return it unchanged.

        argparse returns a string, although a webcam is addressed by an int
        (0, 1, ...), so the value is cast to int if it can be.
        """
        # The `try:` line and the docstring quotes were missing, which left
        # the method syntactically invalid.
        try:
            return int(video_path)
        except ValueError:
            return video_path
    def check_arguments_errors(self,args):
        assert 0 < args.thresh < 1, "Threshold should be a float between zero and one (non-inclusive)"
        if not os.path.exists(args.config_file):
            raise(ValueError("Invalid config path {}".format(os.path.abspath(args.config_file))))
        if not os.path.exists(args.weights):
            raise(ValueError("Invalid weight path {}".format(os.path.abspath(args.weights))))
        if not os.path.exists(args.data_file):
            raise(ValueError("Invalid data file path {}".format(os.path.abspath(args.data_file))))
        if self.str2int(args.input) == str and not os.path.exists(args.input):
            raise(ValueError("Invalid video path {}".format(os.path.abspath(args.input))))
    def set_saved_video(self,input_video, output_video, size):
        """Create an ``mp4v`` video writer using the source's frame rate.

        Args:
            input_video: object whose ``get(cv2.CAP_PROP_FPS)`` gives the FPS.
            output_video: forwarded to ``cv2.VideoWriter`` as the target.
            size: forwarded to ``cv2.VideoWriter`` as the frame size.

        Returns:
            The ``cv2.VideoWriter`` that was created.
        """
        codec = cv2.VideoWriter_fourcc(*"mp4v")
        # The writer receives the frame rate truncated to an int.
        source_fps = int(input_video.get(cv2.CAP_PROP_FPS))
        return cv2.VideoWriter(output_video, codec, source_fps, size)
    def video_capture(self):
        while self.cap.isOpened():
            ret, frame = self.cap.read()
            if not ret:
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            height, width = frame_rgb.shape[:2]
            if i==0:
                video = self.set_saved_video(self.cap, self.args.out_filename, (width, height))
            frame_resized = cv2.resize(frame_rgb, (self.width, self.height),

            img_for_detect = darknet.make_image(self.width, self.height, 3)
            darknet.copy_image_from_bytes(img_for_detect, frame_resized.tobytes())
            prev_time = time.time()
            detections = darknet.detect_image(self.network, self.class_names, img_for_detect, thresh=self.args.thresh)
            fps = int(1/(time.time() - prev_time))
            print("FPS: {}".format(fps))
            for label, confidence, bbox in detections:
                x,y,w,h = bbox
                # 获取在原图中坐标
                x *= width / self.width
                w *= width / self.width
                y *= height / self.height
                h *= height / self.height
            image = darknet.draw_boxes(origin_image_detections, frame_rgb, self.class_colors)
            bgr_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if self.args.out_filename is not None:
            if not self.args.dont_show:
                cv2.imshow('Inference', bgr_img)
            if cv2.waitKey(fps) == 27:
from easydict import EasyDict as edict
# from infer.detect_video import DetectVideo 

detectv = DetectVideo(args)

!python darknet_video.py ,以下代码为darknet_video.py中内容,但是在jupyter中没跑通

import random
import infer
import os
import cv2
import time
import darknet
import argparse
from threading import Thread, enumerate
from queue import Queue
from easydict import EasyDict as edict

def parser():
    """Declare the command-line options and parse ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``input``, ``out_filename``,
        ``weights``, ``dont_show``, ``ext_output``, ``config_file``,
        ``data_file`` and ``thresh``.
    """
    # One (flag, keyword-arguments) entry per option, registered in order.
    option_table = (
        ("--input", dict(type=str, default=0,
                         help="video source. If empty, uses webcam 0 stream")),
        ("--out_filename", dict(type=str, default="",
                                help="inference video name. Not saved if empty")),
        ("--weights", dict(default="yolov4.weights",
                           help="yolo weights path")),
        ("--dont_show", dict(action='store_true',
                             help="windown inference display. For headless systems")),
        ("--ext_output", dict(action='store_true',
                              help="display bbox coordinates of detected objects")),
        ("--config_file", dict(default="./cfg/yolov4.cfg",
                               help="path to config file")),
        ("--data_file", dict(default="./cfg/coco.data",
                             help="path to data file")),
        ("--thresh", dict(type=float, default=.25,
                          help="remove detections with confidence below this value")),
    )

    cli = argparse.ArgumentParser(description="YOLO Object Detection")
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()

def str2int(video_path):
    """Cast *video_path* to int when possible, else return it unchanged.

    argparse returns a string, although a webcam is addressed by an int
    (0, 1, ...), so the value is cast to int if it can be.
    """
    # The `try:` line and the docstring quotes were missing, which left the
    # function syntactically invalid.
    try:
        return int(video_path)
    except ValueError:
        return video_path

def check_arguments_errors(args):
    """Validate the threshold and the file paths carried by *args*.

    Args:
        args: object exposing ``thresh``, ``config_file``, ``weights``,
            ``data_file`` and ``input``.

    Raises:
        AssertionError: if ``thresh`` is not strictly between 0 and 1.
        ValueError: if the config, weights or data file does not exist, or
            if ``input`` is a non-numeric string that does not exist on disk.
    """
    assert 0 < args.thresh < 1, "Threshold should be a float between zero and one (non-inclusive)"
    if not os.path.exists(args.config_file):
        raise ValueError("Invalid config path {}".format(os.path.abspath(args.config_file)))
    if not os.path.exists(args.weights):
        raise ValueError("Invalid weight path {}".format(os.path.abspath(args.weights)))
    if not os.path.exists(args.data_file):
        raise ValueError("Invalid data file path {}".format(os.path.abspath(args.data_file)))
    # The original compared the converted value with the *type* `str`
    # (`== str`), which is never true, so a missing video file was never
    # reported. Only values that stay strings are checked as paths.
    if isinstance(str2int(args.input), str) and not os.path.exists(args.input):
        raise ValueError("Invalid video path {}".format(os.path.abspath(args.input)))

def set_saved_video(input_video, output_video, size):
    """Create an ``mp4v`` video writer using the source's frame rate.

    Args:
        input_video: object whose ``get(cv2.CAP_PROP_FPS)`` gives the FPS.
        output_video: forwarded to ``cv2.VideoWriter`` as the target.
        size: forwarded to ``cv2.VideoWriter`` as the frame size.

    Returns:
        The ``cv2.VideoWriter`` that was created.
    """
    codec = cv2.VideoWriter_fourcc(*"mp4v")
    # The writer receives the frame rate truncated to an int.
    source_fps = int(input_video.get(cv2.CAP_PROP_FPS))
    return cv2.VideoWriter(output_video, codec, source_fps, size)

def video_capture(frame_queue, darknet_image_queue):
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height),
#         frame_queue.put(frame_resized)
        img_for_detect = darknet.make_image(width, height, 3)
        darknet.copy_image_from_bytes(img_for_detect, frame_resized.tobytes())

def inference(darknet_image_queue, detections_queue, fps_queue):
    """Run detection on queued darknet images and publish the results.

    Boxes returned by ``darknet.detect_image`` are scaled from the network
    input size (``width`` x ``height``) to the source frame size
    (``img_width`` x ``img_height``) before being queued.

    Args:
        darknet_image_queue: Queue supplying the darknet images to process.
        detections_queue: Queue receiving the list of
            ``(label, confidence, (x, y, w, h))`` tuples in source-frame
            coordinates.
        fps_queue: Queue receiving the integer detection rate of each frame.
    """
    while cap.isOpened():
        darknet_image = darknet_image_queue.get()
        prev_time = time.time()
        detections = darknet.detect_image(network, class_names, darknet_image, thresh=args.thresh)
        # Map the boxes back to coordinates in the original image.
        scale_x = img_width / width
        scale_y = img_height / height
        detections_adjusted = [
            (label, confidence, (x * scale_x, y * scale_y, w * scale_x, h * scale_y))
            for label, confidence, (x, y, w, h) in detections
        ]
        detections_queue.put(detections_adjusted)
        # Clamp the interval so a coarse clock cannot cause a division by zero.
        elapsed = max(time.time() - prev_time, 1e-3)
        fps = int(1 / elapsed)
        fps_queue.put(fps)
        print("FPS: {}".format(fps))
        # The image was allocated with darknet.make_image(); release it once
        # detection is done so it is not leaked on every frame.
        darknet.free_image(darknet_image)
    cap.release()

def drawing(frame_queue, detections_queue, fps_queue):
    """Draw detections on queued frames, then save and/or show them.

    Args:
        frame_queue: Queue supplying the frames to draw on.
        detections_queue: Queue supplying the detections for each frame.
        fps_queue: Queue supplying the detection rate, used as the
            ``cv2.waitKey`` delay.
    """
    random.seed(3)  # deterministic bbox colors
    video = set_saved_video(cap, args.out_filename, (int(img_width), int(img_height)))
    while cap.isOpened():
        frame_rgb = frame_queue.get()
        detections = detections_queue.get()
        fps = fps_queue.get()
        if frame_rgb is not None:
            image = darknet.draw_boxes(detections, frame_rgb, class_colors)
            # Swap the channel order again before handing the frame to OpenCV.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            if args.out_filename is not None:
                video.write(image)
            if not args.dont_show:
                cv2.imshow('Inference', image)
            # waitKey(0) blocks until a key is pressed, so never pass 0.
            # Key code 27 is Esc.
            if cv2.waitKey(max(1, fps)) == 27:
                break
    cap.release()
    video.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    # Queues connecting the capture, inference and drawing threads.
    frame_queue = Queue()  # frames waiting to be drawn
    darknet_image_queue = Queue(maxsize=1)
    detections_queue = Queue(maxsize=1)
    fps_queue = Queue(maxsize=1)

    # NOTE(review): `args = parser()` is commented out in the original, so
    # `args` must already be defined before this point - confirm how it is
    # supplied in the notebook.
    #     args = parser()
    # The argument list of load_network() was missing from the extracted
    # text; it is restored from the attributes check_arguments_errors()
    # validates.
    network, class_names, class_colors = darknet.load_network(
        args.config_file,
        args.data_file,
        args.weights,
        batch_size=1,
    )
    # Network input size.
    width = darknet.network_width(network)
    height = darknet.network_height(network)
    input_path = str2int(args.input)
    cap = cv2.VideoCapture(input_path)
    # Size of the source frames.
    img_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    img_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    Thread(target=video_capture, args=(frame_queue, darknet_image_queue)).start()
    Thread(target=inference, args=(darknet_image_queue, detections_queue, fps_queue)).start()
    Thread(target=drawing, args=(frame_queue, detections_queue, fps_queue)).start()

fps25 size (1280, 720)
# Queues connecting the capture, inference and drawing threads.
frame_queue = Queue()  # frames waiting to be drawn
darknet_image_queue = Queue(maxsize=1)
detections_queue = Queue(maxsize=1)
fps_queue = Queue(maxsize=1)

# NOTE(review): `args = parser()` is commented out in the original, so `args`
# must already exist in the notebook session - confirm how it is supplied.
#     args = parser()
# The argument list of load_network() was missing from the extracted text; it
# is restored from the attributes check_arguments_errors() validates.
network, class_names, class_colors = darknet.load_network(
    args.config_file,
    args.data_file,
    args.weights,
    batch_size=1,
)

# Network input size.
width = darknet.network_width(network)
height = darknet.network_height(network)
input_path = str2int(args.input)
cap = cv2.VideoCapture(input_path)
# Size of the source frames.
img_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
img_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

Thread(target=video_capture, args=(frame_queue, darknet_image_queue)).start()
Thread(target=inference, args=(darknet_image_queue, detections_queue, fps_queue)).start()
Thread(target=drawing, args=(frame_queue, detections_queue, fps_queue)).start()

fps25 size (1280, 720)
4.2.3 显示视频

from IPython.display import clear_output,  display, HTML
from PIL import Image
import matplotlib.pyplot as plt
import time
import cv2
import base64

# Timestamp of the previous processImg() call; 0 until the first call.
current_time = 0


# Image processing function
def processImg(img):
    """Overlay the measured frame rate on ``img`` and swap its channel order.

    The frame rate is derived from the time elapsed since the previous call,
    so nothing is drawn on the first call.

    Args:
        img: Image array; the FPS text is drawn onto it in place.

    Returns:
        The result of ``cv2.cvtColor(img, cv2.COLOR_BGR2RGB)``.
    """
    # Optional extra processing, disabled by default:
    #   draw a box:        cv2.rectangle(img, (500, 300), (800, 400), (0, 0, 255), 5, 1, 0)
    #   flip vertically:   img = cv2.flip(img, 0)
    #   resize:            img = cv2.resize(img, (1080, 1080))

    # Show the FPS
    global current_time
    now = time.time()
    if current_time != 0:
        elapsed = now - current_time
        # Skip the overlay if the clock did not advance (avoids dividing by zero).
        if elapsed > 0:
            text = "FPS: %d" % int(1. / elapsed)
            cv2.putText(img, text, (0, 100), cv2.FONT_HERSHEY_TRIPLEX, 3.65, (255, 0, 0), 2)
    current_time = now

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def arrayShow(imageArray):
    """Convert an image array into a PIL image.

    Args:
        imageArray: Array handed to ``Image.fromarray``.

    Returns:
        The image object produced by ``Image.fromarray``.
    """
    pil_image = Image.fromarray(imageArray)
    return pil_image

# Read ./data/road.mp4 frame by frame, run each frame through processImg(),
# shrink it by the factor `small` and wrap it with arrayShow().
# NOTE(review): this cell is incomplete as extracted - the `try:` line, the loop
# header, the body of `if not ret:`, the display / frame-rate statements and the
# body of the `except` clause are missing, and `small` is not defined in the
# visible code. Restore them from the notebook before running.
video = cv2.VideoCapture("./data/road.mp4")
        ret, frame = video.read()
        if not ret:
        lines, columns, _ = frame.shape
        frame = processImg(frame)
        frame = cv2.resize(frame, (int(columns / small), int(lines / small)))

        img = arrayShow(frame)

        # Throttle the frame rate
    except KeyboardInterrupt:




5.1 yolov4模型测试方法一



darknet detector map cfg/vechcle.data cfg/yolov4_vechcle.cfg backup/yolov4_vechcle_best.weights
net.optimized_memory = 0 
mini_batch = 1, batch = 16, time_steps = 1, train = 0 
Create CUDA-stream - 0 
 Create cudnn-handle 0 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 

 seen 64, trained: 424 K-images (6 Kilo-batches_64) 

 calculation mAP (mean average precision)...
 Detection layer: 139 - type = 28 
 Detection layer: 150 - type = 28 
 Detection layer: 161 - type = 28 

 detections_count = 36911, unique_truth_count = 4058  
class_id = 0, name = bicycle, ap = 64.76%   	 (TP = 230, FP = 99) 
class_id = 1, name = bus, ap = 86.69%   	 (TP = 270, FP = 67) 
class_id = 2, name = car, ap = 74.02%   	 (TP = 1739, FP = 1002) 
class_id = 3, name = motorbike, ap = 73.81%   	 (TP = 350, FP = 140) 
class_id = 4, name = truck, ap = 64.48%   	 (TP = 326, FP = 263) 

 for conf_thresh = 0.25, precision = 0.65, recall = 0.72, F1-score = 0.68 
 for conf_thresh = 0.25, TP = 2915, FP = 1571, FN = 1143, average IoU = 51.59 % 

 IoU threshold = 50 %, used Area-Under-Curve for each unique Recall 
 mean average precision (mAP@0.50) = 0.727514, or 72.75 % 

Set -points flag:
 `-points 101` for MS COCO 
 `-points 11` for PascalVOC 2007 (uncomment `difficult` in voc.data) 
 `-points 0` (AUC) for ImageNet, PascalVOC 2010-2012, your custom dataset

 CUDA-version: 10010 (11020), cuDNN: 7.6.5, GPU count: 8  
 OpenCV version: 4.5.0
 0 : compute_capability = 700, cudnn_half = 0, GPU: Tesla V100-SXM2-32GB 
darknet detector map cfg/vechcle.data cfg/yolov4_vechcle.cfg backup/yolov4_vechcle_best.weights -iou_thresh 0.75
net.optimized_memory = 0 
mini_batch = 1, batch = 16, time_steps = 1, train = 0 
Create CUDA-stream - 0 
 Create cudnn-handle 0 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 

 seen 64, trained: 424 K-images (6 Kilo-batches_64) 

 calculation mAP (mean average precision)...
 Detection layer: 139 - type = 28 
 Detection layer: 150 - type = 28 
 Detection layer: 161 - type = 28 

 detections_count = 36911, unique_truth_count = 4058  
class_id = 0, name = bicycle, ap = 29.88%   	 (TP = 132, FP = 197) 
class_id = 1, name = bus, ap = 71.45%   	 (TP = 241, FP = 96) 
class_id = 2, name = car, ap = 44.90%   	 (TP = 1221, FP = 1520) 
class_id = 3, name = motorbike, ap = 39.21%   	 (TP = 221, FP = 269) 
class_id = 4, name = truck, ap = 37.34%   	 (TP = 239, FP = 350) 

 for conf_thresh = 0.25, precision = 0.46, recall = 0.51, F1-score = 0.48 
 for conf_thresh = 0.25, TP = 2054, FP = 2432, FN = 2004, average IoU = 39.15 % 

 IoU threshold = 75 %, used Area-Under-Curve for each unique Recall 
 mean average precision (mAP@0.75) = 0.445590, or 44.56 % 

Set -points flag:
 `-points 101` for MS COCO 
 `-points 11` for PascalVOC 2007 (uncomment `difficult` in voc.data) 
 `-points 0` (AUC) for ImageNet, PascalVOC 2010-2012, your custom dataset

 CUDA-version: 10010 (11020), cuDNN: 7.6.5, GPU count: 8  
 OpenCV version: 4.5.0
 0 : compute_capability = 700, cudnn_half = 0, GPU: Tesla V100-SXM2-32GB 
5.2 yolov4模型性能测试方法二


darknet detector valid cfg/vechcle.data cfg/yolov4_vechcle.cfg backup/yolov4_vechcle_best.weights
net.optimized_memory = 0 
mini_batch = 1, batch = 16, time_steps = 1, train = 0 
Create CUDA-stream - 0 
 Create cudnn-handle 0 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 

 seen 64, trained: 424 K-images (6 Kilo-batches_64) 
 Detection layer: 139 - type = 28 
 Detection layer: 150 - type = 28 
 Detection layer: 161 - type = 28 

 CUDA-version: 10010 (11020), cuDNN: 7.6.5, GPU count: 8  
 OpenCV version: 4.5.0
results: Using default 'results'
 0 : compute_capability = 700, cudnn_half = 0, GPU: Tesla V100-SXM2-32GB 
eval: Using default 'voc'
Total Detection Time: 42.000000 Seconds
ls results
head -n 15 ./results/comp4_det_test_bicycle.txt
000000401980 0.839656 1.000000 207.924377 362.778381 427.000000
000000401980 0.617458 343.060028 215.706848 640.000000 427.000000
000000401980 0.137739 365.956360 266.341797 632.603455 427.000000
000000401980 0.048878 1.000000 113.423553 373.908722 427.000000
000000401980 0.027138 258.326660 183.985229 640.000000 427.000000
000000401980 0.005769 362.092041 282.595978 558.476379 422.788727
000000401980 0.005717 399.438232 134.345291 635.504150 427.000000
000000401980 0.004526 379.546814 323.268311 616.657593 427.000000
000000401980 0.002933 381.449188 354.378296 602.514404 427.000000
000000401980 0.002256 371.549042 362.315216 533.911743 427.000000
000000401980 0.002217 345.463776 288.028381 503.355743 419.107727
000000401980 0.001431 357.871887 364.965546 492.497253 427.000000
000000401980 0.001198 344.290649 252.155457 584.557373 409.533447
000000401980 0.001176 396.900513 374.411926 597.720337 426.435547
000000401980 0.001167 448.817688 215.300049 640.000000 427.000000

以上结果每行的格式为:文件名(图片ID) 置信度 xmin ymin xmax ymax(即检测框左上角和右下角的像素坐标,而不是 x y w h)

然后将 reval_voc_py3.py 和 voc_eval_py3.py 放到与 results 同一级的目录下(脚本的具体内容请参考其源代码),再执行下面的命令。不过我们的数据分散在多个数据集目录中(VOC2007_DEST、VOC2012_DEST、COCO_VOC_DEST),要使用这份脚本需要先做修改,这里暂时没有必要。

python reval_voc_py3.py --voc_dir VOCdevkit --year 2007 --image_set test --class ./cfg/vechcle.names --output_dir test

参数说明:
--year:数据集文件夹名中的年份,例如文件夹 VOC2012 对应 2012
--image_set:验证集列表文件名(不含 .txt),例如 test 对应 VOC<年份>/ImageSets/Main/test.txt
--class:类名文件,例如 my_Data.names
--output_dir:输出文件夹名,例如 test
获得结果后可以画 PR 曲线,具体参见 https://blog.csdn.net/weixin_45702256/article/details/119321994 ,这里就不再展开,有需要再补充

6、Anchor Box先验框聚类分析与修改


6.1 使用k-means聚类获得自己数据集的先验框大小

darknet detector calc_anchors cfg/vechcle.data -num_of_clusters 9 -width 608 -height 608
IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)

 CUDA-version: 10010 (11020), cuDNN: 7.6.5, GPU count: 8  
 OpenCV version: 4.5.0

以上命令也可以放到 shell 中执行,结果大致如下。聚类结果保存在 anchors.txt 中;另一个文件 counters_per_class.txt 用来保存每个类别有多少个标注框。calc_anchors 采用随机初始化,每次计算出的结果不完全一样,可以多计算几次,从中选一组

root@884f85fe676b:/workspace/yolo_demo# darknet detector calc_anchors cfg/vechcle.data -num_of_clusters 9 -width 608 -height 608
 CUDA-version: 10010 (11020), cuDNN: 7.6.5, GPU count: 8
 OpenCV version: 4.5.0

 num_of_clusters = 9, width = 608, height = 608
 read labels from 24710 images
 loaded          image: 24710    box: 83514
 all loaded.

 calculating k-means++ ...

 iterations = 148

counters_per_class = 8488, 7101, 48015, 10024, 9886

 avg IoU = 60.70 %

Saving anchors to the file: anchors.txt
anchors =  21, 20,  48, 51, 114, 71,  75,150, 183,141, 174,287, 378,205, 342,387, 532,467

也可以通过 Python 脚本来获取聚类结果。gen_anchors.py 中有一处需要修改:第 17~20 行,改成自己的数值(如 416、608、832)。该脚本运行时间较长

python gen_anchors.py -filelist cfg/train.txt -output_dir ./ -num_clusters 9
centroids.shape (9, 2)

0.66,0.64, 1.59,1.52, 2.14,4.03, 3.99,2.43, 4.34,6.73, 7.86,10.79, 8.56,4.66, 14.03,9.26, 15.79,15.51

6.2 修改cfg中anchors大小,然后进行重新训练




tree -L 1 ./
├── anchors9.txt
├── anchors.txt
├── backup
├── cfg
├── chart_enetb0_vechcle.png
├── chart.png
├── chart_yolov3_tiny_vechcle.png
├── chart_yolov3_vechcle_mosaic.png
├── chart_yolov3_vechcle.png
├── chart_yolov4_csp_swish_vechcle.png
├── chart_yolov4_csp_vechcle.png
├── chart_yolov4_csp_x_swish_vechcle.png
├── chart_yolov4_p5_vechcle.png
├── chart_yolov4_p6_vechcle.png
├── chart_yolov4_tiny_vechcle.png
├── chart_yolov4_vechcle.png
├── chart_yolov4x_mish_vechcle.png
├── COCO
├── core.105601
├── core.122098
├── core.123505
├── core.156773
├── core.159716
├── core.176745
├── core.23172
├── core.33626
├── core.60037
├── core.62810
├── counters_per_class.txt
├── darknet_video.py
├── data
├── data_prepare.ipynb
├── gen_anchors.py
├── infer
├── logs
├── model_prepare.ipynb
├── results
├── reval_voc_py3.py
├── train_yolov3_enet.sh
├── train_yolov3_mosaic.sh
├── train_yolov3.sh
├── train_yolov3_tiny.sh
├── train_yolov4_csp.sh
├── train_yolov4_csp_swish.sh
├── train_yolov4_csp_x_swish.sh
├── train_yolov4_p5.sh
├── train_yolov4_p6.sh
├── train_yolov4.sh
├── train_yolov4_tiny.sh
├── train_yolov4x_mish.sh
├── VOCdevkit
└── voc_eval_py3.py

8 directories, 44 files


tree -L 1 ./data
├── bus_station.mp4
├── car.jpg
├── labels
├── pred1.jpg
├── pred2.jpg
├── pred_bus_station.mp4
├── pred.jpg
├── pred.mp4
├── pred_road.mp4
├── pred_thred.mp4
└── road.mp4

1 directory, 10 files
tree -L 1 ./cfg
├── darknet53.conv.74
├── enetb0-coco.conv.132
├── enetb0_vechcle.cfg
├── libdarknet.so
├── train.txt
├── val.txt
├── vechcle.data
├── vechcle.names
├── yolov3-tiny.conv.11
├── yolov3_tiny_vechcle.cfg
├── yolov3_vechcle.cfg
├── yolov3_vechcle_mosaic.cfg
├── yolov4.conv.137
├── yolov4-csp.conv.142
├── yolov4-csp-swish.conv.164
├── yolov4_csp_swish_vechcle.cfg
├── yolov4_csp_vechcle.cfg
├── yolov4-csp-x-swish.conv.192
├── yolov4_csp_x_swish_vechcle.cfg
├── yolov4-p5.conv.232
├── yolov4_p5_vechcle.cfg
├── yolov4-p6.conv.289
├── yolov4_p6_vechcle.cfg
├── yolov4-tiny.conv.29
├── yolov4_tiny_vechcle.cfg
├── yolov4_vechcle.cfg
├── yolov4x-mish.conv.166
└── yolov4x_mish_vechcle.cfg

0 directories, 28 files
tree -L 1 ./logs
├── yolov3_enet.log
├── yolov3.log
├── yolov3_mosaic.log
├── yolov3_tiny.log
├── yolov4_csp.log
├── yolov4_csp_swish.log
├── yolov4_csp_x_swish.log
├── yolov4.log
├── yolov4_p5.log
├── yolov4_p6.log
├── yolov4_tiny.log
└── yolov4x_mish.log

0 directories, 12 files



一,概述 目标检测算法主要分为两个大的方向:单阶段检测器/双阶段检测器,其对应的代表性算法分别是Yolo和Faster-rcnn。而随着目标检测性能的大幅度提升,这个领域的门槛变得很高&#