目标检测 YOLOv5 - 损失函数的改进

flyfish

完整代码下载地址
该改进源码完全兼容原版 YOLOv5 的 v5 版本；backbone 除支持原有的全部结构外，还支持 MobileNetV3、ShuffleNetV2 等。

类别之间可以是包含关系，例如一个目标既可以是“人”，也可以是“男人”；也可以是互斥关系，例如一个目标只能是人、猫、狗中的一种。本文在数据集类别为互斥关系的前提下，尝试对损失函数进行改进。

类别是包含关系的

BCEWithLogitsLoss 可以用于多标签分类：一个目标可以属于一个或者多个类别，例如一个目标可以同时是人、男人、儿童，类别之间存在包含关系。
BCEWithLogitsLoss = Sigmoid + BCELoss，即 BCEWithLogitsLoss 把 Sigmoid 合并进了损失函数。Sigmoid 对每个类别独立计算概率，各类别的概率之和不需要是 1。
例如取示例代码中 Sigmoid 输出的一行 [0.5100, 0.6713, 0.5025]，这三个数累加起来不是 1。如果把阈值定为大于等于 0.50，那么这个目标同时属于三个类；如果要求结果只属于一个类，可以取概率最大的那个。

类别是互斥关系的

如果检测的类别，类别是互斥关系，例如人，猫，狗这种互斥关系，如何改造呢？
CrossEntropyLoss = LogSoftmax + NLLLoss
Softmax 输出的各类别概率之和是 1（受浮点误差影响，或者说接近 1）。Softmax 中各类别相互竞争：数值大的类别比其他类别具有更大的概率，并且它的概率升高会压低其他类别的概率。Sigmoid 则对每个类别独立计算：数值大则该类别的概率大，但不会因此改变其他类别的概率。
看示例代码中的输出[0.2543, 0.4990, 0.2467]这三个数加起来和是1。

Sigmoid和Softmax 示例代码

import torch
import torch.nn as nn

# Raw scores (logits) for two samples over three classes.
# Named `logits` rather than `input` so the builtin `input()` is not shadowed,
# and built with `torch.tensor`, the recommended factory for literal data.
logits = torch.tensor([[0.0402, 0.7142, 0.01],
                       [0.2214, 0.4781, 0.01]])

# Sigmoid maps every element independently, so a row does not have to sum to 1.
net1 = nn.Sigmoid()
output1 = net1(logits)
print(output1)
# tensor([[0.5100, 0.6713, 0.5025],
#         [0.5551, 0.6173, 0.5025]])

# Softmax normalises along the last (class) dimension, so every row sums to 1.
net2 = nn.Softmax(dim=-1)
output2 = net2(logits)
print(output2)
# tensor([[0.2543, 0.4990, 0.2467],
#         [0.3224, 0.4167, 0.2609]])

Softmax 适用于类别互斥的情况，因此这里尝试把分类损失改为交叉熵损失（CrossEntropyLoss）看看效果。

更改代码如下或者直接到这里YOLOv5-ShuffleNetV2-CrossEntropyLoss下载全部代码

训练阶段

utils/loss.py

class ComputeLoss:
    """Compute the YOLOv5 training loss with a cross-entropy classification term.

    The box term is ``1 - CIoU`` and the objectness term is
    ``nn.BCEWithLogitsLoss``. The classification term uses
    ``nn.CrossEntropyLoss`` on class indices (softmax over the class logits),
    which treats the classes as mutually exclusive.
    """

    def __init__(self, model, autobalance=False):
        """Read hyperparameters and detection-head settings from ``model``.

        Args:
            model: Model exposing ``hyp`` (hyperparameter dict), ``gr`` and a
                final ``Detect`` layer reachable through ``model.model[-1]``
                (or ``model.module.model[-1]`` when ``is_parallel(model)``).
            autobalance: When true, the per-layer objectness weights in
                ``self.balance`` are updated on every call.
        """
        super().__init__()
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters

        # Define criteria (changed by Sisyphus).
        # The names keep their historical ``BCE`` prefix because the attribute
        # ``self.BCEcls`` is part of this class's interface, but the class
        # criterion is cross-entropy over class indices.
        BCEcls = nn.CrossEntropyLoss()
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        # NOTE: cp/cn are kept as attributes for compatibility; the
        # cross-entropy class term in __call__ does not read them, so
        # 'label_smoothing' currently has no effect on the class loss.
        self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0))  # positive, negative BCE targets

        # Focal loss
        g = h['fl_gamma']  # focal loss gamma
        if g > 0:
            # NOTE(review): FocalLoss is defined outside this excerpt; confirm it
            # accepts class-index targets before enabling fl_gamma together with
            # the cross-entropy class criterion.
            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

        det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
        self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
        self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance
        for k in 'na', 'nc', 'nl', 'anchors':
            setattr(self, k, getattr(det, k))

    def __call__(self, p, targets):  # predictions, targets, model
        """Return the total loss and its detached components.

        Args:
            p: Sequence of per-layer prediction tensors. The last dimension is
                indexed as xy (0:2), wh (2:4), objectness (4), class logits (5:).
            targets: Ground-truth tensor forwarded to ``self.build_targets``.

        Returns:
            A tuple ``(loss * bs, items)`` where ``bs`` is the first dimension
            of the last layer's objectness target and ``items`` is the detached
            concatenation ``(lbox, lobj, lcls, loss)``.
        """
        device = targets.device
        lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
        tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets

        # Losses
        for i, pi in enumerate(p):  # layer index, layer predictions
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj

            n = b.shape[0]  # number of targets
            if n:
                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

                # Regression
                pxy = ps[:, :2].sigmoid() * 2. - 0.5
                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box
                iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
                lbox += (1.0 - iou).mean()  # iou loss

                # Objectness
                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype)  # iou ratio

                # Classification
                if self.nc > 1:  # cls loss (only if multiple classes)
                    # changed by Sisyphus 20210914: cross-entropy takes the class
                    # indices directly, so no one-hot target tensor is built.
                    # The indices are integers and carry no gradient, so they
                    # are passed without clone()/detach().
                    lcls += self.BCEcls(ps[:, 5:], tcls[i])

            obji = self.BCEobj(pi[..., 4], tobj)
            lobj += obji * self.balance[i]  # obj loss
            if self.autobalance:
                self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

        if self.autobalance:
            # Rescale so the weight of the layer at index ssi is 1.
            self.balance = [x / self.balance[self.ssi] for x in self.balance]
        lbox *= self.hyp['box']
        lobj *= self.hyp['obj']
        lcls *= self.hyp['cls']
        bs = tobj.shape[0]  # batch size

        loss = lbox + lobj + lcls
        return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()

推理阶段

models/yolo.py

class Detect(nn.Module):
    """Detection head: one 1x1 convolution per input feature map.

    In training mode the reshaped raw outputs are returned. In inference mode
    the box and objectness channels are decoded with a sigmoid and the class
    channels with a softmax over the class dimension.
    """
    stride = None  # strides computed during build
    export = False  # onnx export

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        """Build the head.

        Args:
            nc: Number of classes.
            anchors: One flat sequence of (w, h) values per detection layer.
            ch: Input channel count of each feature map.
        """
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # outputs per anchor: 4 box values + objectness + classes
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # anchors per layer (two values each)
        self.grid = [torch.zeros(1)] * self.nl  # placeholder grids, rebuilt on first inference
        anchor_pairs = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', anchor_pairs)  # shape(nl,na,2)
        self.register_buffer(
            'anchor_grid',
            anchor_pairs.clone().view(self.nl, 1, -1, 1, 1, 2),
        )  # shape(nl,1,na,1,1,2)
        out_channels = self.no * self.na
        self.m = nn.ModuleList(nn.Conv2d(c_in, out_channels, 1) for c_in in ch)  # output conv

    def forward(self, x):
        """Run the head over the list of feature maps ``x`` (modified in place).

        Returns:
            ``x`` in training mode; otherwise ``(decoded, x)`` where ``decoded``
            concatenates every layer's decoded predictions along dimension 1.
        """
        decoded = []  # inference output
        self.training |= self.export  # exporting behaves like training: raw outputs only
        for i in range(self.nl):
            feat = self.m[i](x[i])  # conv
            bs, _, ny, nx = feat.shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = feat.view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if self.training:
                continue

            # Rebuild the cached cell-offset grid when the feature-map size changed.
            if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

            decoded.append(self._decode(i, x[i]).view(bs, -1, self.no))

        if self.training:
            return x
        return torch.cat(decoded, 1), x

    def _decode(self, i, raw):
        """Decode layer ``i``'s raw output ``raw`` into boxes, objectness and class probabilities."""
        y = raw.sigmoid()
        # Class scores come from a softmax over the raw class logits
        # (add by Sisyphus); they overwrite the sigmoid values in those channels.
        y[..., 5:] = raw[..., 5:].softmax(dim=-1)
        y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
        y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
        return y

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a float tensor of shape (1, 1, ny, nx, 2) holding each cell's (x, y) index."""
        rows, cols = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((cols, rows), 2).view((1, 1, ny, nx, 2)).float()