目标检测 YOLOv5 - 学习率

目标检测 YOLOv5 - 学习率

flyfish

YOLOv5的优化器把参数分成3组(BatchNorm权重、其余权重、偏置),每组各有一个学习率,所以学习率有3个。
先用简单代码可视化一下学习率大概的样子,然后再可视化这三个学习率。

学习率大概的样子

from torch.optim import SGD
import torch.optim.lr_scheduler as lr_scheduler
from torchvision.models import shufflenet_v2_x0_5 as testmodel
import math
import matplotlib.pyplot as plt

epochs = 300  # number of epochs the schedule is simulated for

def one_cycle(y1=0.0, y2=1.0, steps=100):
    """Build a half-cosine ramp going from ``y1`` to ``y2``.

    The returned callable maps a position ``x`` to
    ``((1 - cos(x * pi / steps)) / 2) * (y2 - y1) + y1``; it yields ``y1``
    at ``x = 0`` and ``y2`` at ``x = steps``.
    """
    def ramp(x):
        # Blend weight: 0 at x == 0, rising to 1 at x == steps.
        weight = (1 - math.cos(x * math.pi / steps)) / 2
        return weight * (y2 - y1) + y1

    return ramp

# Simulate `epochs` epochs with a dummy model (no forward/backward pass) and
# plot the learning rate that the LambdaLR scheduler assigns to each epoch.
model = testmodel()
optimizer = SGD(model.parameters(), lr=0.001)
lf = one_cycle(1, 0.2, epochs)  # LR multiplier: ramps from 1 down to 0.2
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
lr_t = []
epoch_t = []
for epoch in range(epochs):
    # Record the rate in effect for this epoch *before* stepping the
    # scheduler. Reading it after scheduler.step() would log the next
    # epoch's value, and the initial learning rate would never be plotted.
    lr = optimizer.param_groups[0]['lr']
    print("learning rate: {:.6f}".format(lr))
    lr_t.append(lr)
    epoch_t.append(epoch)

    optimizer.zero_grad()
    # No backward pass is run, so no gradients exist and the parameters are
    # expected to stay unchanged; the call keeps the
    # optimizer.step() -> scheduler.step() order that PyTorch expects.
    optimizer.step()
    scheduler.step()

plt.figure()
plt.plot(epoch_t, lr_t, color="r", label='learning rate')
plt.legend()
plt.show()

在这里插入图片描述

三个学习率的样子

# The following settings are taken from hyp.scratch.yaml
# lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
# lrf: 0.2  # final OneCycleLR learning rate (lr0 * lrf)
# momentum: 0.937  # SGD momentum/Adam beta1
# weight_decay: 0.0005  # optimizer weight decay 5e-4
# warmup_epochs: 3.0  # warmup epochs (fractions ok)
# warmup_momentum: 0.8  # warmup initial momentum
# warmup_bias_lr: 0.1  # warmup initial bias lr

from torch.optim import SGD
import torch.optim.lr_scheduler as lr_scheduler
from torchvision.models import shufflenet_v2_x0_5 as testmodel
import math
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn

# Hyperparameters (same values as the YAML excerpt quoted above).
lr0 = 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf = 0.2  # final OneCycleLR learning rate (lr0 * lrf)
momentum = 0.937  # SGD momentum/Adam beta1
weight_decay = 0.0005  # optimizer weight decay 5e-4
warmup_epochs = 3.0  # warmup epochs (fractions ok)
warmup_momentum = 0.8  # warmup initial momentum
warmup_bias_lr = 0.1  # warmup initial bias lr

epochs = 300
nb = 200  # number of batches per epoch = number of samples / batch_size
# Number of warmup iterations: warmup_epochs worth of batches, but never
# fewer than 1000. Derived from the values above so it cannot go stale when
# nb or warmup_epochs is changed (evaluates to 1000 for the current values).
nw = max(round(warmup_epochs * nb), 1000)

def one_cycle(y1=0.0, y2=1.0, steps=100):
    """Return a function describing a sinusoidal ramp from ``y1`` to ``y2``.

    The returned function evaluates
    ``((1 - cos(x * pi / steps)) / 2) * (y2 - y1) + y1`` for a position
    ``x``: the result is ``y1`` at ``x = 0`` and ``y2`` at ``x = steps``.
    """
    def schedule(x):
        # Fraction of the way from y1 to y2 (0 at the start, 1 at `steps`).
        fraction = (1 - math.cos(x * math.pi / steps)) / 2
        return fraction * (y2 - y1) + y1

    return schedule

# Dummy network; it is only used here as a source of parameters.
model = testmodel()

# Sort the parameters into three optimizer groups:
#   pg0 - BatchNorm2d weights (added without a weight_decay override)
#   pg1 - all other weights   (added with weight_decay)
#   pg2 - biases
pg0, pg1, pg2 = [], [], []
for layer in model.modules():
    bias = getattr(layer, 'bias', None)
    if isinstance(bias, nn.Parameter):
        pg2.append(bias)

    weight = getattr(layer, 'weight', None)
    if isinstance(layer, nn.BatchNorm2d):
        pg0.append(layer.weight)
    elif isinstance(weight, nn.Parameter):
        pg1.append(weight)

# Group order matters later: index 0 -> pg0, index 1 -> pg1, index 2 -> pg2.
optimizer = SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)
optimizer.add_param_group({'params': pg1, 'weight_decay': weight_decay})
optimizer.add_param_group({'params': pg2})

# LR multiplier per epoch: ramps from 1 down to lrf over `epochs` epochs.
lf = one_cycle(1, lrf, epochs)
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

# Simulate the training loop (no forward/backward pass) and record, once per
# epoch, the learning rate of each of the three optimizer parameter groups.
# The result lists get their own names so that they do not shadow the `lr0`
# hyperparameter.
lr_pg0, lr_pg1, lr_pg2, epoch_t = [], [], [], []
optimizer.zero_grad()
for epoch in range(epochs):
    for i in range(nb):
        ni = i + nb * epoch  # batches processed since the start of training
        if ni <= nw:  # warmup phase
            xi = [0, nw]
            for j, x in enumerate(optimizer.param_groups):
                # Group 2 (biases) starts at warmup_bias_lr, the other groups
                # start at 0.0; all are interpolated towards the scheduled
                # rate of the current epoch. 'initial_lr' is the base rate
                # that the LambdaLR constructor stored in each group.
                x['lr'] = np.interp(
                    ni, xi,
                    [warmup_bias_lr if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                if 'momentum' in x:
                    x['momentum'] = np.interp(ni, xi, [warmup_momentum, momentum])

    # Sample the rates at the end of the epoch, before the scheduler moves on.
    rates = [x['lr'] for x in optimizer.param_groups]
    lr_pg0.append(rates[0])
    lr_pg1.append(rates[1])
    lr_pg2.append(rates[2])

    # No backward pass is run, so no gradients exist and the parameters are
    # expected to stay unchanged; the call keeps the
    # optimizer.step() -> scheduler.step() order that PyTorch expects.
    optimizer.step()
    scheduler.step()
    epoch_t.append(epoch)

# plt.subplot(22k) splits the figure into a 2x2 grid and selects cell k;
# cells 1, 2 and 3 hold the curves of parameter groups 0, 1 and 2.
plt.figure()
plt.subplot(221)
plt.plot(epoch_t, lr_pg0, color="r", label='learning rate 0')
plt.legend()
print(lr_pg0)
plt.subplot(222)
plt.plot(epoch_t, lr_pg1, color="b", label='learning rate 1')
plt.legend()

plt.subplot(223)
plt.plot(epoch_t, lr_pg2, color="g", label='learning rate 2')
plt.legend()

plt.show()

在这里插入图片描述

在这里插入图片描述

在这里插入图片描述

版权声明:本文为CSDN博主「TheOldManAndTheSea」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/flyfish1986/article/details/119875059

TheOldManAndTheSea

我还没有学会写个人说明!

暂无评论

发表评论

相关推荐

目标检测YOLO系列------YOLO简介

YOLO以及各种变体已经广泛应用于目标检测算法所涉及到的方方面面,为了梳理YOLO系列算法建立YOLO系列专题,按照自己的理解讲解YOLO中的知识点和自己的一些思考。本文是开篇之作,首先简单介绍一下YO

目标检测算法(YOLOv1)

目标检测算法(YOLOv1) 论文题目:You Only Look Once: Unified, Real-Time Object Detection 网络架构 YOLOv1的模型架构参考GoogleNet,一共有24个卷积层,2个全连接

Day 14 - 安装与执行 YOLO

Day 14 - 安装与执行 YOLO 在 介绍影像辨识的处理流程 - Day 10 有提到 YOLO 模型是由 Joseph Redmon 所提出,而到了 YOLOV4 后才换成另外一群人继续发展,