Modifying detectron2 model configuration files


1. cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml

_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
  RESNETS:
# ResNeXt group configuration
    STRIDE_IN_1X1: False  # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
# ResNet depth
    DEPTH: 152
# Enable deformable convolution per stage (res2-res5)
    DEFORM_ON_PER_STAGE: [False, True, True, True]
  ROI_HEADS:
    NAME: "CascadeROIHeads"
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_CONV: 4
    NUM_FC: 1
# Use GroupNorm (GN) in FastRCNNConvFCHead
    NORM: "GN"
    CLS_AGNOSTIC_BBOX_REG: True
  ROI_MASK_HEAD:
    NUM_CONV: 8
# Use GroupNorm (GN) in the mask head
    NORM: "GN"
  RPN:
    POST_NMS_TOPK_TRAIN: 2000
SOLVER:
  IMS_PER_BATCH: 128
  STEPS: (35000, 45000)
  MAX_ITER: 50000
  BASE_LR: 0.16
INPUT:
  MIN_SIZE_TRAIN: (640, 864)
# "range": resize the short side to a random size within (640, 864) while keeping
# the long side no larger than MAX_SIZE_TRAIN; if the long side would exceed it,
# the whole image is scaled down proportionally, so the short side can end up
# below the sampled value. "choice": randomly pick one value from (640, 864).
# (See the sketch after this file.)
  MIN_SIZE_TRAIN_SAMPLING: "range"
  MAX_SIZE_TRAIN: 1440
  CROP:
    ENABLED: True
TEST:
  EVAL_PERIOD: 2500
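
A quick way to see how these INPUT keys behave is detectron2's resize augmentation, which is what they configure. Below is a minimal sketch using T.ResizeShortestEdge; the image shape is a made-up example:

import numpy as np
from detectron2.data import transforms as T

# "range": the short-edge target is sampled uniformly from [640, 864];
# "choice" would pick one value out of the tuple instead. max_size caps
# the long edge, shrinking the whole image proportionally if needed.
aug = T.ResizeShortestEdge(
    short_edge_length=(640, 864), max_size=1440, sample_style="range"
)

image = np.zeros((500, 2000, 3), dtype=np.uint8)  # very wide dummy image
resized = aug.get_transform(image).apply_image(image)
# At any sampled target the long edge would exceed 1440, so both sides are
# scaled down until max(h, w) == 1440; the short edge ends up below 640.
print(resized.shape)  # (360, 1440, 3)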

2. Base-RCNN-FPN.yaml

MODEL:
# Overall meta-architecture
  META_ARCHITECTURE: "GeneralizedRCNN"
  # Backbone choice
  BACKBONE:
    NAME: "build_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
  FPN:
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
  ANCHOR_GENERATOR:
  # Anchor sizes and aspect ratios
    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
  RPN:
    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
    # Number of RPN proposals kept before/after NMS
    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
    # Detectron1 uses 2000 proposals per-batch,
    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 1000
    POST_NMS_TOPK_TEST: 1000
# Overall ROI heads; IN_FEATURES is shared by the box and mask heads.
  ROI_HEADS:
    NAME: "StandardROIHeads"
    IN_FEATURES: ["p2", "p3", "p4", "p5"]
# Box sub-head of the ROI heads
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
# Mask sub-head of the ROI heads
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
DATASETS:
  TRAIN: ("coco_2017_train",)
  TEST: ("coco_2017_val",)
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2
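
Because the cascade config in section 1 points at this file through _BASE_, detectron2 merges the two at load time: this base is applied over the defaults first, then the child file's keys override it. A minimal sketch of loading the merged result and overriding a few keys for a custom dataset; the dataset names and class count are hypothetical placeholders:

from detectron2.config import get_cfg

cfg = get_cfg()  # starts from the defaults shown in section 3
# merge_from_file resolves _BASE_ recursively, so Base-RCNN-FPN.yaml is
# merged first and the cascade file's keys win on conflict.
cfg.merge_from_file(
    "configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml"
)

cfg.DATASETS.TRAIN = ("my_dataset_train",)  # hypothetical dataset names
cfg.DATASETS.TEST = ("my_dataset_val",)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3         # foreground classes only
cfg.SOLVER.IMS_PER_BATCH = 2                # fit a single GPU
print(cfg.dump())                           # the fully resolved YAML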

3. defaults.py

Some of the commonly used configuration parameters:

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .config import CfgNode as CN

_C = CN()

# The version number, to upgrade from old configs to new ones if any
# changes happen. It's recommended to keep a VERSION in your config file.
_C.VERSION = 2

_C.MODEL = CN()
# Whether the mask branch is enabled
_C.MODEL.MASK_ON = False

# Path to the weights to load
_C.MODEL.WEIGHTS = ""

# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = (800,)
# Sample size of smallest side by choice or random selection from range given by
# INPUT.MIN_SIZE_TRAIN
_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333
# Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
_C.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333
# Mode for flipping images used in data augmentation during training
# choose one of ["horizontal", "vertical", "none"]
_C.INPUT.RANDOM_FLIP = "horizontal"

# `True` if cropping is used for data augmentation during training
_C.INPUT.CROP = CN({"ENABLED": False})

# Crop size if CROP.TYPE is "relative" or "relative_range",
# given as fractions in (0, 1] (see the crop sketch after this listing)
_C.INPUT.CROP.SIZE = [0.1, 0.5]

# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATALOADER = CN()
# Number of data loading threads
_C.DATALOADER.NUM_WORKERS = 4

# If True, when working on datasets that have instance annotations, the
# training dataloader will filter out images without associated annotations
_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True

# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.FPN = CN()

# Options: "" (no norm), "GN"
_C.MODEL.FPN.NORM = ""

# ---------------------------------------------------------------------------- #
# Anchor generator options
# ---------------------------------------------------------------------------- #
_C.MODEL.ANCHOR_GENERATOR = CN()
# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
# Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for
# IN_FEATURES[i]; len(SIZES) == len(IN_FEATURES) must be true, or
# len(SIZES) == 1 is true and size list SIZES[0] is used for all IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect
# ratios are generated by an anchor generator.
# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W)
# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true,
# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used
# for all IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
# Anchor angles.
# list[list[float]], the angle in degrees, for each input feature map.
# ANGLES[i] specifies the list of angles for IN_FEATURES[i].
_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]]

# ---------------------------------------------------------------------------- #
# ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS = CN()
# Number of foreground classes (background excluded)
_C.MODEL.ROI_HEADS.NUM_CLASSES = 80

# ---------------------------------------------------------------------------- #
# Box Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_HEAD = CN()
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_BOX_HEAD.NORM = ""

# ---------------------------------------------------------------------------- #
# Mask Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_MASK_HEAD = CN()
# Normalization method for the convolution layers.
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_MASK_HEAD.NORM = ""

# ---------------------------------------------------------------------------- #
# RetinaNet Head
# ---------------------------------------------------------------------------- #
_C.MODEL.RETINANET = CN()
# This is the number of foreground classes.
_C.MODEL.RETINANET.NUM_CLASSES = 80
_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"]
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
_C.MODEL.RETINANET.NUM_CONVS = 4
# IoU overlap ratio [bg, fg] for labeling anchors.
# Anchors with < bg are labeled negative (0)
# Anchors with >= bg and < fg are ignored (-1)
# Anchors with >= fg are labeled positive (1)
_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5]
_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1]
# Prior prob for rare case (i.e. foreground) at the beginning of training.
# This is used to set the bias for the logits layer of the classifier subnet.
# This improves training stability in the case of heavy class imbalance.
_C.MODEL.RETINANET.PRIOR_PROB = 0.01
# Inference cls score threshold, only anchors with score > INFERENCE_TH are
# considered for inference (to improve speed)
_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
# Select topk candidates before NMS
_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5
# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Loss parameters
_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
# Options are: "smooth_l1", "giou"
_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1"
# One of BN, SyncBN, FrozenBN, GN
# Only supports GN until unshared norm is implemented
_C.MODEL.RETINANET.NORM = ""

# ---------------------------------------------------------------------------- #
# ResNe[X]t options (ResNets = {ResNet, ResNeXt})
# Note that parts of a resnet may be used for both the backbone and the head
# These options apply to both
# ---------------------------------------------------------------------------- #
_C.MODEL.RESNETS = CN()
# ResNet depth
_C.MODEL.RESNETS.DEPTH = 50
# Options: FrozenBN, GN, "SyncBN", "BN"
_C.MODEL.RESNETS.NORM = "FrozenBN"
# Apply Deformable Convolution in stages
# Specify if apply deform_conv on Res2, Res3, Res4, Res5
_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False]

# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()
# Learning-rate schedule
_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
# Maximum number of training iterations
_C.SOLVER.MAX_ITER = 40000
# Base learning rate
_C.SOLVER.BASE_LR = 0.001
# Momentum
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.NESTEROV = False
_C.SOLVER.WEIGHT_DECAY = 0.0001
# The weight decay that's applied to parameters of normalization layers
# (typically the affine transformation)
_C.SOLVER.WEIGHT_DECAY_NORM = 0.0
# Learning-rate decay factor
_C.SOLVER.GAMMA = 0.1
# The learning rate is multiplied by GAMMA at each of these steps
_C.SOLVER.STEPS = (30000,)
# Warmup
_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000
# Number of steps over which the LR ramps from WARMUP_FACTOR * BASE_LR to BASE_LR
_C.SOLVER.WARMUP_ITERS = 1000
_C.SOLVER.WARMUP_METHOD = "linear"
# Checkpoint save interval (in iterations)
_C.SOLVER.CHECKPOINT_PERIOD = 5000
# Batch size (total across all GPUs)
_C.SOLVER.IMS_PER_BATCH = 16
# Enable automatic mixed precision for training
# Note that this does not change model's inference behavior.
# To use AMP in inference, run inference under autocast()
_C.SOLVER.AMP = CN({"ENABLED": False})

# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
# The period (in terms of steps) to evaluate the model during training.
# Set to 0 to disable.
_C.TEST.EVAL_PERIOD = 0
# Maximum number of detections returned per image
_C.TEST.DETECTIONS_PER_IMAGE = 100

# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# Output directory
_C.OUTPUT_DIR = "./output"
# Benchmark different cudnn algorithms.
# If input images have very different sizes, this option will have large overhead
# for about 10k iterations. It usually hurts total time, but can benefit for certain models.
# If input images have the same or similar sizes, benchmark is often helpful.
_C.CUDNN_BENCHMARK = False
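
The CROP keys earlier in this listing drive detectron2's T.RandomCrop augmentation. A minimal sketch of "relative_range" sampling, assuming the current detectron2 behavior where each crop fraction is drawn between the configured lower bound and 1.0; the image is a dummy example:

import numpy as np
from detectron2.data import transforms as T

# With "relative_range" and SIZE [0.1, 0.5], the crop's height fraction is
# sampled in [0.1, 1.0) and its width fraction in [0.5, 1.0).
aug = T.RandomCrop("relative_range", [0.1, 0.5])
image = np.zeros((800, 1333, 3), dtype=np.uint8)  # dummy image
cropped = aug.get_transform(image).apply_image(image)
print(cropped.shape)  # height roughly in [80, 800], width in [667, 1333]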
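
Finally, note how the SOLVER values scale across the configs above: Base-RCNN-FPN.yaml trains with IMS_PER_BATCH 16 and BASE_LR 0.02, while the cascade config uses 128 and 0.16, i.e. the learning rate grows linearly with the batch size (the linear scaling rule). A small sketch of applying the rule when changing the batch size; the helper is illustrative, not part of detectron2, and whether to rescale the schedule the same way is a judgment call:

from detectron2.config import get_cfg

def scale_solver(cfg, new_ims_per_batch):
    """Hypothetical helper: rescale LR (and schedule) with the batch size."""
    factor = new_ims_per_batch / cfg.SOLVER.IMS_PER_BATCH
    cfg.SOLVER.BASE_LR *= factor
    # Shrinking the schedule accordingly is a common companion adjustment.
    cfg.SOLVER.MAX_ITER = int(cfg.SOLVER.MAX_ITER / factor)
    cfg.SOLVER.STEPS = tuple(int(s / factor) for s in cfg.SOLVER.STEPS)
    cfg.SOLVER.IMS_PER_BATCH = new_ims_per_batch
    return cfg

# Applied to the base recipe: 0.02 * (128 / 16) = 0.16, which matches the
# BASE_LR in the cascade config of section 1.
cfg = scale_solver(get_cfg(), 128)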
