YOLOX改进之添加ASFF

文章内容:如何在YOLOX官网代码中添加 ASFF模块

环境:pytorch1.8

修改内容

(1)在 PAFPN尾部添加ASFF模块( YOLOX-s等版本)

(2)在 FPN尾部添加ASFF模块( YOLOX-Darknet53版本)

参考链接

论文链接https://arxiv.org/pdf/1911.09516v2.pdf

ASFF原理及代码参考https://blog.csdn.net/weixin_44119362/article/details/114289607

示意图如下

YOLOX改进之添加ASFF

使用方法:直接在PAFPN或FPN尾部添加即可(可自动进行维度匹配,不需要修改)

代码修改过程

1、在 YOLOXS版本的 PAFPN后添加 ASFF模块

(注意:这里是PAFPN该版本用于YOLOv5版的PAFPN中,不能用于YOLOv3的FPN)

步骤一:在YOLOX-main/yolox/models文件夹下创建 ASFF.py文件,内容如下:

import torch
import torch.nn as nn
import torch.nn.functional as F

def autopad(k, p=None):

    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
    return p

class Conv(nn.Module):

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))

class ASFF(nn.Module):
    def __init__(self, level, multiplier=1, rfb=False, vis=False, act_cfg=True):
"""
        multiplier should be 1, 0.5
        which means, the channel of ASFF can be
        512, 256, 128 -> multiplier=0.5
        1024, 512, 256 -> multiplier=1
        For even smaller, you need change code manually.

"""
        super(ASFF, self).__init__()
        self.level = level
        self.dim = [int(1024*multiplier), int(512*multiplier),
                    int(256*multiplier)]

        self.inter_dim = self.dim[self.level]
        if level == 0:
            self.stride_level_1 = Conv(int(512*multiplier), self.inter_dim, 3, 2)

            self.stride_level_2 = Conv(int(256*multiplier), self.inter_dim, 3, 2)

            self.expand = Conv(self.inter_dim, int(
                1024*multiplier), 3, 1)
        elif level == 1:
            self.compress_level_0 = Conv(
                int(1024*multiplier), self.inter_dim, 1, 1)
            self.stride_level_2 = Conv(
                int(256*multiplier), self.inter_dim, 3, 2)
            self.expand = Conv(self.inter_dim, int(512*multiplier), 3, 1)
        elif level == 2:
            self.compress_level_0 = Conv(
                int(1024*multiplier), self.inter_dim, 1, 1)
            self.compress_level_1 = Conv(
                int(512*multiplier), self.inter_dim, 1, 1)
            self.expand = Conv(self.inter_dim, int(
                256*multiplier), 3, 1)

        compress_c = 8 if rfb else 16
        self.weight_level_0 = Conv(
            self.inter_dim, compress_c, 1, 1)
        self.weight_level_1 = Conv(
            self.inter_dim, compress_c, 1, 1)
        self.weight_level_2 = Conv(
            self.inter_dim, compress_c, 1, 1)

        self.weight_levels = Conv(
            compress_c*3, 3, 1, 1)
        self.vis = vis

    def forward(self, x):
"""
        #
        256, 512, 1024
        from small -> large
"""
        x_level_0=x[2]
        x_level_1=x[1]
        x_level_2=x[0]

        if self.level == 0:
            level_0_resized = x_level_0
            level_1_resized = self.stride_level_1(x_level_1)
            level_2_downsampled_inter = F.max_pool2d(
                x_level_2, 3, stride=2, padding=1)
            level_2_resized = self.stride_level_2(level_2_downsampled_inter)
        elif self.level == 1:
            level_0_compressed = self.compress_level_0(x_level_0)
            level_0_resized = F.interpolate(
                level_0_compressed, scale_factor=2, mode='nearest')
            level_1_resized = x_level_1
            level_2_resized = self.stride_level_2(x_level_2)
        elif self.level == 2:
            level_0_compressed = self.compress_level_0(x_level_0)
            level_0_resized = F.interpolate(
                level_0_compressed, scale_factor=4, mode='nearest')
            x_level_1_compressed = self.compress_level_1(x_level_1)
            level_1_resized = F.interpolate(
                x_level_1_compressed, scale_factor=2, mode='nearest')
            level_2_resized = x_level_2

        level_0_weight_v = self.weight_level_0(level_0_resized)
        level_1_weight_v = self.weight_level_1(level_1_resized)
        level_2_weight_v = self.weight_level_2(level_2_resized)

        levels_weight_v = torch.cat(
            (level_0_weight_v, level_1_weight_v, level_2_weight_v), 1)
        levels_weight = self.weight_levels(levels_weight_v)
        levels_weight = F.softmax(levels_weight, dim=1)

        fused_out_reduced = level_0_resized * levels_weight[:, 0:1, :, :] +\
            level_1_resized * levels_weight[:, 1:2, :, :] +\
            level_2_resized * levels_weight[:, 2:, :, :]

        out = self.expand(fused_out_reduced)

        if self.vis:
            return out, levels_weight, fused_out_reduced.sum(dim=1)
        else:
            return out

步骤二:在YOLOX-main/yolox/models/ yolo_pafpn.py中调用 ASFF模块

(1)导入

from .ASFF import ASFF

(2)在init中实例化


        self.asff_1 = ASFF(level = 0, multiplier = width)
        self.asff_2 = ASFF(level = 1, multiplier = width)
        self.asff_3 = ASFF(level = 2, multiplier = width)

    def forward(self, input):

(3)直接在PAFPN输出outputs后接上ASFF模块

        outputs = (pan_out2, pan_out1, pan_out0)

        pan_out0 = self.asff_1(outputs)
        pan_out1 = self.asff_2(outputs)
        pan_out2 = self.asff_3(outputs)
        outputs = (pan_out2, pan_out1, pan_out0)

        return outputs

2、在 YOLOX-Darknet53FPN后添加 ASFF模块

(注意:这里是用于 YOLOv3的FPN)

步骤一:在YOLOX-main/yolox/models文件夹下创建 ASFF_darknet.py文件,内容如下:

import torch
import torch.nn as nn
import torch.nn.functional as F
from .network_blocks import BaseConv

class ASFF(nn.Module):
    def __init__(self, level, rfb=False, vis=False):
        super(ASFF, self).__init__()
        self.level = level
        self.dim = [512, 256, 128]
        self.inter_dim = self.dim[self.level]
        if level==0:
            self.stride_level_1 = self._make_cbl(256, self.inter_dim, 3, 2)
            self.stride_level_2 = self._make_cbl(128, self.inter_dim, 3, 2)
            self.expand = self._make_cbl(self.inter_dim, 512, 3, 1)
        elif level==1:
            self.compress_level_0 = self._make_cbl(512, self.inter_dim, 1, 1)
            self.stride_level_2 = self._make_cbl(128, self.inter_dim, 3, 2)
            self.expand = self._make_cbl(self.inter_dim, 256, 3, 1)
        elif level==2:
            self.compress_level_0 = self._make_cbl(512, self.inter_dim, 1, 1)
            self.compress_level_1 = self._make_cbl(256, self.inter_dim, 1, 1)
            self.expand = self._make_cbl(self.inter_dim, 128, 3, 1)

        compress_c = 8 if rfb else 16

        self.weight_level_0 = self._make_cbl(self.inter_dim, compress_c, 1, 1)
        self.weight_level_1 = self._make_cbl(self.inter_dim, compress_c, 1, 1)
        self.weight_level_2 = self._make_cbl(self.inter_dim, compress_c, 1, 1)

        self.weight_levels = nn.Conv2d(compress_c*3, 3, kernel_size=1, stride=1, padding=0)
        self.vis= vis

    def _make_cbl(self, _in, _out, ks, stride):
        return BaseConv(_in, _out, ks, stride, act="lrelu")

    def forward(self, x_level_0, x_level_1, x_level_2):
        if self.level==0:
            level_0_resized = x_level_0
            level_1_resized = self.stride_level_1(x_level_1)

            level_2_downsampled_inter =F.max_pool2d(x_level_2, 3, stride=2, padding=1)
            level_2_resized = self.stride_level_2(level_2_downsampled_inter)

        elif self.level==1:
            level_0_compressed = self.compress_level_0(x_level_0)
            level_0_resized =F.interpolate(level_0_compressed, scale_factor=2, mode='nearest')
            level_1_resized =x_level_1
            level_2_resized =self.stride_level_2(x_level_2)
        elif self.level==2:
            level_0_compressed = self.compress_level_0(x_level_0)
            level_0_resized =F.interpolate(level_0_compressed, scale_factor=4, mode='nearest')
            level_1_compressed = self.compress_level_1(x_level_1)
            level_1_resized =F.interpolate(level_1_compressed, scale_factor=2, mode='nearest')
            level_2_resized =x_level_2

        level_0_weight_v = self.weight_level_0(level_0_resized)
        level_1_weight_v = self.weight_level_1(level_1_resized)
        level_2_weight_v = self.weight_level_2(level_2_resized)
        levels_weight_v = torch.cat((level_0_weight_v, level_1_weight_v, level_2_weight_v),1)
        levels_weight = self.weight_levels(levels_weight_v)
        levels_weight = F.softmax(levels_weight, dim=1)

        fused_out_reduced = level_0_resized * levels_weight[:,0:1,:,:]+\
                            level_1_resized * levels_weight[:,1:2,:,:]+\
                            level_2_resized * levels_weight[:,2:,:,:]

        out = self.expand(fused_out_reduced)

        if self.vis:
            return out, levels_weight, fused_out_reduced.sum(dim=1)
        else:
            return out

步骤二:在YOLOX-main/yolox/models/ yolo_fpn.py中调用ASFF模块

(1)导入

from .ASFF_darknet import ASFF

(2)实例化 ASFF对象


        self.assf_5 = ASFF(level = 0)
        self.assf_4 = ASFF(level = 1)
        self.assf_3 = ASFF(level = 2)

    def _make_cbl(self, _in, _out, ks):
        return BaseConv(_in, _out, ks, stride=1, act="lrelu")

(3)在outputs后直接添加asff

        outputs = (out_dark3, out_dark4, x0)

        out_assf_5 = self.assf_5(x0, out_dark4, out_dark3)
        out_assf_4 = self.assf_4(x0, out_dark4, out_dark3)
        out_assf_3 = self.assf_3(x0, out_dark4, out_dark3)

        outputs = (out_assf_3, out_assf_4, out_assf_5)

        return outputs

效果:根据个人数据集而定。对我的数据集没变化。

权重大小变化:yoloxs(68.8M->110M)

速度变化:有所下降

上述代码链接
链接:https://pan.baidu.com/s/1ykfb-YHpJaLj4sQpMsCIKw
提取码:qrvg

Original: https://blog.csdn.net/weixin_45679938/article/details/122354725
Author: 你的陈某某
Title: YOLOX改进之添加ASFF

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/647788/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球