# 【目标检测】(8) ASPP改进加强特征提取模块，附Tensorflow完整代码

YOLOV4的主干网络代码可见我上一篇文章：https://blog.csdn.net/dgvv4/article/details/123818580

1. 方法介绍

YOLOv4 中使用 SPP 模块提取不同感受野的信息，但没有充分体现全局信息和局部信息的语义关系。本文设计的 ASPP 引入不同扩张率的深度可分离卷积+空洞卷积操作，实现 SPP 中的池化操作，并将其与全局平均池化并联，组成一个新的特征金字塔模型，以此聚合多尺度上下文信息，增强模型识别不同尺寸同一物体的能力。

2. 空洞卷积

3. 代码复现

3*3 深度卷积（DepthwiseConv）只处理特征图长宽方向的信息，1*1 逐点卷积（PointConv）只处理特征图通道方向的信息。

#（1）深度可分离卷积+空洞卷积
def block(inputs, filters, rate):
    """Depthwise-separable convolution branch with a dilated depthwise stage.

    Applies a 3*3 depthwise convolution (with dilation) followed by a 1*1
    pointwise convolution; each convolution is followed by batch
    normalization and ReLU.

    Relies on `layers` (presumably `tensorflow.keras.layers`) being
    available at module level.

    Args:
        inputs: Input feature map tensor.
        filters: Number of output channels of the 1*1 pointwise convolution.
        rate: Dilation rate of the 3*3 depthwise convolution.

    Returns:
        The transformed feature map with `filters` channels. Both
        convolutions use stride 1 and 'same' padding.
    """
    # 3*3 depthwise convolution with the requested dilation rate.
    # NOTE(review): the opening of this call was missing in the source text;
    # kernel size / stride / padding are reconstructed from the accompanying
    # comment ("3*3") and the printed model summary (9216 = 3*3*1024 weights,
    # spatial size preserved) — confirm against the original article.
    x = layers.DepthwiseConv2D(kernel_size=(3, 3), strides=1, padding='same',
                               dilation_rate=rate, use_bias=False)(inputs)

    x = layers.BatchNormalization()(x)  # normalization
    x = layers.Activation('relu')(x)  # activation

    # 1*1 pointwise convolution adjusts the channel count.
    x = layers.Conv2D(filters, kernel_size=(1, 1), strides=1, padding='same', use_bias=False)(x)

    x = layers.BatchNormalization()(x)  # normalization
    x = layers.Activation('relu')(x)  # activation

    return x


#（2）aspp加强特征提取模块，inputs是网络输出的第三个有效特征层[13,13,1024]
def aspp(inputs, filters=512, rates=(1, 3, 5), dropout_rate=0.1):
    """ASPP feature-enhancement module built from parallel branches.

    Branches, concatenated along the channel axis in this order:
      1. a 1*1 convolution + BN + ReLU,
      2. one `block(...)` branch per entry of `rates`,
      3. global average pooling -> 1*1 convolution + BN + ReLU -> resize back
         to the input's height/width.
    The concatenation is fused by a 1*1 convolution + BN + ReLU and followed
    by dropout.

    Relies on `layers` and `tf` (presumably `tensorflow.keras.layers` and
    `tensorflow`) being available at module level, and on `block` defined in
    this file.

    Args:
        inputs: 4-D feature map tensor; the demo in this file passes a
            [13, 13, 1024] feature map.
        filters: Output channels of every branch and of the fused output.
            Defaults to 512 (the original hard-coded value).
        rates: Dilation rates, one `block` branch each. Defaults to (1, 3, 5).
        dropout_rate: Dropout rate applied to the fused output. Defaults to 0.1.

    Returns:
        Feature map with `filters` channels and the same height/width as
        `inputs`.
    """
    # Height and width of the input feature map, needed to resize the pooled
    # branch. NOTE(review): these come from the static shape, so they
    # presumably must be known when the model is built.
    _, h, w, _ = inputs.shape

    # Branch 1: 1*1 standard convolution reduces the channel count.
    x1 = layers.Conv2D(filters=filters, kernel_size=(1, 1), strides=1, padding='same', use_bias=False)(inputs)
    x1 = layers.BatchNormalization()(x1)  # normalization
    x1 = layers.Activation('relu')(x1)  # activation

    # Dilated depthwise-separable branches, one per dilation rate.
    dilated = [block(inputs, filters=filters, rate=rate) for rate in rates]

    # Global-average-pooling branch: [h, w, c] ==> [None, c]
    x5 = layers.GlobalAveragePooling2D()(inputs)
    # [None, c] ==> [1, 1, c]
    x5 = layers.Reshape(target_shape=[1, 1, -1])(x5)
    # 1*1 convolution reduces the channel count: [1, 1, c] ==> [1, 1, filters]
    x5 = layers.Conv2D(filters=filters, kernel_size=(1, 1), strides=1, padding='same', use_bias=False)(x5)
    x5 = layers.BatchNormalization()(x5)
    x5 = layers.Activation('relu')(x5)
    # Resize back to the input's spatial size: [1, 1, filters] ==> [h, w, filters]
    x5 = tf.image.resize(x5, size=(h, w))

    # Stack the parallel branches along the channel axis.
    x = layers.concatenate([x1, *dilated, x5])

    # 1*1 convolution fuses the branches back to `filters` channels.
    x = layers.Conv2D(filters=filters, kernel_size=(1, 1), strides=1, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    # Dropout for regularization.
    x = layers.Dropout(rate=dropout_rate)(x)

    return x


#（3）查看网络结构
if __name__ == '__main__':
    # Build the ASPP module on a [13, 13, 1024] input and print its structure.
    # Relies on `keras` and `Model` (presumably from tensorflow.keras) being
    # imported at module level.

    inputs = keras.Input(shape=[13,13,1024])  # input layer
    outputs = aspp(inputs)  # build the ASPP module

    # Assemble the network model and print a layer-by-layer summary.
    model = Model(inputs, outputs)
    model.summary()


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to

==================================================================================================
input_1 (InputLayer)            [(None, 13, 13, 1024 0

__________________________________________________________________________________________________
depthwise_conv2d (DepthwiseConv (None, 13, 13, 1024) 9216        input_1[0][0]

__________________________________________________________________________________________________
depthwise_conv2d_1 (DepthwiseCo (None, 13, 13, 1024) 9216        input_1[0][0]

__________________________________________________________________________________________________
depthwise_conv2d_2 (DepthwiseCo (None, 13, 13, 1024) 9216        input_1[0][0]

__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 1024)         0           input_1[0][0]

__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 13, 13, 1024) 4096        depthwise_conv2d[0][0]

__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 13, 13, 1024) 4096        depthwise_conv2d_1[0][0]

__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 13, 13, 1024) 4096        depthwise_conv2d_2[0][0]

__________________________________________________________________________________________________
reshape (Reshape)               (None, 1, 1, 1024)   0           global_average_pooling2d[0][0]
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 13, 13, 1024) 0           batch_normalization_1[0][0]

__________________________________________________________________________________________________
activation_3 (Activation)       (None, 13, 13, 1024) 0           batch_normalization_3[0][0]

__________________________________________________________________________________________________
activation_5 (Activation)       (None, 13, 13, 1024) 0           batch_normalization_5[0][0]

__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 1, 1, 512)    524288      reshape[0][0]

__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 13, 13, 512)  524288      input_1[0][0]

__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 13, 13, 512)  524288      activation_1[0][0]

__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 13, 13, 512)  524288      activation_3[0][0]

__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 13, 13, 512)  524288      activation_5[0][0]

__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 1, 1, 512)    2048        conv2d_4[0][0]

__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 13, 13, 512)  2048        conv2d[0][0]

__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 13, 13, 512)  2048        conv2d_1[0][0]

__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 13, 13, 512)  2048        conv2d_2[0][0]

__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 13, 13, 512)  2048        conv2d_3[0][0]

__________________________________________________________________________________________________
activation_7 (Activation)       (None, 1, 1, 512)    0           batch_normalization_7[0][0]

__________________________________________________________________________________________________
activation (Activation)         (None, 13, 13, 512)  0           batch_normalization[0][0]

__________________________________________________________________________________________________
activation_2 (Activation)       (None, 13, 13, 512)  0           batch_normalization_2[0][0]

__________________________________________________________________________________________________
activation_4 (Activation)       (None, 13, 13, 512)  0           batch_normalization_4[0][0]

__________________________________________________________________________________________________
activation_6 (Activation)       (None, 13, 13, 512)  0           batch_normalization_6[0][0]

__________________________________________________________________________________________________
tf.image.resize (TFOpLambda)    (None, 13, 13, 512)  0           activation_7[0][0]

__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 13, 13, 2560) 0           activation[0][0]

activation_2[0][0]

activation_4[0][0]

activation_6[0][0]

tf.image.resize[0][0]

__________________________________________________________________________________________________
conv2d_5 (Conv2D)               (None, 13, 13, 512)  1310720     concatenate[0][0]

__________________________________________________________________________________________________
batch_normalization_8 (BatchNor (None, 13, 13, 512)  2048        conv2d_5[0][0]

__________________________________________________________________________________________________
activation_8 (Activation)       (None, 13, 13, 512)  0           batch_normalization_8[0][0]
__________________________________________________________________________________________________
dropout (Dropout)               (None, 13, 13, 512)  0           activation_8[0][0]
==================================================================================================
Total params: 3,984,384
Trainable params: 3,972,096
Non-trainable params: 12,288
__________________________________________________________________________________________________


