# 探究torchAudio中wav2vec2的源码（二）——特征提取

extractor_conv_layer_config列表中的三元组的含义分别表示：out_channels、kernel_size、stride。

## 特征提取

归一化方式（`norm_mode`）有以下两种：

- GroupNorm：将 channel 方向分成 G 个 group，然后在每个 group 内做归一化，即对 (C//G)·H·W 个元素计算均值和方差
- LayerNorm：在 channel 方向做归一化，即对 C·H·W 个元素计算均值和方差

def _get_feature_extractor(
    norm_mode: str,
    shapes: List[Tuple[int, int, int]],
    bias: bool,
) -> FeatureExtractor:
    """Build the convolutional feature extractor from a list of layer shapes.

    Args:
        norm_mode: Either ``"group_norm"`` or ``"layer_norm"``. With
            ``"group_norm"`` only the first block receives a normalization
            layer; with ``"layer_norm"`` every block receives one.
        shapes: One ``(out_channels, kernel_size, stride)`` tuple per
            convolutional block, in the order the blocks are stacked.
        bias: Forwarded unchanged to every ``ConvLayerBlock``.

    Returns:
        A ``FeatureExtractor`` wrapping the blocks in an ``nn.ModuleList``.

    Raises:
        ValueError: If ``norm_mode`` is not one of the two supported values.
    """
    # Raise explicitly instead of using ``assert``: assertions are stripped
    # under ``python -O``, and an unknown mode would then match neither branch
    # below and silently build a stack with no normalization at all.
    if norm_mode not in ("group_norm", "layer_norm"):
        raise ValueError(
            f"norm_mode must be 'group_norm' or 'layer_norm', got {norm_mode!r}"
        )

    # Blocks collected in stacking order.
    blocks = []
    # The first block takes one input channel (original note: the input is
    # speech).
    in_channels = 1
    # Walk the per-layer structure parameters.
    for i, (out_channels, kernel_size, stride) in enumerate(shapes):
        # Stays None when this block gets no normalization layer.
        normalization = None
        if norm_mode == "group_norm" and i == 0:
            # Group norm is applied to the first block only. With
            # num_groups == num_channels, every channel is its own group.
            normalization = nn.GroupNorm(
                num_groups=out_channels,
                num_channels=out_channels,
                affine=True,
            )
        elif norm_mode == "layer_norm":
            # Layer norm is applied to every block.
            normalization = LayerNorm(
                normalized_shape=out_channels,
                elementwise_affine=True,
            )
        # Build the conv block, handing it the normalization layer (or None).
        blocks.append(
            ConvLayerBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                bias=bias,
                layer_norm=normalization,
            )
        )
        # This block's output channels become the next block's input channels
        # (original note: 1 -> 512, 512 -> 512, ...).
        in_channels = out_channels
    # Wrap the Python list in an nn.ModuleList and hand it to the extractor.
    return FeatureExtractor(nn.ModuleList(blocks))

## 总结

