Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions egs/ami/s5b/conf/mfcc_hires.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--num-mel-bins=80 # similar to Google's setup.
--num-ceps=80 # there is no dimensionality reduction.
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
# there might be some information at the low end.
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
9 changes: 9 additions & 0 deletions egs/ami/s5b/conf/queue.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Job-submission settings: one base qsub command line, followed by rules that
# map option=value settings onto extra qsub arguments.
# NOTE(review): presumably read by Kaldi's utils/queue.pl, with "$0" standing
# for the value supplied for the option -- confirm against that script.
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
# Memory: any value is passed as both mem_free and ram_free; mem=0 adds nothing.
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
# Threads: any value is passed via "-pe smp"; a single thread adds nothing.
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
# Limit on simultaneously running jobs, passed via "-tc".
option max_jobs_run=* -tc $0
# GPU handling: gpu defaults to 0. Both rules attach a site-specific hostname
# filter; the gpu=* rule additionally requests gpu=$0 and selects queue g.q.
default gpu=0
option gpu=0 -l 'hostname=!a10*&!a18*&!b05*&!b06*'
option gpu=* -l gpu=$0 -q g.q -l 'hostname=!b05*&!b06*'
11 changes: 7 additions & 4 deletions egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,19 @@ awk '{print $1}' $tmpdir/segments | \
join $tmpdir/utt2spk_stm $tmpdir/segments | \
awk '{ utt=$1; spk=$2; wav=$3; t_beg=$4; t_end=$5;
if(spk_prev == spk && t_end_prev > t_beg) {
print "s/^"utt, wav, t_beg, t_end"$/"utt, wav, t_end_prev, t_end"/;";
print utt, wav, t_beg, t_end">"utt, wav, t_end_prev, t_end;
}
spk_prev=spk; t_end_prev=t_end;
}' > $tmpdir/segments_to_fix

if [ -s $tmpdir/segments_to_fix ]; then
if [ `cat $tmpdir/segments_to_fix | wc -l` -gt 0 ]; then
echo "$0. Applying following fixes to segments"
cat $tmpdir/segments_to_fix
perl -i -pf $tmpdir/segments_to_fix $tmpdir/segments
# Apply every "old>new" pair listed in segments_to_fix to the segments file.
# Each input line holds the original segment entry and its corrected
# replacement (start time moved to the previous segment's end), joined by '>'.
while read -r line; do
  p1=${line%%>*}   # text before the first '>': entry to be replaced
  p2=${line#*>}    # text after the first '>': corrected entry
  # "sed -ir" is parsed as -i with backup suffix "r" (it does NOT enable -r),
  # which left a stray "segmentsr" copy in $tmpdir.  Use an explicit backup
  # suffix, accepted by both GNU and BSD sed, and delete the backup afterwards.
  sed -i.bak "s:$p1:$p2:" "$tmpdir/segments" && rm -f "$tmpdir/segments.bak"
done < "$tmpdir/segments_to_fix"
fi

# Copy stuff into its final locations [this has been moved from the format_data
Expand All @@ -125,4 +129,3 @@ local/convert2stm.pl $dir utt2spk_stm > $dir/stm
utils/validate_data_dir.sh --no-feats $dir

echo AMI $SET set data preparation succeeded.

13 changes: 8 additions & 5 deletions egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,21 +111,25 @@ awk '{print $1}' $tmpdir/segments | \
join $tmpdir/utt2spk_stm $tmpdir/segments | \
awk '{ utt=$1; spk=$2; wav=$3; t_beg=$4; t_end=$5;
if(spk_prev == spk && t_end_prev > t_beg) {
print "s/^"utt, wav, t_beg, t_end"$/"utt, wav, t_end_prev, t_end"/;";
print utt, wav, t_beg, t_end">"utt, wav, t_end_prev, t_end;
}
spk_prev=spk; t_end_prev=t_end;
}' > $tmpdir/segments_to_fix

if [ -s $tmpdir/segments_to_fix ]; then
if [ `cat $tmpdir/segments_to_fix | wc -l` -gt 0 ]; then
echo "$0. Applying following fixes to segments"
cat $tmpdir/segments_to_fix
perl -i -pf $tmpdir/segments_to_fix $tmpdir/segments
# Apply every "old>new" pair listed in segments_to_fix to the segments file.
# Each input line holds the original segment entry and its corrected
# replacement (start time moved to the previous segment's end), joined by '>'.
while read -r line; do
  p1=${line%%>*}   # text before the first '>': entry to be replaced
  p2=${line#*>}    # text after the first '>': corrected entry
  # "sed -ir" is parsed as -i with backup suffix "r" (it does NOT enable -r),
  # which left a stray "segmentsr" copy in $tmpdir.  Use an explicit backup
  # suffix, accepted by both GNU and BSD sed, and delete the backup afterwards.
  sed -i.bak "s:$p1:$p2:" "$tmpdir/segments" && rm -f "$tmpdir/segments.bak"
done < "$tmpdir/segments_to_fix"
fi

# Copy stuff into its final locations [this has been moved from the format_data
# script]
mkdir -p $dir
for f in segments_to_fix spk2utt utt2spk utt2spk_stm wav.scp text segments reco2file_and_channel; do
for f in spk2utt utt2spk utt2spk_stm wav.scp text segments reco2file_and_channel; do
cp $tmpdir/$f $dir/$f || exit 1;
done

Expand All @@ -135,4 +139,3 @@ cp local/english.glm $dir/glm
utils/validate_data_dir.sh --no-feats $dir

echo AMI $DSET scenario and $SET set data preparation succeeded.

2 changes: 1 addition & 1 deletion egs/ami/s5b/local/chain/run_tdnn.sh
106 changes: 58 additions & 48 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true).
num_threads_ubm=32
ivector_transform_type=pca
nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
num_epochs=9
num_epochs=15
remove_egs=true

decode_iter=
# The rest are configs specific to this script. Most of the parameters
# are just hardcoded at this level, in the commands below.
train_stage=-10
tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
tdnn_affix=1i #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
#tdnn_affix=1i_swbd_wide_ep15 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
tdnn_affix=1j_34M_woaug #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir= # you can set this to use previously dumped egs.


Expand All @@ -56,15 +57,15 @@ where "nvcc" is installed.
EOF
fi

local/nnet3/run_ivector_common.sh --stage $stage \
--mic $mic \
--nj $nj \
--min-seg-len $min_seg_len \
--train-set $train_set \
--gmm $gmm \
--num-threads-ubm $num_threads_ubm \
--ivector-transform-type "$ivector_transform_type" \
--nnet3-affix "$nnet3_affix"
#local/nnet3/run_ivector_common.sh --stage $stage \
# --mic $mic \
# --nj $nj \
# --min-seg-len $min_seg_len \
# --train-set $train_set \
# --gmm $gmm \
# --num-threads-ubm $num_threads_ubm \
# --ivector-transform-type "$ivector_transform_type" \
# --nnet3-affix "$nnet3_affix"

# Note: the first stage of the following script is stage 8.
local/nnet3/prepare_lores_feats.sh --stage $stage \
Expand Down Expand Up @@ -169,45 +170,47 @@ if [ $stage -le 15 ]; then

num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
opts="l2-regularize=0.02"
output_opts="l2-regularize=0.004"
affine_opts="l2-regularize=0.01"
tdnnf_opts="l2-regularize=0.01 bypass-scale=0.66"
linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
prefinal_opts="l2-regularize=0.01"
output_opts="l2-regularize=0.002"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=100 name=ivector
input dim=40 name=input

# please note that it is important to have input layer with the name=input
# as the layer immediately preceding the fixed-affine-layer to enable
# the use of short notation for the descriptor
input dim=80 name=input

# this takes the MFCCs and generates filterbank coefficients. The MFCCs
# are more compressible so we prefer to dump the MFCCs to disk rather
# than filterbanks.
idct-layer name=idct input=input dim=80 cepstral-lifter=22 affine-transform-file=$dir/configs/idct.mat include-in-init=true
batchnorm-component name=batchnorm0 input=idct include-in-init=true
spec-augment-layer name=spec-augment freq-max-proportion=0.5 time-zeroed-proportion=0.2 time-mask-max-frames=20 include-in-init=true
fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

# the first splicing is moved before the lda layer, so no splicing here
relu-batchnorm-layer name=tdnn1 dim=450 $opts
relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 $opts
relu-batchnorm-layer name=tdnn3 dim=450 $opts
relu-batchnorm-layer name=tdnn4 input=Append(-1,0,1) dim=450 $opts
relu-batchnorm-layer name=tdnn5 dim=450 $opts
relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=450 $opts
relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=450 $opts
relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=450 $opts
relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=450 $opts

## adding the layers for chain branch
relu-batchnorm-layer name=prefinal-chain input=tdnn9 dim=450 target-rms=0.5 $opts
output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts

# adding the layers for xent branch
# This block prints the configs for a separate output that will be
# trained with a cross-entropy objective in the 'chain' models... this
# has the effect of regularizing the hidden parts of the model. we use
# 0.5 / args.xent_regularize as the learning rate factor- the factor of
# 0.5 / args.xent_regularize is suitable as it means the xent
# final-layer learns at a rate independent of the regularization
# constant; and the 0.5 was tuned so as to make the relative progress
# similar in the xent and regular final layers.
relu-batchnorm-layer name=prefinal-xent input=tdnn9 dim=450 target-rms=0.5 $opts
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts
relu-batchnorm-layer name=tdnn1 $affine_opts dim=2136
tdnnf-layer name=tdnnf2 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf3 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf4 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf5 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=0
tdnnf-layer name=tdnnf6 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf7 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf8 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf9 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf10 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf11 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf12 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf13 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf14 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf15 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
linear-component name=prefinal-l dim=512 $linear_opts

prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=2136 small-dim=512
output-layer name=output include-log-softmax=false dim=$num_targets $output_opts

prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=2136 small-dim=512
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts

EOF

Expand All @@ -232,7 +235,7 @@ if [ $stage -le 16 ]; then
--egs.dir "$common_egs_dir" \
--egs.opts "--frames-overlap-per-eg 0" \
--egs.chunk-width 150 \
--trainer.num-chunk-per-minibatch 128 \
--trainer.num-chunk-per-minibatch 64 \
--trainer.frames-per-iter 1500000 \
--trainer.num-epochs $num_epochs \
--trainer.optimization.num-jobs-initial 2 \
Expand All @@ -256,15 +259,21 @@ if [ $stage -le 17 ]; then
utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
fi

iter_opts=
if [ ! -z $decode_iter ]; then
iter_opts=" --iter $decode_iter "
fi

if [ $stage -le 18 ]; then
rm $dir/.error 2>/dev/null || true
for decode_set in dev eval; do
(
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--nj $nj --cmd "$decode_cmd" \
--nj $nj --cmd "$decode_cmd" $iter_opts \
--online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \
--scoring-opts "--min-lmwt 5 " \
$graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1;
$graph_dir data/$mic/${decode_set}_hires \
$dir/decode_${decode_set}${decode_iter:+_$decode_iter} || exit 1;
) || touch $dir/.error &
done
wait
Expand All @@ -273,4 +282,5 @@ if [ $stage -le 18 ]; then
exit 1
fi
fi

exit 0
Loading