From 8d7b1269e5ec94bc7741c3f57f5a222a5bef2496 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 11 Mar 2019 17:46:49 -0400 Subject: [PATCH 01/20] adding files for semisup training --- egs/madcat_ar/v1/local/chain/run_cnn.sh | 1 - .../v1/local/chain/run_cnn_chainali.sh | 1 - ...nn_e2eali_1a.sh => run_cnn_chainali_1a.sh} | 158 +++++------- .../v1/local/chain/run_cnn_e2eali.sh | 1 - .../run_cnn_1a.sh => run_cnn_e2eali_1a.sh} | 163 +++++------- egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh | 1 - .../chain/{tuning => }/run_e2e_cnn_1a.sh | 83 +++--- .../local/chain/tuning/run_cnn_chainali_1a.sh | 213 ---------------- .../local/chain/tuning/run_cnn_e2eali_1b.sh | 238 ------------------ .../v1/local/get_unique_utterances.py | 40 +++ .../v1/local/remove_sup_utts_from_unsup.py | 44 ++++ egs/madcat_ar/v1/local/train_lm.sh | 32 +-- egs/madcat_ar/v1/local/train_lm.unsup.sh | 98 ++++++++ egs/madcat_ar/v1/run.sh | 161 ------------ egs/madcat_ar/v1/run_end2end.sh | 141 +++++++++-- 15 files changed, 477 insertions(+), 898 deletions(-) delete mode 120000 egs/madcat_ar/v1/local/chain/run_cnn.sh delete mode 120000 egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh rename egs/madcat_ar/v1/local/chain/{tuning/run_cnn_e2eali_1a.sh => run_cnn_chainali_1a.sh} (55%) delete mode 120000 egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh rename egs/madcat_ar/v1/local/chain/{tuning/run_cnn_1a.sh => run_cnn_e2eali_1a.sh} (52%) delete mode 120000 egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh rename egs/madcat_ar/v1/local/chain/{tuning => }/run_e2e_cnn_1a.sh (61%) delete mode 100755 egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh delete mode 100755 egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh create mode 100755 egs/madcat_ar/v1/local/get_unique_utterances.py create mode 100755 egs/madcat_ar/v1/local/remove_sup_utts_from_unsup.py create mode 100755 egs/madcat_ar/v1/local/train_lm.unsup.sh delete mode 100755 egs/madcat_ar/v1/run.sh diff --git a/egs/madcat_ar/v1/local/chain/run_cnn.sh b/egs/madcat_ar/v1/local/chain/run_cnn.sh deleted file mode 120000 index df6f0a468c1..00000000000 --- a/egs/madcat_ar/v1/local/chain/run_cnn.sh +++ /dev/null @@ -1 +0,0 @@ -tuning/run_cnn_1a.sh \ No newline at end of file diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh deleted file mode 120000 index a864819f542..00000000000 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh +++ /dev/null @@ -1 +0,0 @@ -tuning/run_cnn_chainali_1a.sh \ No newline at end of file diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh similarity index 55% rename from egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh rename to egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index ee84ea0d83f..78b2e618e45 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -1,14 +1,26 @@ #!/bin/bash -# e2eali_1a is the same as chainali_1c but uses the e2e chain model to get the -# lattice alignments and to build a tree +# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a +# System cnn_e2eali_1a_(dict_50k) cnn_e2eali_1a_(dict_50k + unk model) +# WER 13.30 11.94 +# CER 5.95 5.15 +# WER val 12.85 10.71 +# CER val 5.09 4.03 +# Final train prob -0.0562 +# Final valid prob -0.0634 +# Final train prob (xent) -0.8196 +# Final valid prob (xent) -0.8816 +# Parameters 3.96M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a +# exp/chain/cnn_e2eali_1a: num-iters=42 nj=2..4 num-params=4.0M dim=40->368 combine=-0.058->-0.058 (over 1) xent:train/valid[27,41,final]=(-2.67,-0.841,-0.820/-2.71,-0.892,-0.882) logprob:train/valid[27,41,final]=(-0.240,-0.060,-0.056/-0.245,-0.068,-0.063) set -e -o pipefail stage=0 - -nj=70 +nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -22,20 +34,15 @@ frame_subsampling_factor=4 chunk_width=340,300,200,100 num_leaves=500 tdnn_dim=450 -# training options -srand=0 -remove_egs=true -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g +lang_decode=lang_test +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! cuda-compiled; then cat < $dir/configs/network.xconfig input dim=40 name=input @@ -127,77 +128,51 @@ if [ $stage -le 4 ]; then conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim - + relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi -if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=false \ - --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=1 \ - --chain.left-tolerance 3 \ - --chain.right-tolerance 3 \ - --trainer.srand=$srand \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=4 \ - --trainer.frames-per-iter=2000000 \ - --trainer.optimization.num-jobs-initial=3 \ - --trainer.optimization.num-jobs-final=16 \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ - --trainer.add-option="--optimization.memory-compression-level=2" \ - --egs.chunk-width=$chunk_width \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; fi -if [ $stage -le 6 ]; then +if [ $stage -le 4 ]; then # The reason we are using data/lang here, instead of $lang, is just to # emphasize that it's not actually important to give mkgraph.sh the # lang directory with the matched topology (since it gets the @@ -210,15 +185,12 @@ if [ $stage -le 6 ]; then $dir $dir/graph || exit 1; fi -if [ $stage -le 7 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 +if [ $stage -le 5 ]; then + for decode_set in test; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done fi echo "Done. Date: $(date). Results:" diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh deleted file mode 120000 index fcf59f917c1..00000000000 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh +++ /dev/null @@ -1 +0,0 @@ -tuning/run_cnn_e2eali_1b.sh \ No newline at end of file diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh similarity index 52% rename from egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh rename to egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index d449805be1d..5320a4c80f5 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -1,40 +1,37 @@ #!/bin/bash - -# Copyright 2017 Hossein Hadian -# 2017 Chun Chieh Chang -# 2017 Ashish Arora - set -e -o pipefail - stage=0 -nj=70 +nj=30 train_set=train -gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it - # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -ali=tri3_ali +affix=_1a_${train_set} #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +e2echain_model_dir=exp/chain/e2e_cnn_1a common_egs_dir= reporting_email= -# chain options train_stage=-10 xent_regularize=0.1 +frame_subsampling_factor=4 # training chunk-options chunk_width=340,300,200,100 num_leaves=500 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 tdnn_dim=450 -remove_egs=false -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g +# training options +srand=0 +lang_decode=data/lang_test # End configuration section. echo "$0 $@" # Print the command line for logging + . ./cmd.sh . ./path.sh . ./utils/parse_options.sh + if ! cuda-compiled; then cat <$lat_dir/splice_opts fi if [ $stage -le 3 ]; then @@ -98,92 +93,73 @@ if [ $stage -le 3 ]; then # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use # those. The num-leaves is always somewhat less than the num-leaves from # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; fi + steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 4 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves $train_data_dir \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ $lang $ali_dir $tree_dir fi - if [ $stage -le 4 ]; then - mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn4 dim=$tdnn_dim target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=false \ - --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --chain.frame-subsampling-factor=4 \ - --chain.alignment-subsampling-factor=4 \ - --trainer.srand=0 \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=4 \ - --trainer.frames-per-iter=2000000 \ - --trainer.optimization.num-jobs-initial=3 \ - --trainer.optimization.num-jobs-final=16 \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=64,32 \ - --egs.chunk-width=$chunk_width \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; fi if [ $stage -le 6 ]; then @@ -193,6 +169,7 @@ if [ $stage -le 6 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. + utils/mkgraph.sh \ --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; @@ -200,14 +177,12 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 + for decode_set in test; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done fi -echo "$0 Done. Date: $(date). Results:" +echo "Done. Date: $(date). Results:" local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh b/egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh deleted file mode 120000 index d26ba0182ce..00000000000 --- a/egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh +++ /dev/null @@ -1 +0,0 @@ -tuning/run_e2e_cnn_1a.sh \ No newline at end of file diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh similarity index 61% rename from egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh rename to egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh index 3caf8ae4494..68bda0a9919 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh @@ -1,35 +1,20 @@ #!/bin/bash # Copyright 2017 Hossein Hadian - -# This script does end2end chain training (i.e. from scratch) -# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ -# System e2e_cnn_1a e2e_cnn_1a (with extra corpus text) -# WER 9.47 5.73 -# WER (rescored) 8.05 5.67 -# CER 2.45 1.45 -# CER (rescored) 2.10 1.42 -# Final train prob -0.0934 -0.0934 -# Final valid prob -0.0746 -0.0746 -# Final train prob (xent) -# Final valid prob (xent) -# Parameters 2.94M 2.94M - -# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ -# exp/chain/e2e_cnn_1a/: num-iters=98 nj=6..16 num-params=2.9M dim=40->330 combine=-0.071->-0.070 (over 5) logprob:train/valid[64,97,final]=(-0.089,-0.084,-0.093/-0.075,-0.073,-0.075) set -e # configs for 'chain' stage=0 train_stage=-10 get_egs_stage=-10 -affix=1a +nj=30 # training options tdnn_dim=450 -minibatch_size=150=128,64/300=128,64/600=64,32/1200=32,16 +minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 common_egs_dir= -cmvn_opts="--norm-means=false --norm-vars=false" train_set=train +affix=1a_$train_set +lang_decode=data/lang_test # End configuration section. echo "$0 $@" # Print the command line for logging @@ -46,7 +31,7 @@ EOF fi lang=data/lang_e2e -treedir=exp/chain/e2e_monotree # it's actually just a trivial tree (no tree building) +treedir=exp/chain/e2e_bitree # it's actually just a trivial tree (no tree building) dir=exp/chain/e2e_cnn_${affix} if [ $stage -le 0 ]; then @@ -65,7 +50,7 @@ fi if [ $stage -le 1 ]; then steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ --shared-phones true \ - --type mono \ + --type biphone \ data/$train_set $lang $treedir $cmd $treedir/log/make_phone_lm.log \ cat data/$train_set/text \| \ @@ -78,34 +63,33 @@ fi if [ $stage -le 2 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs fi if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ --cmd "$cmd" \ - --feat.cmvn-opts "$cmvn_opts" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ @@ -114,20 +98,41 @@ if [ $stage -le 3 ]; then --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ - --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --trainer.add-option="--optimization.memory-compression-level=2" \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter 2000000 \ - --trainer.num-epochs 2 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial 6 \ - --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ + --cleanup.remove-egs false \ --feat-dir data/${train_set} \ --tree-dir $treedir \ --dir $dir || exit 1; fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + for decode_set in test; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh deleted file mode 100755 index 23c4d5c2036..00000000000 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ /dev/null @@ -1,213 +0,0 @@ -#!/bin/bash - -set -e -o pipefail - -stage=0 - -nj=70 -train_set=train -gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it - # should have alignments for the specified training data. -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -ali=tri3_ali -chain_model_dir=exp/chain${nnet3_affix}/cnn_1a -common_egs_dir= -reporting_email= -lats_affix= -# chain options -train_stage=-10 -xent_regularize=0.1 -chunk_width=340,300,200,100 -num_leaves=500 -tdnn_dim=450 -remove_egs=false -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 2 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ - --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ - ${train_data_dir} data/lang $chain_model_dir $lat_dir - cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts -fi - -if [ $stage -le 3 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; - fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 4 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves $train_data_dir \ - $lang $ali_dir $tree_dir -fi - -if [ $stage -le 4 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ - --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=false \ - --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --chain.frame-subsampling-factor=4 \ - --chain.alignment-subsampling-factor=1 \ - --trainer.srand=0 \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=4 \ - --trainer.frames-per-iter=2000000 \ - --trainer.optimization.num-jobs-initial=3 \ - --trainer.optimization.num-jobs-final=16 \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=64,32 \ - --egs.chunk-width=$chunk_width \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; -fi - -if [ $stage -le 6 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 7 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -echo "$0 Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh deleted file mode 100755 index c6052b76e7f..00000000000 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ /dev/null @@ -1,238 +0,0 @@ -#!/bin/bash - -# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the -# lattice alignments and to build a tree - -# local/chain/compare_wer.sh exp/chain/exp/chain/cnn_e2eali_1b -# System cnn_e2eali_1b -# WER 10.78 -# CER 2.99 -# Final train prob -0.0587 -# Final valid prob -0.0609 -# Final train prob (xent) -0.4471 -# Final valid prob (xent) -0.4653 -# Parameters 3.37M - -# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b -#exp/chain/cnn_e2eali_1b: num-iters=179 nj=8..16 num-params=3.4M dim=40->416 combine=-0.058->-0.058 (over 3) xent:train/valid[118,178,final]=(-0.463,-0.445,-0.447/-0.477,-0.462,-0.465) logprob:train/valid[118,178,final]=(-0.062,-0.059,-0.059/-0.063,-0.061,-0.061) - -set -e -o pipefail - -stage=0 - -nj=70 -train_set=train -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -e2echain_model_dir=exp/chain/e2e_cnn_1a -common_egs_dir= -reporting_email= - -# chain options -train_stage=-10 -xent_regularize=0.1 -frame_subsampling_factor=4 -chunk_width=340,300,200,100 -num_leaves=500 -tdnn_dim=450 -# training options -srand=0 -remove_egs=true -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g -# End configuration section. -echo "$0 $@" # Print the command line for logging - - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 2 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ - --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ - ${train_data_dir} data/lang $e2echain_model_dir $lat_dir - echo "" >$lat_dir/splice_opts -fi - -if [ $stage -le 3 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; - fi - - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves ${train_data_dir} \ - $lang $ali_dir $tree_dir -fi - - -if [ $stage -le 4 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim - - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ - --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=false \ - --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=1 \ - --chain.left-tolerance 3 \ - --chain.right-tolerance 3 \ - --trainer.srand=$srand \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=4 \ - --trainer.frames-per-iter=2000000 \ - --trainer.optimization.num-jobs-initial=3 \ - --trainer.optimization.num-jobs-final=16 \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ - --trainer.add-option="--optimization.memory-compression-level=2" \ - --egs.chunk-width=$chunk_width \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; -fi - -if [ $stage -le 6 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 7 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -echo "Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/get_unique_utterances.py b/egs/madcat_ar/v1/local/get_unique_utterances.py new file mode 100755 index 00000000000..f0cd268e026 --- /dev/null +++ b/egs/madcat_ar/v1/local/get_unique_utterances.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +import argparse +import os +import numpy as np +import sys +import re +import io + +parser = argparse.ArgumentParser(description="""Removes dev/test set lines + corpus from stdin, and writes it to stdout.""") +parser.add_argument('text_file_path', type=str, + help='transcription location.') +args = parser.parse_args() + +def read_utterances(text_file_path): + utterance_dict = dict() + with open(text_file_path, 'r', encoding="utf8") as in_file: + for line in in_file: + words = line.strip().split() + transcript = ' '.join(words[1:]) + utterance_dict[words[0]] = transcript + return utterance_dict + +def get_unique_utterances(utterance_dict): + unique_utt_transcription_dict = dict() + for utt_id, transcript in utterance_dict.items(): + if transcript not in list(unique_utt_transcription_dict.values()): + unique_utt_transcription_dict[utt_id] = transcript + return unique_utt_transcription_dict + +### main ### +utterance_dict = read_utterances(args.text_file_path) +unique_utt_transcription_dict = get_unique_utterances(utterance_dict) + +sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf8") +for k, v in unique_utt_transcription_dict.items(): + sys.stdout.write(k + "\n") + #print('{}'.format(k)) + #print('{} {}'.format(k, v)) diff --git a/egs/madcat_ar/v1/local/remove_sup_utts_from_unsup.py b/egs/madcat_ar/v1/local/remove_sup_utts_from_unsup.py new file mode 100755 index 00000000000..e997dd7f0f0 --- /dev/null +++ b/egs/madcat_ar/v1/local/remove_sup_utts_from_unsup.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +import argparse +import os +import numpy as np +import sys +import re +import io + +parser = argparse.ArgumentParser(description="""Removes dev/test set lines + from the LOB corpus. Reads the + corpus from stdin, and writes it to stdout.""") +parser.add_argument('sup_text_path', type=str, + help='dev transcription location.') +parser.add_argument('unsup_text_path', type=str, + help='test transcription location.') +args = parser.parse_args() + +def read_utterances(text_file_path): + utterance_dict = dict() + with open(text_file_path, 'r', encoding="utf8") as in_file: + for line in in_file: + words = line.strip().split() + transcript = ' '.join(words[1:]) + utterance_dict[words[0]] = transcript + return utterance_dict + +def get_unique_utterances(): + unique_utt_transcription_dict = dict() + for utt_id, transcript in unsup_utterance_dict.items(): + if transcript not in list(sup_utterance_dict.values()): + unique_utt_transcription_dict[utt_id] = transcript + return unique_utt_transcription_dict + +### main ### +sup_utterance_dict = read_utterances(args.sup_text_path) +unsup_utterance_dict = read_utterances(args.unsup_text_path) +unique_utt_transcription_dict = get_unique_utterances() + +sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf8") +for k, v in unique_utt_transcription_dict.items(): + sys.stdout.write(k + "\n") + #print('{}'.format(k)) + #print('{} {}'.format(k, v)) diff --git a/egs/madcat_ar/v1/local/train_lm.sh b/egs/madcat_ar/v1/local/train_lm.sh index 903b288a834..6a70890d893 100755 --- a/egs/madcat_ar/v1/local/train_lm.sh +++ b/egs/madcat_ar/v1/local/train_lm.sh @@ -64,12 +64,6 @@ if [ $stage -le 0 ]; then # we can later fold the dev data into this. cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/train.txt - if [ -d "data/local/gigawordcorpus/arb_gw_5/data" ]; then - cat data/local/gigawordcorpus/arb_gw_5/data/nhr_arb_combined.txt | \ - utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ - | sed 's/@@//g' > ${dir}/data/text/corpus_text.txt - fi - # for reporting perplexities, we'll use the "real" dev set. # (the validation data is used as ${dir}/data/text/dev.txt to work # out interpolation weights.) @@ -78,7 +72,7 @@ if [ $stage -le 0 ]; then cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt # get the wordlist from MADCAT text - cat ${dir}/data/text/{train,corpus_text}.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/text/train.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist fi @@ -89,7 +83,7 @@ if [ $stage -le 1 ]; then # Note: if you have more than one order, use a certain amount of words as the # vocab and want to restrict max memory for 'sort', echo "$0: training the unpruned LM" - min_counts='corpus_text=2 train=1' + min_counts='train=1' wordlist=${dir}/data/wordlist lm_name="`basename ${wordlist}`_${order}" @@ -106,25 +100,3 @@ if [ $stage -le 1 ]; then mkdir -p ${dir}/data/arpa format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz fi - -if [ $stage -le 2 ]; then - echo "$0: pruning the LM (to larger size)" - # Using 20 million n-grams for a big LM for rescoring purposes. - size=20000000 - prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big - - get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' - mkdir -p ${dir}/data/arpa - format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz -fi - -if [ $stage -le 3 ]; then - echo "$0: pruning the LM (to smaller size)" - # Using 10 million n-grams for a smaller LM for graph building. Prune from the - # bigger-pruned LM, it'll be faster. - size=10000000 - prune_lm_dir.py --target-num-ngrams=$size ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small - - get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' - format_arpa_lm.py ${dir}/data/lm_${order}_prune_small | gzip -c > ${dir}/data/arpa/${order}gram_small.arpa.gz -fi diff --git a/egs/madcat_ar/v1/local/train_lm.unsup.sh b/egs/madcat_ar/v1/local/train_lm.unsup.sh new file mode 100755 index 00000000000..3b7f6a9b7ce --- /dev/null +++ b/egs/madcat_ar/v1/local/train_lm.unsup.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +# Copyright 2016 Vincent Nguyen +# 2016 Johns Hopkins University (author: Daniel Povey) +# 2017 Ashish Arora +# 2017 Hossein Hadian +# Apache 2.0 +# +# This script trains a LM on the training transcriptions. +# It is based on the example scripts distributed with PocoLM + +# It will check if pocolm is installed and if not will proceed with installation + +set -e +stage=0 +dir=data/local/local_lm +order=6 +echo "$0 $@" # Print the command line for logging +. ./utils/parse_options.sh || exit 1; + +lm_dir=${dir}/data + + +mkdir -p $dir +. ./path.sh || exit 1; # for KALDI_ROOT +export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH +( # First make sure the pocolm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d pocolm ]; then + echo Not installing the pocolm toolkit since it is already there. + else + echo "$0: Please install the PocoLM toolkit with: " + echo " cd ../../../tools; extras/install_pocolm.sh; cd -" + exit 1; + fi +) || exit 1; + +bypass_metaparam_optim_opt= +# If you want to bypass the metaparameter optimization steps with specific metaparameters +# un-comment the following line, and change the numbers to some appropriate values. +# You can find the values from output log of train_lm.py. +# These example numbers of metaparameters is for 4-gram model (with min-counts) +# running with train_lm.py. +# The dev perplexity should be close to the non-bypassed model. +# Note: to use these example parameters, you may need to remove the .done files +# to make sure the make_lm_dir.py be called and tain only 3-gram model +#for order in 3; do +#rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done +if [ $stage -le 0 ]; then + mkdir -p ${dir}/data + mkdir -p ${dir}/data/text + + echo "$0: Getting the Data sources" + + rm ${dir}/data/text/* 2>/dev/null || true + + # use the validation data as the dev set. + # Note: the name 'dev' is treated specially by pocolm, it automatically + # becomes the dev set. + cat data/dev/text | cut -d " " -f 2- > ${dir}/data/text/dev.txt + + cat data/train_LM/text | cut -d " " -f 2- > ${dir}/data/text/train.txt + # for reporting perplexities, we'll use the "real" dev set. + # (the validation data is used as ${dir}/data/text/dev.txt to work + # out interpolation weights.) + # note, we can't put it in ${dir}/data/text/, because then pocolm would use + # it as one of the data sources. + cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt + + # get the wordlist from MADCAT text + cat ${dir}/data/text/train.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist +fi + +if [ $stage -le 1 ]; then + # decide on the vocabulary. + # Note: you'd use --wordlist if you had a previously determined word-list + # that you wanted to use. + # Note: if you have more than one order, use a certain amount of words as the + # vocab and want to restrict max memory for 'sort', + echo "$0: training the unpruned LM" + min_counts='train=1' + wordlist=${dir}/data/wordlist + + lm_name="`basename ${wordlist}`_${order}" + if [ -n "${min_counts}" ]; then + lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" + fi + unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm + train_lm.py --wordlist=${wordlist} --num-splits=20 --warm-start-ratio=20 \ + --limit-unk-history=true \ + ${bypass_metaparam_optim_opt} \ + ${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir} + + get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.train80k.arpa.gz +fi diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh deleted file mode 100755 index 01bfdbed543..00000000000 --- a/egs/madcat_ar/v1/run.sh +++ /dev/null @@ -1,161 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2017 Hossein Hadian - -set -e -stage=0 -nj=70 -decode_gmm=false -# download_dir{1,2,3} points to the database path on the JHU grid. If you have not -# already downloaded the database you can set it to a local directory -# This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} -download_dir1=/export/corpora/LDC/LDC2012T15/data -download_dir2=/export/corpora/LDC/LDC2013T09/data -download_dir3=/export/corpora/LDC/LDC2013T15/data -writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab -writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab -writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab -data_splits_dir=data/download/data_splits -images_scp_dir=data/local -overwrite=false -subset=false -augment=false -use_extra_corpus_text=true -. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. - ## This relates to the queue. -. ./path.sh -. ./utils/parse_options.sh # e.g. this parses the above options - # if supplied. -./local/check_tools.sh -mkdir -p data/{train,test,dev}/data -mkdir -p data/local/{train,test,dev} - -if [ $stage -le 0 ]; then - if [ -f data/train/text ] && ! $overwrite; then - echo "$0: Not processing, probably script have run from wrong stage" - echo "Exiting with status 1 to avoid data corruption" - exit 1; - fi - local/prepare_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ - --download_dir2 $download_dir2 --download_dir3 $download_dir3 \ - --use_extra_corpus_text $use_extra_corpus_text - - for set in test train dev; do - data_split_file=$data_splits_dir/madcat.$set.raw.lineid - local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ - --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ - --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ - --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ - --data data/local/$set --subset $subset --augment $augment || exit 1 - done - - echo "$0: Processing data..." - for set in dev train test; do - local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ - $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 --augment $augment --subset $subset - image/fix_data_dir.sh data/${set} - done -fi - - -if [ $stage -le 1 ]; then - for dataset in test train; do - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset - steps/compute_cmvn_stats.sh data/$dataset || exit 1; - done - utils/fix_data_dir.sh data/train -fi - -if [ $stage -le 2 ]; then - echo "$0: Preparing BPE..." - cut -d' ' -f2- data/train/text | utils/lang/bpe/reverse.py | \ - utils/lang/bpe/prepend_words.py | \ - utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - - for set in test train dev; do - cut -d' ' -f1 data/$set/text > data/$set/ids - cut -d' ' -f2- data/$set/text | utils/lang/bpe/reverse.py | \ - utils/lang/bpe/prepend_words.py | \ - utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ - | sed 's/@@//g' > data/$set/bpe_text - - mv data/$set/text data/$set/text.old - paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text - rm -f data/$set/bpe_text data/$set/ids - done - - echo "$0:Preparing dictionary and lang..." - local/prepare_dict.sh - utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ - data/local/dict "" data/lang/temp data/lang - utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang -fi - -if [ $stage -le 3 ]; then - echo "$0: Estimating a language model for decoding..." - local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ - data/local/dict/lexicon.txt data/lang - utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/lang data/lang_rescore_6g -fi - -if [ $stage -le 4 ]; then - steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train \ - data/lang exp/mono -fi - -if [ $stage -le 5 ] && $decode_gmm; then - utils/mkgraph.sh --mono data/lang exp/mono exp/mono/graph - - steps/decode.sh --nj $nj --cmd $cmd exp/mono/graph data/test \ - exp/mono/decode_test -fi - -if [ $stage -le 6 ]; then - steps/align_si.sh --nj $nj --cmd $cmd data/train data/lang \ - exp/mono exp/mono_ali - - steps/train_deltas.sh --cmd $cmd 500 20000 data/train data/lang \ - exp/mono_ali exp/tri -fi - -if [ $stage -le 7 ] && $decode_gmm; then - utils/mkgraph.sh data/lang exp/tri exp/tri/graph - - steps/decode.sh --nj $nj --cmd $cmd exp/tri/graph data/test \ - exp/tri/decode_test -fi - -if [ $stage -le 8 ]; then - steps/align_si.sh --nj $nj --cmd $cmd data/train data/lang \ - exp/tri exp/tri_ali - - steps/train_lda_mllt.sh --cmd $cmd \ - --splice-opts "--left-context=3 --right-context=3" 500 20000 \ - data/train data/lang exp/tri_ali exp/tri3 -fi - -if [ $stage -le 9 ] && $decode_gmm; then - utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph - - steps/decode.sh --nj $nj --cmd $cmd exp/tri3/graph \ - data/test exp/tri3/decode_test -fi - -if [ $stage -le 10 ]; then - steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ - data/train data/lang exp/tri3 exp/tri3_ali -fi - -if [ $stage -le 11 ]; then - local/chain/run_cnn.sh -fi - -if [ $stage -le 12 ]; then - local/chain/run_cnn_chainali.sh --stage 2 -fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 62f4eeb7c71..f90f352ac0e 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -62,17 +62,20 @@ if [ $stage -le 0 ]; then fi if [ $stage -le 1 ]; then - echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." - image/get_image2num_frames.py data/train - image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + #echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." + #image/get_image2num_frames.py data/train + #image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in test dev train; do + image/get_image2num_frames.py data/dev + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/dev + + for set in dev; do echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $set. $(date)" local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set steps/compute_cmvn_stats.sh data/$set || exit 1; done echo "$0: Fixing data directory for train dataset $(date)." - utils/fix_data_dir.sh data/train + utils/fix_data_dir.sh data/dev fi if [ $stage -le 2 ]; then @@ -92,7 +95,9 @@ if [ $stage -le 2 ]; then paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text rm -f data/$set/bpe_text data/$set/ids done +fi +if [ $stage -le 3 ]; then echo "$0:Preparing dictionary and lang..." local/prepare_dict.sh utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ @@ -100,34 +105,118 @@ if [ $stage -le 2 ]; then utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang fi -if [ $stage -le 3 ]; then - echo "$0: Calling the flat-start chain recipe... $(date)." - local/chain/run_e2e_cnn.sh -fi - -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g -decode_e2e=true +# training language model +lang_decode=data/lang_test if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt $lang_decode - utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/lang $lang_rescore fi -if [ $stage -le 5 ] && $decode_e2e; then - echo "$0: $(date) stage 5: decoding end2end setup..." - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode \ - exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; +if [ $stage -le 5 ]; then + echo "$0:Preparing supervised and unsupervised data..." + #local/get_unique_utterances.py data/train/text.old > data/train/uttlist.full + head -40000 data/train/uttlist.full > data/train/uttlist.40k + utils/subset_data_dir.sh --utt-list data/train/uttlist.40k data/train data/train_unsup + tail +40000 data/train/uttlist.full > data/train/uttlist.tail.80k + utils/subset_data_dir.sh --utt-list data/train/uttlist.tail.80k data/train data/train_LM - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 --nj $nj --cmd "$cmd" \ - exp/chain/e2e_cnn_1a/graph data/test exp/chain/e2e_cnn_1a/decode_test || exit 1; + utils/subset_data_dir.sh data/dev 4000 data/train_sup4k + local/get_unique_utterances.py data/train_sup4k/text.old > data/train_sup4k/uttlist + utils/subset_data_dir.sh --utt-list data/train_sup4k/uttlist data/train_sup4k data/train_sup - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test exp/chain/e2e_cnn_1a/decode_test{,_rescored} || exit 1 + local/remove_sup_utts_from_unsup.py data/train_sup/text.old data/train_unsup/text.old > data/local/unsup_uttlist + utils/subset_data_dir.sh --utt-list data/local/unsup_uttlist data/train_unsup data/train_unsup_unique + + cp data/train/allowed_lengths.txt data/train_unsup_unique/allowed_lengths.txt + cp data/dev/allowed_lengths.txt data/train_sup/allowed_lengths.txt +fi + +if [ $stage -le 6 ]; then + echo "$0: Estimating a language model for decoding..." + local/train_lm.unsup.sh + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.train80k.arpa.gz \ + data/local/dict/lexicon.txt data/lang_decode_unsup +fi + +if [ $stage -le 7 ]; then + utils/combine_data.sh data/semisup \ + data/train_sup data/train_unsup_unique || exit 1 +fi + +train_set=train_sup +# training flat-start system +if [ $stage -le 7 ]; then + echo "$0: Calling the flat-start chain recipe... $(date)." + local/chain/run_e2e_cnn_1a.sh --train-set train_sup --nj 30 +fi + +if [ $stage -le 8 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/train_sup data/lang_e2e exp/chain/e2e_cnn_1a_$train_set exp/chain/flatstartali_$train_set +fi +exit +# training e2eali system +if [ $stage -le 9 ]; then + echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." + local/chain/run_cnn_e2eali_1a.sh --train-set train_sup --nj 50 +fi + +if [ $stage -le 8 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/train_sup data/lang_chain exp/chain/e2e_cnn_1a.semisup exp/chain/e2eali_train_sup +fi + +if [ $stage -le 11 ]; then + echo "$0: chain model using the chainali alignments..." + local/chain/run_cnn_chainali_1a.sh --stage 2 --train-set train_sup +fi + +# training baseline system +if [ $stage -le 9 ]; then + echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." + local/chain/run_cnn_e2eali.sh --train-set semisup --nj 50 +fi + +if [ $stage -le 10 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/semisup data/lang_chain exp/chain/cnn_e2eali_1b.semisup exp/chain/chain_ali_train.semisup +fi + +if [ $stage -le 11 ]; then + echo "$0: chain model using the chainali alignments..." + local/chain/tuning/run_cnn_chainali_1a.sh --stage 2 --train-set semisup +fi +exit +# training semi-supervised system +if [ $stage -le 12 ]; then + local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh \ + --supervised-set train_sup \ + --unsupervised-set train_unsup_unique \ + --sup-chain-dir exp/chain/cnn_chainali_1a.ep5 \ + --sup-lat-dir exp/chain/e2e_train_sup_lats_chain \ + --sup-tree-dir exp/chain/tree_chainali \ + --tdnn-affix _1b_tol1_beam4.blchainali.uncon \ + --stage 10 \ + --exp-root exp/semisup.unsup40k || exit 1 +fi + +# training oracle system +if [ $stage -le 13 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/semisup data/lang_chain exp/chain/cnn_chainali_1a.ep5 exp/chain/e2e_ali_train.semisup50kblchainali +fi - echo "$0: Done. Date: $(date). Results:" - local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +if [ $stage -le 14 ]; then + echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." + local/chain/run_cnn_chainali_semisupervised_1b.sh --train-set semisup --nj 50 --stage 4 fi From ecfa5f7116c53134a208742653035da2d95ff033 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 11 Mar 2019 17:49:36 -0400 Subject: [PATCH 02/20] minor changes --- .../v1/local/chain/run_cnn_chainali_1a.sh | 15 --------------- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 2 +- egs/wsj/s5/utils/combine_data.sh | 2 +- egs/wsj/s5/utils/subset_data_dir.sh | 2 ++ 4 files changed, 4 insertions(+), 17 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index 78b2e618e45..900998ed475 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -1,20 +1,5 @@ #!/bin/bash - -# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a -# System cnn_e2eali_1a_(dict_50k) cnn_e2eali_1a_(dict_50k + unk model) -# WER 13.30 11.94 -# CER 5.95 5.15 -# WER val 12.85 10.71 -# CER val 5.09 4.03 -# Final train prob -0.0562 -# Final valid prob -0.0634 -# Final train prob (xent) -0.8196 -# Final valid prob (xent) -0.8816 -# Parameters 3.96M - # steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a -# exp/chain/cnn_e2eali_1a: num-iters=42 nj=2..4 num-params=4.0M dim=40->368 combine=-0.058->-0.058 (over 1) xent:train/valid[27,41,final]=(-2.67,-0.841,-0.820/-2.71,-0.892,-0.882) logprob:train/valid[27,41,final]=(-0.240,-0.060,-0.056/-0.245,-0.068,-0.063) - set -e -o pipefail stage=0 diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 9996820d6d3..504fc929198 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -75,7 +75,7 @@ lattice_prune_beam= # If supplied, the lattices will be pruned to this b # before being used to get supervisions. acwt=0.1 # For pruning deriv_weights_scp= -generate_egs_scp=false +generate_egs_scp=true echo "$0 $@" # Print the command line for logging diff --git a/egs/wsj/s5/utils/combine_data.sh b/egs/wsj/s5/utils/combine_data.sh index a43cf9d77f3..a689fa2b98d 100755 --- a/egs/wsj/s5/utils/combine_data.sh +++ b/egs/wsj/s5/utils/combine_data.sh @@ -94,7 +94,7 @@ else echo "$0 [info]: not combining segments as it does not exist" fi -for file in utt2spk utt2lang utt2dur reco2dur feats.scp text cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do +for file in utt2spk utt2lang utt2dur reco2dur feats.scp text text.old cmvn.scp vad.scp reco2file_and_channel wav.scp spk2gender $extra_files; do exists_somewhere=false absent_somewhere=false for d in $*; do diff --git a/egs/wsj/s5/utils/subset_data_dir.sh b/egs/wsj/s5/utils/subset_data_dir.sh index 93ee0971b88..526bfd7aa2a 100755 --- a/egs/wsj/s5/utils/subset_data_dir.sh +++ b/egs/wsj/s5/utils/subset_data_dir.sh @@ -111,9 +111,11 @@ function do_filtering { [ -f $srcdir/utt2num_frames ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2num_frames >$destdir/utt2num_frames [ -f $srcdir/utt2uniq ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp + [ -f $srcdir/images.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/images.scp >$destdir/images.scp [ -f $srcdir/spk2warp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp [ -f $srcdir/utt2warp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text + [ -f $srcdir/text.old ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text.old >$destdir/text.old [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp if [ -f $srcdir/segments ]; then From a5fddf8ee54f50b88edcecef7ebe9d1bf41a1559 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 11 Mar 2019 17:53:57 -0400 Subject: [PATCH 03/20] minor change --- egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh | 2 +- egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh | 2 +- egs/madcat_ar/v1/run_end2end.sh | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index 5320a4c80f5..b658174cdca 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -147,7 +147,7 @@ if [ $stage -le 5 ]; then --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter 1500000 \ + --trainer.frames-per-iter 100000 \ --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ --trainer.optimization.num-jobs-initial 5 \ diff --git a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh index 68bda0a9919..bcf56850635 100755 --- a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh @@ -99,7 +99,7 @@ if [ $stage -le 3 ]; then --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter 1500000 \ + --trainer.frames-per-iter 100000 \ --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ --trainer.optimization.num-jobs-initial 5 \ diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index f90f352ac0e..c5022fe0050 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -147,12 +147,12 @@ fi train_set=train_sup # training flat-start system -if [ $stage -le 7 ]; then +if [ $stage -le 8 ]; then echo "$0: Calling the flat-start chain recipe... $(date)." local/chain/run_e2e_cnn_1a.sh --train-set train_sup --nj 30 fi -if [ $stage -le 8 ]; then +if [ $stage -le 9 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ @@ -160,7 +160,7 @@ if [ $stage -le 8 ]; then fi exit # training e2eali system -if [ $stage -le 9 ]; then +if [ $stage -le 10 ]; then echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." local/chain/run_cnn_e2eali_1a.sh --train-set train_sup --nj 50 fi From 2b19b2c01a9afe128abc39c1f4e2b8f65ef87a61 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 11 Mar 2019 19:44:39 -0400 Subject: [PATCH 04/20] making chain scripts similar --- .../v1/local/chain/run_cnn_chainali_1a.sh | 89 +++++++++---------- .../v1/local/chain/run_cnn_e2eali_1a.sh | 47 ++++------ .../v1/local/chain/run_e2e_cnn_1a.sh | 14 +-- egs/madcat_ar/v1/run_end2end.sh | 71 ++++++--------- 4 files changed, 92 insertions(+), 129 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index 900998ed475..4e43f31c447 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -1,29 +1,15 @@ #!/bin/bash -# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a set -e -o pipefail - stage=0 -nj=30 +nj=70 train_set=train -decode_val=true -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -e2echain_model_dir=exp/chain/e2e_cnn_1a -common_egs_dir= -reporting_email= - -# chain options train_stage=-10 -xent_regularize=0.1 -frame_subsampling_factor=4 chunk_width=340,300,200,100 num_leaves=500 tdnn_dim=450 -lang_decode=lang_test -if $decode_val; then maybe_val=val; else maybe_val= ; fi +lang_decode=data/lang_test # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh @@ -36,20 +22,24 @@ where "nvcc" is installed. EOF fi -ali_dir=exp/chain/e2e_ali_train -lat_dir=exp/chain${nnet3_affix}/e2e_${train_set}_lats -dir=exp/chain${nnet3_affix}/cnn_e2eali${affix} +affix=_1a_${train_set} +chain_model_dir=exp/chain/cnn_e2eali${affix} +#ali_dir=exp/chain/e2eali_$train_set +lat_dir=exp/chain/chainali_${train_set}_lats +dir=exp/chain/cnn_chainali${affix} train_data_dir=data/${train_set} -tree_dir=exp/chain${nnet3_affix}/tree_e2e - +#use end2endali tree +tree_dir=exp/chain/tree_chainali_${train_set} +tree_dir=exp/chain/tree_e2eali_${train_set} # the 'lang' directory is created by this script. # If you create such a directory with a non-standard topology # you should probably name it differently. lang=data/lang_chain -for f in $train_data_dir/feats.scp $ali_dir/ali.1.gz $ali_dir/final.mdl; do +for f in $train_data_dir/feats.scp; do [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 done + if [ $stage -le 1 ]; then echo "$0: creating lang directory $lang with chain-type topology" # Create a version of the lang/ directory that has one state per phone in the @@ -79,27 +69,26 @@ if [ $stage -le 2 ]; then steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ - ${train_data_dir} data/lang $e2echain_model_dir $lat_dir - echo "" >$lat_dir/splice_opts + ${train_data_dir} data/lang $chain_model_dir $lat_dir + cp exp/chain/e2eali_${train_set}_lats/splice_opts $lat_dir/splice_opts fi -if [ $stage -le 3 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; - fi - - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor 4 \ - --alignment-subsampling-factor 1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves $train_data_dir \ - $lang $ali_dir $tree_dir -fi +#if [ $stage -le 3 ]; then +# # Build a tree using our new topology. We know we have alignments for the +# # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use +# # those. The num-leaves is always somewhat less than the num-leaves from +# # the GMM baseline. +# if [ -f $tree_dir/final.mdl ]; then +# echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." +# exit 1; +# fi +# steps/nnet3/chain/build_tree.sh \ +# --frame-subsampling-factor 4 \ +# --alignment-subsampling-factor 1 \ +# --context-opts "--context-width=2 --central-position=1" \ +# --cmd "$cmd" $num_leaves $train_data_dir \ +# $lang $ali_dir $tree_dir +#fi if [ $stage -le 2 ]; then echo "$0: creating neural net configs using the xconfig parser"; @@ -109,7 +98,6 @@ if [ $stage -le 2 ]; then mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -129,7 +117,6 @@ fi if [ $stage -le 3 ]; then # no need to store the egs in a shared storage because we always # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ @@ -140,13 +127,13 @@ if [ $stage -le 3 ]; then --egs.stage $get_egs_stage \ --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ --chain.frame-subsampling-factor 4 \ - --chain.alignment-subsampling-factor 4 \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter 1500000 \ + --chain.alignment-subsampling-factor 1 \ + --trainer.num-chunk-per-minibatch 16,8 \ + --trainer.frames-per-iter 500000 \ --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial 5 \ - --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 4 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ @@ -154,6 +141,10 @@ if [ $stage -le 3 ]; then --cleanup.remove-egs false \ --feat-dir data/${train_set} \ --tree-dir $treedir \ + --chain.left-tolerance 1 \ + --chain.right-tolerance 1 \ + --egs.chunk-width=$chunk_width + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --dir $dir || exit 1; fi @@ -164,13 +155,13 @@ if [ $stage -le 4 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. - utils/mkgraph.sh \ --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi if [ $stage -le 5 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) for decode_set in test; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index b658174cdca..718c093e644 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -1,37 +1,19 @@ #!/bin/bash set -e -o pipefail stage=0 - -nj=30 +nj=70 train_set=train -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1a_${train_set} #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -e2echain_model_dir=exp/chain/e2e_cnn_1a -common_egs_dir= -reporting_email= - train_stage=-10 -xent_regularize=0.1 -frame_subsampling_factor=4 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 lang_decode=data/lang_test # End configuration section. echo "$0 $@" # Print the command line for logging - - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! cuda-compiled; then cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -134,7 +117,6 @@ fi if [ $stage -le 5 ]; then # no need to store the egs in a shared storage because we always # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ @@ -145,13 +127,13 @@ if [ $stage -le 5 ]; then --egs.stage $get_egs_stage \ --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ --chain.frame-subsampling-factor 4 \ - --chain.alignment-subsampling-factor 4 \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter 100000 \ + --chain.alignment-subsampling-factor 1 \ + --trainer.num-chunk-per-minibatch 16,8 \ + --trainer.frames-per-iter 500000 \ --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial 5 \ - --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 4 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ @@ -159,6 +141,10 @@ if [ $stage -le 5 ]; then --cleanup.remove-egs false \ --feat-dir data/${train_set} \ --tree-dir $treedir \ + --chain.left-tolerance 1 \ + --chain.right-tolerance 1 \ + --egs.chunk-width=$chunk_width + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --dir $dir || exit 1; fi @@ -169,7 +155,6 @@ if [ $stage -le 6 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. - utils/mkgraph.sh \ --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; diff --git a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh index bcf56850635..3704157dfef 100755 --- a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh @@ -10,10 +10,9 @@ nj=30 # training options tdnn_dim=450 -minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 +minibatch_size=150=32,16/300=16,8/600=8,4/1200=4,2 common_egs_dir= train_set=train -affix=1a_$train_set lang_decode=data/lang_test # End configuration section. echo "$0 $@" # Print the command line for logging @@ -30,8 +29,9 @@ where "nvcc" is installed. EOF fi +affix=1a_$train_set lang=data/lang_e2e -treedir=exp/chain/e2e_bitree # it's actually just a trivial tree (no tree building) +treedir=exp/chain/e2e_monotree # it's actually just a trivial tree (no tree building) dir=exp/chain/e2e_cnn_${affix} if [ $stage -le 0 ]; then @@ -50,7 +50,7 @@ fi if [ $stage -le 1 ]; then steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ --shared-phones true \ - --type biphone \ + --type mono \ data/$train_set $lang $treedir $cmd $treedir/log/make_phone_lm.log \ cat data/$train_set/text \| \ @@ -99,11 +99,11 @@ if [ $stage -le 3 ]; then --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter 100000 \ + --trainer.frames-per-iter 500000 \ --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial 5 \ - --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 3 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index c5022fe0050..302737af265 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -152,71 +152,58 @@ if [ $stage -le 8 ]; then local/chain/run_e2e_cnn_1a.sh --train-set train_sup --nj 30 fi +# alignments are used in tree if [ $stage -le 9 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train_sup data/lang_e2e exp/chain/e2e_cnn_1a_$train_set exp/chain/flatstartali_$train_set fi -exit + # training e2eali system if [ $stage -le 10 ]; then echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." local/chain/run_cnn_e2eali_1a.sh --train-set train_sup --nj 50 fi -if [ $stage -le 8 ]; then - echo "$0: Aligning the training data using the e2e chain model..." - steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train_sup data/lang_chain exp/chain/e2e_cnn_1a.semisup exp/chain/e2eali_train_sup -fi +# no need for alignments, use same tree from end2endali +#if [ $stage -le 11 ]; then +# echo "$0: Aligning the training data using the e2e chain model..." +# steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ +# --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ +# data/train_sup data/lang_chain exp/chain/cnn_e2eali_1a_$train_set exp/chain/e2eali_$train_set +#fi -if [ $stage -le 11 ]; then +# training baseline system +if [ $stage -le 12 ]; then echo "$0: chain model using the chainali alignments..." local/chain/run_cnn_chainali_1a.sh --stage 2 --train-set train_sup fi -# training baseline system -if [ $stage -le 9 ]; then - echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_e2eali.sh --train-set semisup --nj 50 -fi +train_set=semisup +## no need for alignments, use same tree from end2endali +#if [ $stage -le 13 ]; then +# echo "$0: Aligning the training data using the e2e chain model..." +# steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ +# --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ +# data/semisup data/lang_chain exp/chain/cnn_chainali_1a_$train_set exp/chain/e2eali_$train_set +#fi -if [ $stage -le 10 ]; then - echo "$0: Aligning the training data using the e2e chain model..." - steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/semisup data/lang_chain exp/chain/cnn_e2eali_1b.semisup exp/chain/chain_ali_train.semisup +# training oracle system +if [ $stage -le 14 ]; then + echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." + local/chain/run_cnn_chainali_semisupervised_1b.sh --train-set semisup --nj 50 fi -if [ $stage -le 11 ]; then - echo "$0: chain model using the chainali alignments..." - local/chain/tuning/run_cnn_chainali_1a.sh --stage 2 --train-set semisup -fi -exit # training semi-supervised system -if [ $stage -le 12 ]; then +train_set=train_sup +if [ $stage -le 15 ]; then local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh \ --supervised-set train_sup \ --unsupervised-set train_unsup_unique \ - --sup-chain-dir exp/chain/cnn_chainali_1a.ep5 \ - --sup-lat-dir exp/chain/e2e_train_sup_lats_chain \ - --sup-tree-dir exp/chain/tree_chainali \ - --tdnn-affix _1b_tol1_beam4.blchainali.uncon \ - --stage 10 \ + --sup-chain-dir exp/chain/cnn_chainali_1a_$train_set \ + --sup-lat-dir exp/chain/chainali_${train_set}_lats\ + --sup-tree-dir exp/chain/tree_e2eali_${train_set} \ + --tdnn-affix _1a_tol1_beam4 \ --exp-root exp/semisup.unsup40k || exit 1 fi - -# training oracle system -if [ $stage -le 13 ]; then - echo "$0: Aligning the training data using the e2e chain model..." - steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/semisup data/lang_chain exp/chain/cnn_chainali_1a.ep5 exp/chain/e2e_ali_train.semisup50kblchainali -fi - -if [ $stage -le 14 ]; then - echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_chainali_semisupervised_1b.sh --train-set semisup --nj 50 --stage 4 -fi From 2b87a5ed4c1e370f690509d6eb9ba78a20fe7d1d Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 11 Mar 2019 19:58:48 -0400 Subject: [PATCH 05/20] adding oracle recipe --- .../v1/local/chain/run_cnn_chainali_1a.sh | 1 + .../run_cnn_chainali_semisupervised_1a.sh | 181 ++++++++++++++++++ .../v1/local/chain/run_cnn_e2eali_1a.sh | 1 + egs/madcat_ar/v1/local/train_lm.sh | 11 ++ egs/madcat_ar/v1/run_end2end.sh | 4 +- 5 files changed, 197 insertions(+), 1 deletion(-) create mode 100755 egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index 4e43f31c447..d87b101922a 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -141,6 +141,7 @@ if [ $stage -le 3 ]; then --cleanup.remove-egs false \ --feat-dir data/${train_set} \ --tree-dir $treedir \ + --lat-dir $lat_dir \ --chain.left-tolerance 1 \ --chain.right-tolerance 1 \ --egs.chunk-width=$chunk_width diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh new file mode 100755 index 00000000000..8623ebeb6b7 --- /dev/null +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -0,0 +1,181 @@ +#!/bin/bash +set -e -o pipefail +stage=0 +nj=70 +train_set=train +train_stage=-10 +chunk_width=340,300,200,100 +num_leaves=500 +tdnn_dim=450 +lang_decode=data/lang_test +# End configuration section. +echo "$0 $@" # Print the command line for logging +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $chain_model_dir $lat_dir + cp exp/chain/e2eali_${train_set}_lats/splice_opts $lat_dir/splice_opts +fi + +#if [ $stage -le 3 ]; then +# # Build a tree using our new topology. We know we have alignments for the +# # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use +# # those. The num-leaves is always somewhat less than the num-leaves from +# # the GMM baseline. +# if [ -f $tree_dir/final.mdl ]; then +# echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." +# exit 1; +# fi +# steps/nnet3/chain/build_tree.sh \ +# --frame-subsampling-factor 4 \ +# --alignment-subsampling-factor 1 \ +# --context-opts "--context-width=2 --central-position=1" \ +# --cmd "$cmd" $num_leaves $train_data_dir \ +# $lang $ali_dir $tree_dir +#fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-dropout-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.xent-regularize $xent_regularize \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 1 \ + --trainer.num-chunk-per-minibatch 16,8 \ + --trainer.frames-per-iter 500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 4 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs false \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --lat-dir $lat_dir \ + --chain.left-tolerance 1 \ + --chain.right-tolerance 1 \ + --egs.chunk-width=$chunk_width + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + for decode_set in test; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index 718c093e644..e00636a182b 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -141,6 +141,7 @@ if [ $stage -le 5 ]; then --cleanup.remove-egs false \ --feat-dir data/${train_set} \ --tree-dir $treedir \ + --lat-dir $lat_dir \ --chain.left-tolerance 1 \ --chain.right-tolerance 1 \ --egs.chunk-width=$chunk_width diff --git a/egs/madcat_ar/v1/local/train_lm.sh b/egs/madcat_ar/v1/local/train_lm.sh index 6a70890d893..9a0c8271f5a 100755 --- a/egs/madcat_ar/v1/local/train_lm.sh +++ b/egs/madcat_ar/v1/local/train_lm.sh @@ -100,3 +100,14 @@ if [ $stage -le 1 ]; then mkdir -p ${dir}/data/arpa format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz fi + +if [ $stage -le 2 ]; then + echo "$0: pruning the LM (to larger size)" + # Using 1 million n-grams for a big LM for rescoring purposes. + size=1000000 + prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz +fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 302737af265..fe64122afe7 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -110,8 +110,10 @@ lang_decode=data/lang_test if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ data/local/dict/lexicon.txt $lang_decode + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/lang data/lang_rescore_6g fi if [ $stage -le 5 ]; then From c45a4044f766bdd80a4c48bf1aaf41740aec7b52 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 11 Mar 2019 21:18:34 -0400 Subject: [PATCH 06/20] fixing bugs --- egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh | 12 +++++------- egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh | 12 +++++------- egs/madcat_ar/v1/run_end2end.sh | 2 +- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index d87b101922a..22e569ad915 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -92,7 +92,7 @@ fi if [ $stage -le 2 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs @@ -117,15 +117,13 @@ fi if [ $stage -le 3 ]; then # no need to store the egs in a shared storage because we always # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage=$train_stage \ --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ --egs.dir "$common_egs_dir" \ - --egs.stage $get_egs_stage \ - --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ --trainer.num-chunk-per-minibatch 16,8 \ @@ -140,11 +138,11 @@ if [ $stage -le 3 ]; then --trainer.max-param-change 2.0 \ --cleanup.remove-egs false \ --feat-dir data/${train_set} \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ + --tree-dir $tree_dir \ + --lat-dir=$lat_dir \ --chain.left-tolerance 1 \ --chain.right-tolerance 1 \ - --egs.chunk-width=$chunk_width + --egs.chunk-width=$chunk_width \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --dir $dir || exit 1; fi diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index e00636a182b..b0484deaae9 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -92,7 +92,7 @@ fi if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs @@ -117,15 +117,13 @@ fi if [ $stage -le 5 ]; then # no need to store the egs in a shared storage because we always # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage=$train_stage \ --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ --egs.dir "$common_egs_dir" \ - --egs.stage $get_egs_stage \ - --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ --trainer.num-chunk-per-minibatch 16,8 \ @@ -140,11 +138,11 @@ if [ $stage -le 5 ]; then --trainer.max-param-change 2.0 \ --cleanup.remove-egs false \ --feat-dir data/${train_set} \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ + --tree-dir $tree_dir \ + --lat-dir=$lat_dir \ --chain.left-tolerance 1 \ --chain.right-tolerance 1 \ - --egs.chunk-width=$chunk_width + --egs.chunk-width=$chunk_width \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --dir $dir || exit 1; fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index fe64122afe7..d9d18cfebbc 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -165,7 +165,7 @@ fi # training e2eali system if [ $stage -le 10 ]; then echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_e2eali_1a.sh --train-set train_sup --nj 50 + local/chain/run_cnn_e2eali_1a.sh --train-set train_sup --nj 50 --stage 4 fi # no need for alignments, use same tree from end2endali From 08583abb8cbf492930287e2f34d578984e777f34 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 11 Mar 2019 21:26:32 -0400 Subject: [PATCH 07/20] minor change --- egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh | 2 +- egs/madcat_ar/v1/run_end2end.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh index 3704157dfef..5d1fb656fa0 100755 --- a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh @@ -6,7 +6,7 @@ set -e stage=0 train_stage=-10 get_egs_stage=-10 -nj=30 +nj=70 # training options tdnn_dim=450 diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index d9d18cfebbc..0598d7f8435 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -151,7 +151,7 @@ train_set=train_sup # training flat-start system if [ $stage -le 8 ]; then echo "$0: Calling the flat-start chain recipe... $(date)." - local/chain/run_e2e_cnn_1a.sh --train-set train_sup --nj 30 + local/chain/run_e2e_cnn_1a.sh --train-set train_sup fi # alignments are used in tree @@ -165,7 +165,7 @@ fi # training e2eali system if [ $stage -le 10 ]; then echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_e2eali_1a.sh --train-set train_sup --nj 50 --stage 4 + local/chain/run_cnn_e2eali_1a.sh --train-set train_sup --stage 4 fi # no need for alignments, use same tree from end2endali @@ -194,7 +194,7 @@ train_set=semisup # training oracle system if [ $stage -le 14 ]; then echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_chainali_semisupervised_1b.sh --train-set semisup --nj 50 + local/chain/run_cnn_chainali_semisupervised_1b.sh --train-set semisup fi # training semi-supervised system From 8e7734c8b0b1fea24bd5f323932e9760d18deb11 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 11 Mar 2019 23:42:58 -0400 Subject: [PATCH 08/20] updating semisup code --- .../v1/local/chain/run_cnn_chainali_1a.sh | 11 +- .../run_cnn_chainali_semisupervised_1a.sh | 40 +-- .../run_cnn_chainali_semisupervised_1b.sh | 290 ++++++++++++++++++ .../v1/local/chain/run_cnn_e2eali_1a.sh | 3 + .../v1/local/chain/run_e2e_cnn_1a.sh | 3 + egs/madcat_ar/v1/run_end2end.sh | 12 +- 6 files changed, 330 insertions(+), 29 deletions(-) create mode 100755 egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index 22e569ad915..cb91ca65add 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -8,6 +8,7 @@ chunk_width=340,300,200,100 num_leaves=500 tdnn_dim=450 lang_decode=data/lang_test +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging . ./cmd.sh @@ -90,7 +91,7 @@ fi # $lang $ali_dir $tree_dir #fi -if [ $stage -le 2 ]; then +if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" @@ -114,7 +115,7 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs fi -if [ $stage -le 3 ]; then +if [ $stage -le 5 ]; then # no need to store the egs in a shared storage because we always # remove them. Anyway, it takes only 5 minutes to generate them. steps/nnet3/chain/train.py --stage=$train_stage \ @@ -147,7 +148,7 @@ if [ $stage -le 3 ]; then --dir $dir || exit 1; fi -if [ $stage -le 4 ]; then +if [ $stage -le 6 ]; then # The reason we are using data/lang here, instead of $lang, is just to # emphasize that it's not actually important to give mkgraph.sh the # lang directory with the matched topology (since it gets the @@ -159,13 +160,15 @@ if [ $stage -le 4 ]; then $dir $dir/graph || exit 1; fi -if [ $stage -le 5 ]; then +if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) for decode_set in test; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; done + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh index 8623ebeb6b7..2cb561c8161 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -8,6 +8,8 @@ chunk_width=340,300,200,100 num_leaves=500 tdnn_dim=450 lang_decode=data/lang_test +lang_rescore=data/lang_rescore_6g +dropout_schedule='0,0@0.20,0.2@0.50,0' # End configuration section. echo "$0 $@" # Print the command line for logging . ./cmd.sh @@ -23,14 +25,14 @@ EOF fi affix=_1a_semisup${train_set} -chain_model_dir=exp/chain/cnn_e2eali${affix} +chain_model_dir=exp/chain/cnn_e2eali_1a_train_sup #ali_dir=exp/chain/e2eali_$train_set lat_dir=exp/chain/chainali_${train_set}_lats dir=exp/chain/cnn_chainali${affix} train_data_dir=data/${train_set} #use end2endali tree tree_dir=exp/chain/tree_chainali_${train_set} -tree_dir=exp/chain/tree_e2eali_${train_set} +tree_dir=exp/chain/tree_e2eali_train_sup # the 'lang' directory is created by this script. # If you create such a directory with a non-standard topology # you should probably name it differently. @@ -71,7 +73,7 @@ if [ $stage -le 2 ]; then --acoustic-scale 1.0 \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ ${train_data_dir} data/lang $chain_model_dir $lat_dir - cp exp/chain/e2eali_${train_set}_lats/splice_opts $lat_dir/splice_opts + cp exp/chain/e2eali_train_sup_lats/splice_opts $lat_dir/splice_opts fi #if [ $stage -le 3 ]; then @@ -91,7 +93,7 @@ fi # $lang $ali_dir $tree_dir #fi -if [ $stage -le 2 ]; then +if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) @@ -120,43 +122,41 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs fi -if [ $stage -le 3 ]; then +if [ $stage -le 5 ]; then # no need to store the egs in a shared storage because we always # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage=$train_stage \ --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ --egs.dir "$common_egs_dir" \ - --egs.stage $get_egs_stage \ - --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ - --chain.xent-regularize $xent_regularize \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ - --trainer.num-chunk-per-minibatch 16,8 \ - --trainer.frames-per-iter 500000 \ - --trainer.num-epochs 4 \ + --trainer.num-chunk-per-minibatch 32,16 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial 2 \ - --trainer.optimization.num-jobs-final 4 \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ --trainer.max-param-change 2.0 \ + --trainer.dropout-schedule $dropout_schedule \ --cleanup.remove-egs false \ --feat-dir data/${train_set} \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ + --tree-dir $tree_dir \ + --lat-dir=$lat_dir \ --chain.left-tolerance 1 \ --chain.right-tolerance 1 \ - --egs.chunk-width=$chunk_width + --egs.chunk-width=$chunk_width \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --dir $dir || exit 1; fi -if [ $stage -le 4 ]; then +if [ $stage -le 6 ]; then # The reason we are using data/lang here, instead of $lang, is just to # emphasize that it's not actually important to give mkgraph.sh the # lang directory with the matched topology (since it gets the @@ -168,13 +168,15 @@ if [ $stage -le 4 ]; then $dir $dir/graph || exit 1; fi -if [ $stage -le 5 ]; then +if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) for decode_set in test; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; done + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh new file mode 100755 index 00000000000..ea0558d17d9 --- /dev/null +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh @@ -0,0 +1,290 @@ +#!/bin/bash + +set -e -o pipefail +stage=0 # Start from -1 for supervised seed system training +train_stage=-100 +nj=70 +test_nj=30 + +# The following 3 options decide the output directory for semi-supervised +# chain system +# dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix} +exp_root=exp/semisup_100k +chain_affix= # affix for chain dir +tdnn_affix=_semisup.uncon # affix for semi-supervised chain system + +# Datasets-Expects supervised_set and unsupervised_set +supervised_set=train +unsupervised_set=train_unsup +# Input seed system +sup_chain_dir=exp/chain/cnn_e2eali_1b # supervised chain system +sup_lat_dir=exp/chain/e2e_train_lats # Seed model options +sup_tree_dir=exp/chain/tree_e2e # tree directory for supervised chain system + +# Semi-supervised options +supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. + # Can be used to scale down the effect of unsupervised data + # by using a smaller scale for it e.g. 1.0,0.3 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation + +sup_egs_dir= # Supply this to skip supervised egs creation +unsup_egs_dir= # Supply this to skip unsupervised egs creation +unsup_egs_opts= # Extra options to pass to unsupervised egs creation +# Neural network opts +xent_regularize=0.1 +tdnn_dim=550 +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh +lang_decode=data/lang_test +lang_rescore=data/lang_rescore_6g +dropout_schedule='0,0@0.20,0.2@0.50,0' +dir=$exp_root/chain$chain_affix/tdnn$tdnn_affix +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-dropout-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + + # We use separate outputs for supervised and unsupervised data + # so we can properly track the train and valid objectives. + output name=output-0 input=output.affine + output name=output-1 input=output.affine + output name=output-0-xent input=output-xent.log-softmax + output name=output-1-xent input=output-xent.log-softmax +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +# Get values for $model_left_context, $model_right_context +. $dir/configs/vars + +left_context=$model_left_context +right_context=$model_right_context + +egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") +egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_$supervised_set + frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + mkdir -p $sup_egs_dir/ + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$cmd" \ + --left-tolerance 1 --right-tolerance 1 \ + --left-context $egs_left_context --right-context $egs_right_context \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --frames-overlap-per-eg 0 --constrained false \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --generate-egs-scp true \ + data/${supervised_set} $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsup_frames_per_eg=340,300,200,100 # Using a frames-per-eg of 150 for unsupervised data + # was found to be better than allowing smaller chunks + # (160,140,110,80) like for supervised system +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when + # creating numerator supervision +lattice_prune_beam=4.0 # beam for pruning the lattices prior to getting egs + # for unsupervised data +tolerance=1 # frame-tolerance for chain training + +unsup_lat_dir=$sup_chain_dir/decode_$unsupervised_set +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_$unsupervised_set + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + mkdir -p $unsup_egs_dir + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh \ + --cmd "$cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $egs_left_context --right-context $egs_right_context \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --deriv-weights-scp $sup_chain_dir/best_path_$unsupervised_set/weights.scp \ + --generate-egs-scp true $unsup_egs_opts \ + data/$unsupervised_set $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/comb_egs +if [ $stage -le 14 ]; then + steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$cmd" \ + --block-size 64 \ + --lang2weight $supervision_weights 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. + train_stage=-4 +fi + +chunk_width=340,300,200,100 +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --egs.chunk-width=$chunk_width \ + --cmd "$cmd" \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 1 \ + --chain.right-tolerance 1 \ + --trainer.srand=0 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --trainer.frames-per-iter=1500000 \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs 2 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs false \ + --feat-dir data/$supervised_set \ + --tree-dir $sup_tree_dir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; + +fi + +if [ $stage -le 17 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 18 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + for decode_set in test; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index b0484deaae9..42694424f10 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -8,6 +8,7 @@ chunk_width=340,300,200,100 num_leaves=500 tdnn_dim=450 lang_decode=data/lang_test +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging . ./cmd.sh @@ -166,6 +167,8 @@ if [ $stage -le 7 ]; then --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; done + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" diff --git a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh index 5d1fb656fa0..67d0020d151 100755 --- a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh @@ -14,6 +14,7 @@ minibatch_size=150=32,16/300=16,8/600=8,4/1200=4,2 common_egs_dir= train_set=train lang_decode=data/lang_test +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -132,6 +133,8 @@ if [ $stage -le 5 ]; then --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; done + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 0598d7f8435..4f2fbc4642d 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -119,10 +119,10 @@ fi if [ $stage -le 5 ]; then echo "$0:Preparing supervised and unsupervised data..." #local/get_unique_utterances.py data/train/text.old > data/train/uttlist.full - head -40000 data/train/uttlist.full > data/train/uttlist.40k - utils/subset_data_dir.sh --utt-list data/train/uttlist.40k data/train data/train_unsup - tail +40000 data/train/uttlist.full > data/train/uttlist.tail.80k - utils/subset_data_dir.sh --utt-list data/train/uttlist.tail.80k data/train data/train_LM + #head -40000 data/train/uttlist.full > data/train/uttlist.40k + #utils/subset_data_dir.sh --utt-list data/train/uttlist.40k data/train data/train_unsup + #tail +40000 data/train/uttlist.full > data/train/uttlist.tail.80k + #utils/subset_data_dir.sh --utt-list data/train/uttlist.tail.80k data/train data/train_LM utils/subset_data_dir.sh data/dev 4000 data/train_sup4k local/get_unique_utterances.py data/train_sup4k/text.old > data/train_sup4k/uttlist @@ -194,13 +194,13 @@ train_set=semisup # training oracle system if [ $stage -le 14 ]; then echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_chainali_semisupervised_1b.sh --train-set semisup + local/chain/run_cnn_chainali_semisupervised_1a.sh --train-set semisup --stage 4 fi # training semi-supervised system train_set=train_sup if [ $stage -le 15 ]; then - local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh \ + local/chain/run_cnn_chainali_semisupervised_1b.sh \ --supervised-set train_sup \ --unsupervised-set train_unsup_unique \ --sup-chain-dir exp/chain/cnn_chainali_1a_$train_set \ From 4d92b6f1e03c2783b2fbb5784dda4e9a9cdd0cbd Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 16 Mar 2019 15:35:54 -0400 Subject: [PATCH 09/20] making semisup and oracle similar --- .../v1/local/chain/run_cnn_chainali_1a.sh | 2 +- .../run_cnn_chainali_semisupervised_1a.sh | 51 ++++++++++++++----- .../run_cnn_chainali_semisupervised_1b.sh | 22 +++----- .../v1/local/chain/run_cnn_e2eali_1a.sh | 2 +- .../v1/local/chain/run_e2e_cnn_1a.sh | 2 +- egs/madcat_ar/v1/run_end2end.sh | 4 +- 6 files changed, 52 insertions(+), 31 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index cb91ca65add..fa3c43480c1 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -123,7 +123,7 @@ if [ $stage -le 5 ]; then --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ + --chain.apply-deriv-weights true \ --egs.dir "$common_egs_dir" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh index 2cb561c8161..ecd660178c1 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -24,9 +24,9 @@ where "nvcc" is installed. EOF fi -affix=_1a_semisup${train_set} -chain_model_dir=exp/chain/cnn_e2eali_1a_train_sup -#ali_dir=exp/chain/e2eali_$train_set +affix=_1a_oracle.denfst.ep4.filterwidthheight${train_set} +chain_model_dir=exp/chain/cnn_chainali_1a_train_sup +#ali_dir=exp/chain/chainali_$train_set lat_dir=exp/chain/chainali_${train_set}_lats dir=exp/chain/cnn_chainali${affix} train_data_dir=data/${train_set} @@ -38,6 +38,7 @@ tree_dir=exp/chain/tree_e2eali_train_sup # you should probably name it differently. lang=data/lang_chain xent_regularize=0.1 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation for f in $train_data_dir/feats.scp; do [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 done @@ -73,7 +74,7 @@ if [ $stage -le 2 ]; then --acoustic-scale 1.0 \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ ${train_data_dir} data/lang $chain_model_dir $lat_dir - cp exp/chain/e2eali_train_sup_lats/splice_opts $lat_dir/splice_opts + cp exp/chain/chainali_train_sup_lats/splice_opts $lat_dir/splice_opts fi #if [ $stage -le 3 ]; then @@ -93,7 +94,24 @@ fi # $lang $ali_dir $tree_dir #fi +# Get best path alignment and lattice posterior of best path alignment to be if [ $stage -le 4 ]; then + steps/best_path_weights.sh --cmd "${cmd}" --acwt 0.1 \ + data/train_unsup_unique \ + $lat_dir \ + $chain_model_dir/best_path_train_unsup_unique +fi + +# Train denominator FST using phone alignments from +# supervised and unsupervised data +if [ $stage -le 5 ]; then + steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$cmd" \ + --lm_opts '--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000' \ + $tree_dir $chain_model_dir/best_path_train_unsup_unique \ + $dir +fi + +if [ $stage -le 6 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) @@ -122,24 +140,31 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs fi -if [ $stage -le 5 ]; then - # no need to store the egs in a shared storage because we always - # remove them. Anyway, it takes only 5 minutes to generate them. +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. + train_stage=-4 +fi + +if [ $stage -le 7 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi steps/nnet3/chain/train.py --stage=$train_stage \ --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ + --chain.apply-deriv-weights true \ --egs.dir "$common_egs_dir" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ --trainer.num-chunk-per-minibatch 32,16 \ --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs 2 \ + --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial 5 \ - --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 5 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ @@ -156,7 +181,7 @@ if [ $stage -le 5 ]; then --dir $dir || exit 1; fi -if [ $stage -le 6 ]; then +if [ $stage -le 8 ]; then # The reason we are using data/lang here, instead of $lang, is just to # emphasize that it's not actually important to give mkgraph.sh the # lang directory with the matched topology (since it gets the @@ -168,7 +193,7 @@ if [ $stage -le 6 ]; then $dir $dir/graph || exit 1; fi -if [ $stage -le 7 ]; then +if [ $stage -le 9 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) for decode_set in test; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh index ea0558d17d9..3f523b854fc 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh @@ -4,22 +4,18 @@ set -e -o pipefail stage=0 # Start from -1 for supervised seed system training train_stage=-100 nj=70 -test_nj=30 - -# The following 3 options decide the output directory for semi-supervised -# chain system # dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix} exp_root=exp/semisup_100k chain_affix= # affix for chain dir tdnn_affix=_semisup.uncon # affix for semi-supervised chain system # Datasets-Expects supervised_set and unsupervised_set -supervised_set=train +supervised_set=train_sup unsupervised_set=train_unsup # Input seed system -sup_chain_dir=exp/chain/cnn_e2eali_1b # supervised chain system -sup_lat_dir=exp/chain/e2e_train_lats # Seed model options -sup_tree_dir=exp/chain/tree_e2e # tree directory for supervised chain system +sup_chain_dir=exp/chain/cnn_chainali_1b # supervised chain system +sup_lat_dir=exp/chain/chainali_train_sup_lats # Seed model options +sup_tree_dir=exp/chain/tree_e2eali_train_sup # tree directory for supervised chain system # Semi-supervised options supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. @@ -103,7 +99,6 @@ fi if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" @@ -237,7 +232,7 @@ if [ $stage -le 15 ]; then --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights=false \ + --chain.apply-deriv-weights=true \ --chain.frame-subsampling-factor=$frame_subsampling_factor \ --chain.alignment-subsampling-factor=1 \ --chain.left-tolerance 1 \ @@ -248,10 +243,10 @@ if [ $stage -le 15 ]; then --trainer.optimization.momentum=0.0 \ --trainer.frames-per-iter=1500000 \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs 2 \ + --trainer.num-epochs 4 \ --trainer.dropout-schedule $dropout_schedule \ - --trainer.optimization.num-jobs-initial 5 \ - --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 5 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ @@ -260,7 +255,6 @@ if [ $stage -le 15 ]; then --tree-dir $sup_tree_dir \ --lat-dir $sup_lat_dir \ --dir $dir || exit 1; - fi if [ $stage -le 17 ]; then diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index 42694424f10..92afb9b2495 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -123,7 +123,7 @@ if [ $stage -le 5 ]; then --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ + --chain.apply-deriv-weights true \ --egs.dir "$common_egs_dir" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ diff --git a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh index 67d0020d151..6b448b04879 100755 --- a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh @@ -93,7 +93,7 @@ if [ $stage -le 3 ]; then --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ + --chain.apply-deriv-weights true \ --egs.dir "$common_egs_dir" \ --egs.stage $get_egs_stage \ --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 4f2fbc4642d..35a0ffea146 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -133,6 +133,8 @@ if [ $stage -le 5 ]; then cp data/train/allowed_lengths.txt data/train_unsup_unique/allowed_lengths.txt cp data/dev/allowed_lengths.txt data/train_sup/allowed_lengths.txt + + utils/subset_data_dir.sh data/test 5000 data/test_5k fi if [ $stage -le 6 ]; then @@ -204,7 +206,7 @@ if [ $stage -le 15 ]; then --supervised-set train_sup \ --unsupervised-set train_unsup_unique \ --sup-chain-dir exp/chain/cnn_chainali_1a_$train_set \ - --sup-lat-dir exp/chain/chainali_${train_set}_lats\ + --sup-lat-dir exp/chain/chainali_${train_set}_lats \ --sup-tree-dir exp/chain/tree_e2eali_${train_set} \ --tdnn-affix _1a_tol1_beam4 \ --exp-root exp/semisup.unsup40k || exit 1 From 9c4e5af623e88b57a15e4aad6b0df03ba8970da3 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 16 Mar 2019 15:41:24 -0400 Subject: [PATCH 10/20] cosmetic change --- .../local/chain/run_cnn_chainali_oracle_1a.sh | 208 ++++++++++++ .../run_cnn_chainali_semisupervised_1a.sh | 296 +++++++++++------- .../run_cnn_chainali_semisupervised_1b.sh | 284 ----------------- egs/madcat_ar/v1/run_end2end.sh | 46 ++- 4 files changed, 413 insertions(+), 421 deletions(-) create mode 100755 egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh delete mode 100755 egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh new file mode 100755 index 00000000000..ecd660178c1 --- /dev/null +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh @@ -0,0 +1,208 @@ +#!/bin/bash +set -e -o pipefail +stage=0 +nj=70 +train_set=train +train_stage=-10 +chunk_width=340,300,200,100 +num_leaves=500 +tdnn_dim=450 +lang_decode=data/lang_test +lang_rescore=data/lang_rescore_6g +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $chain_model_dir $lat_dir + cp exp/chain/chainali_train_sup_lats/splice_opts $lat_dir/splice_opts +fi + +#if [ $stage -le 3 ]; then +# # Build a tree using our new topology. We know we have alignments for the +# # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use +# # those. The num-leaves is always somewhat less than the num-leaves from +# # the GMM baseline. +# if [ -f $tree_dir/final.mdl ]; then +# echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." +# exit 1; +# fi +# steps/nnet3/chain/build_tree.sh \ +# --frame-subsampling-factor 4 \ +# --alignment-subsampling-factor 1 \ +# --context-opts "--context-width=2 --central-position=1" \ +# --cmd "$cmd" $num_leaves $train_data_dir \ +# $lang $ali_dir $tree_dir +#fi + +# Get best path alignment and lattice posterior of best path alignment to be +if [ $stage -le 4 ]; then + steps/best_path_weights.sh --cmd "${cmd}" --acwt 0.1 \ + data/train_unsup_unique \ + $lat_dir \ + $chain_model_dir/best_path_train_unsup_unique +fi + +# Train denominator FST using phone alignments from +# supervised and unsupervised data +if [ $stage -le 5 ]; then + steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$cmd" \ + --lm_opts '--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000' \ + $tree_dir $chain_model_dir/best_path_train_unsup_unique \ + $dir +fi + +if [ $stage -le 6 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-dropout-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. + train_stage=-4 +fi + +if [ $stage -le 7 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --egs.dir "$common_egs_dir" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 1 \ + --trainer.num-chunk-per-minibatch 32,16 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 5 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --trainer.dropout-schedule $dropout_schedule \ + --cleanup.remove-egs false \ + --feat-dir data/${train_set} \ + --tree-dir $tree_dir \ + --lat-dir=$lat_dir \ + --chain.left-tolerance 1 \ + --chain.right-tolerance 1 \ + --egs.chunk-width=$chunk_width \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --dir $dir || exit 1; +fi + +if [ $stage -le 8 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 9 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + for decode_set in test; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh index ecd660178c1..3f523b854fc 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -1,21 +1,44 @@ #!/bin/bash + set -e -o pipefail -stage=0 +stage=0 # Start from -1 for supervised seed system training +train_stage=-100 nj=70 -train_set=train -train_stage=-10 -chunk_width=340,300,200,100 -num_leaves=500 -tdnn_dim=450 -lang_decode=data/lang_test -lang_rescore=data/lang_rescore_6g -dropout_schedule='0,0@0.20,0.2@0.50,0' +# dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix} +exp_root=exp/semisup_100k +chain_affix= # affix for chain dir +tdnn_affix=_semisup.uncon # affix for semi-supervised chain system + +# Datasets-Expects supervised_set and unsupervised_set +supervised_set=train_sup +unsupervised_set=train_unsup +# Input seed system +sup_chain_dir=exp/chain/cnn_chainali_1b # supervised chain system +sup_lat_dir=exp/chain/chainali_train_sup_lats # Seed model options +sup_tree_dir=exp/chain/tree_e2eali_train_sup # tree directory for supervised chain system + +# Semi-supervised options +supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. + # Can be used to scale down the effect of unsupervised data + # by using a smaller scale for it e.g. 1.0,0.3 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation + +sup_egs_dir= # Supply this to skip supervised egs creation +unsup_egs_dir= # Supply this to skip unsupervised egs creation +unsup_egs_opts= # Extra options to pass to unsupervised egs creation +# Neural network opts +xent_regularize=0.1 +tdnn_dim=550 # End configuration section. echo "$0 $@" # Print the command line for logging + . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - +lang_decode=data/lang_test +lang_rescore=data/lang_rescore_6g +dropout_schedule='0,0@0.20,0.2@0.50,0' +dir=$exp_root/chain$chain_affix/tdnn$tdnn_affix if ! cuda-compiled; then cat <$lang/topo - fi +if [ ! -f $graphdir/HCLG.fst ]; then + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_decode_unsup $sup_chain_dir $graphdir fi -if [ $stage -le 2 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ - --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ - ${train_data_dir} data/lang $chain_model_dir $lat_dir - cp exp/chain/chainali_train_sup_lats/splice_opts $lat_dir/splice_opts +# Decode unsupervised data and write lattices in non-compact +if [ $stage -le 4 ]; then + steps/nnet3/decode_semisup.sh --num-threads 4 --nj 45 --cmd "$cmd" --beam 15 \ + --frames-per-chunk 340 \ + --acwt 1.0 --post-decode-acwt 10.0 --write-compact false \ + --scoring-opts "--min-lmwt 8 --max-lmwt 8" --word-determinize false \ + $graphdir data/$unsupervised_set $sup_chain_dir/decode_${unsupervised_set} fi -#if [ $stage -le 3 ]; then -# # Build a tree using our new topology. We know we have alignments for the -# # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use -# # those. The num-leaves is always somewhat less than the num-leaves from -# # the GMM baseline. -# if [ -f $tree_dir/final.mdl ]; then -# echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." -# exit 1; -# fi -# steps/nnet3/chain/build_tree.sh \ -# --frame-subsampling-factor 4 \ -# --alignment-subsampling-factor 1 \ -# --context-opts "--context-width=2 --central-position=1" \ -# --cmd "$cmd" $num_leaves $train_data_dir \ -# $lang $ali_dir $tree_dir -#fi - # Get best path alignment and lattice posterior of best path alignment to be -if [ $stage -le 4 ]; then +if [ $stage -le 8 ]; then steps/best_path_weights.sh --cmd "${cmd}" --acwt 0.1 \ - data/train_unsup_unique \ - $lat_dir \ - $chain_model_dir/best_path_train_unsup_unique + data/$unsupervised_set \ + $sup_chain_dir/decode_$unsupervised_set \ + $sup_chain_dir/best_path_$unsupervised_set +fi + +frame_subsampling_factor=4 +if [ -f $sup_chain_dir/frame_subsampling_factor ]; then + frame_subsampling_factor=$(cat $sup_chain_dir/frame_subsampling_factor) fi +cmvn_opts=$(cat $sup_chain_dir/cmvn_opts) || exit 1 + +diff $sup_tree_dir/tree $sup_chain_dir/tree || { echo "$0: $sup_tree_dir/tree and $sup_chain_dir/tree differ"; exit 1; } # Train denominator FST using phone alignments from # supervised and unsupervised data -if [ $stage -le 5 ]; then +if [ $stage -le 10 ]; then steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$cmd" \ --lm_opts '--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000' \ - $tree_dir $chain_model_dir/best_path_train_unsup_unique \ + $sup_tree_dir $sup_chain_dir/best_path_$unsupervised_set \ $dir fi -if [ $stage -le 6 ]; then +if [ $stage -le 11 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" @@ -135,9 +121,101 @@ if [ $stage -le 6 ]; then output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + + # We use separate outputs for supervised and unsupervised data + # so we can properly track the train and valid objectives. + output name=output-0 input=output.affine + output name=output-1 input=output.affine + output name=output-0-xent input=output-xent.log-softmax + output name=output-1-xent input=output-xent.log-softmax EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +# Get values for $model_left_context, $model_right_context +. $dir/configs/vars + +left_context=$model_left_context +right_context=$model_right_context + +egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") +egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_$supervised_set + frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + mkdir -p $sup_egs_dir/ + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$cmd" \ + --left-tolerance 1 --right-tolerance 1 \ + --left-context $egs_left_context --right-context $egs_right_context \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --frames-overlap-per-eg 0 --constrained false \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --generate-egs-scp true \ + data/${supervised_set} $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsup_frames_per_eg=340,300,200,100 # Using a frames-per-eg of 150 for unsupervised data + # was found to be better than allowing smaller chunks + # (160,140,110,80) like for supervised system +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when + # creating numerator supervision +lattice_prune_beam=4.0 # beam for pruning the lattices prior to getting egs + # for unsupervised data +tolerance=1 # frame-tolerance for chain training + +unsup_lat_dir=$sup_chain_dir/decode_$unsupervised_set +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_$unsupervised_set + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + mkdir -p $unsup_egs_dir + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh \ + --cmd "$cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $egs_left_context --right-context $egs_right_context \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --deriv-weights-scp $sup_chain_dir/best_path_$unsupervised_set/weights.scp \ + --generate-egs-scp true $unsup_egs_opts \ + data/$unsupervised_set $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/comb_egs +if [ $stage -le 14 ]; then + steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$cmd" \ + --block-size 64 \ + --lang2weight $supervision_weights 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. fi if [ $train_stage -le -4 ]; then @@ -145,43 +223,41 @@ if [ $train_stage -le -4 ]; then train_stage=-4 fi -if [ $stage -le 7 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - steps/nnet3/chain/train.py --stage=$train_stage \ +chunk_width=340,300,200,100 +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --egs.chunk-width=$chunk_width \ --cmd "$cmd" \ - --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --egs.dir "$common_egs_dir" \ - --chain.frame-subsampling-factor 4 \ - --chain.alignment-subsampling-factor 1 \ - --trainer.num-chunk-per-minibatch 32,16 \ - --trainer.frames-per-iter 1500000 \ + --chain.apply-deriv-weights=true \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 1 \ + --chain.right-tolerance 1 \ + --trainer.srand=0 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --trainer.frames-per-iter=1500000 \ + --trainer.max-param-change=2.0 \ --trainer.num-epochs 4 \ - --trainer.optimization.momentum 0 \ + --trainer.dropout-schedule $dropout_schedule \ --trainer.optimization.num-jobs-initial 3 \ --trainer.optimization.num-jobs-final 5 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.shrink-value 1.0 \ - --trainer.max-param-change 2.0 \ - --trainer.dropout-schedule $dropout_schedule \ - --cleanup.remove-egs false \ - --feat-dir data/${train_set} \ - --tree-dir $tree_dir \ - --lat-dir=$lat_dir \ - --chain.left-tolerance 1 \ - --chain.right-tolerance 1 \ - --egs.chunk-width=$chunk_width \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --dir $dir || exit 1; + --cleanup.remove-egs false \ + --feat-dir data/$supervised_set \ + --tree-dir $sup_tree_dir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; fi -if [ $stage -le 8 ]; then +if [ $stage -le 17 ]; then # The reason we are using data/lang here, instead of $lang, is just to # emphasize that it's not actually important to give mkgraph.sh the # lang directory with the matched topology (since it gets the @@ -193,7 +269,7 @@ if [ $stage -le 8 ]; then $dir $dir/graph || exit 1; fi -if [ $stage -le 9 ]; then +if [ $stage -le 18 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) for decode_set in test; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh deleted file mode 100755 index 3f523b854fc..00000000000 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1b.sh +++ /dev/null @@ -1,284 +0,0 @@ -#!/bin/bash - -set -e -o pipefail -stage=0 # Start from -1 for supervised seed system training -train_stage=-100 -nj=70 -# dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix} -exp_root=exp/semisup_100k -chain_affix= # affix for chain dir -tdnn_affix=_semisup.uncon # affix for semi-supervised chain system - -# Datasets-Expects supervised_set and unsupervised_set -supervised_set=train_sup -unsupervised_set=train_unsup -# Input seed system -sup_chain_dir=exp/chain/cnn_chainali_1b # supervised chain system -sup_lat_dir=exp/chain/chainali_train_sup_lats # Seed model options -sup_tree_dir=exp/chain/tree_e2eali_train_sup # tree directory for supervised chain system - -# Semi-supervised options -supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. - # Can be used to scale down the effect of unsupervised data - # by using a smaller scale for it e.g. 1.0,0.3 -lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation - -sup_egs_dir= # Supply this to skip supervised egs creation -unsup_egs_dir= # Supply this to skip unsupervised egs creation -unsup_egs_opts= # Extra options to pass to unsupervised egs creation -# Neural network opts -xent_regularize=0.1 -tdnn_dim=550 -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh -lang_decode=data/lang_test -lang_rescore=data/lang_rescore_6g -dropout_schedule='0,0@0.20,0.2@0.50,0' -dir=$exp_root/chain$chain_affix/tdnn$tdnn_affix -if ! cuda-compiled; then - cat < $dir/configs/network.xconfig - input dim=40 name=input - conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 dropout-proportion=0.0 - conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 dropout-proportion=0.0 - conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - conv-relu-batchnorm-dropout-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim dropout-proportion=0.0 - relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 - relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - # We use separate outputs for supervised and unsupervised data - # so we can properly track the train and valid objectives. - output name=output-0 input=output.affine - output name=output-1 input=output.affine - output name=output-0-xent input=output-xent.log-softmax - output name=output-1-xent input=output-xent.log-softmax -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -# Get values for $model_left_context, $model_right_context -. $dir/configs/vars - -left_context=$model_left_context -right_context=$model_right_context - -egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") -egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") - -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_$supervised_set - frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$cmd" \ - --left-tolerance 1 --right-tolerance 1 \ - --left-context $egs_left_context --right-context $egs_right_context \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 1 \ - --frames-overlap-per-eg 0 --constrained false \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --generate-egs-scp true \ - data/${supervised_set} $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsup_frames_per_eg=340,300,200,100 # Using a frames-per-eg of 150 for unsupervised data - # was found to be better than allowing smaller chunks - # (160,140,110,80) like for supervised system -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when - # creating numerator supervision -lattice_prune_beam=4.0 # beam for pruning the lattices prior to getting egs - # for unsupervised data -tolerance=1 # frame-tolerance for chain training - -unsup_lat_dir=$sup_chain_dir/decode_$unsupervised_set -if [ -z "$unsup_egs_dir" ]; then - unsup_egs_dir=$dir/egs_$unsupervised_set - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the unsupervised data" - steps/nnet3/chain/get_egs.sh \ - --cmd "$cmd" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --deriv-weights-scp $sup_chain_dir/best_path_$unsupervised_set/weights.scp \ - --generate-egs-scp true $unsup_egs_opts \ - data/$unsupervised_set $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/comb_egs -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$cmd" \ - --block-size 64 \ - --lang2weight $supervision_weights 2 \ - $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - # This is to skip stages of den-fst creation, which was already done. - train_stage=-4 -fi - -chunk_width=340,300,200,100 -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --egs.chunk-width=$chunk_width \ - --cmd "$cmd" \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights=true \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=1 \ - --chain.left-tolerance 1 \ - --chain.right-tolerance 1 \ - --trainer.srand=0 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=32,16 \ - --trainer.optimization.momentum=0.0 \ - --trainer.frames-per-iter=1500000 \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs 4 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 5 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --cleanup.remove-egs false \ - --feat-dir data/$supervised_set \ - --tree-dir $sup_tree_dir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 17 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 18 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - for decode_set in test; do - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; - done - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/$decode_set $dir/decode_${decode_set}{,_rescored} || exit 1 -fi - -echo "Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 35a0ffea146..f3019657b2e 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -62,9 +62,9 @@ if [ $stage -le 0 ]; then fi if [ $stage -le 1 ]; then - #echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." - #image/get_image2num_frames.py data/train - #image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." + image/get_image2num_frames.py data/train + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train image/get_image2num_frames.py data/dev image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/dev @@ -118,11 +118,11 @@ fi if [ $stage -le 5 ]; then echo "$0:Preparing supervised and unsupervised data..." - #local/get_unique_utterances.py data/train/text.old > data/train/uttlist.full - #head -40000 data/train/uttlist.full > data/train/uttlist.40k - #utils/subset_data_dir.sh --utt-list data/train/uttlist.40k data/train data/train_unsup - #tail +40000 data/train/uttlist.full > data/train/uttlist.tail.80k - #utils/subset_data_dir.sh --utt-list data/train/uttlist.tail.80k data/train data/train_LM + local/get_unique_utterances.py data/train/text.old > data/train/uttlist.full + head -40000 data/train/uttlist.full > data/train/uttlist.40k + utils/subset_data_dir.sh --utt-list data/train/uttlist.40k data/train data/train_unsup + tail +40000 data/train/uttlist.full > data/train/uttlist.tail.80k + utils/subset_data_dir.sh --utt-list data/train/uttlist.tail.80k data/train data/train_LM utils/subset_data_dir.sh data/dev 4000 data/train_sup4k local/get_unique_utterances.py data/train_sup4k/text.old > data/train_sup4k/uttlist @@ -171,12 +171,12 @@ if [ $stage -le 10 ]; then fi # no need for alignments, use same tree from end2endali -#if [ $stage -le 11 ]; then -# echo "$0: Aligning the training data using the e2e chain model..." -# steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ -# --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ -# data/train_sup data/lang_chain exp/chain/cnn_e2eali_1a_$train_set exp/chain/e2eali_$train_set -#fi +if [ $stage -le 11 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/train_sup data/lang_chain exp/chain/cnn_e2eali_1a_$train_set exp/chain/e2eali_$train_set +fi # training baseline system if [ $stage -le 12 ]; then @@ -184,25 +184,17 @@ if [ $stage -le 12 ]; then local/chain/run_cnn_chainali_1a.sh --stage 2 --train-set train_sup fi -train_set=semisup -## no need for alignments, use same tree from end2endali -#if [ $stage -le 13 ]; then -# echo "$0: Aligning the training data using the e2e chain model..." -# steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ -# --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ -# data/semisup data/lang_chain exp/chain/cnn_chainali_1a_$train_set exp/chain/e2eali_$train_set -#fi - # training oracle system -if [ $stage -le 14 ]; then +train_set=semisup +if [ $stage -le 13 ]; then echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_chainali_semisupervised_1a.sh --train-set semisup --stage 4 + local/chain/run_cnn_chainali_oracle_1a.sh --train-set semisup --stage 2 fi # training semi-supervised system train_set=train_sup -if [ $stage -le 15 ]; then - local/chain/run_cnn_chainali_semisupervised_1b.sh \ +if [ $stage -le 14 ]; then + local/chain/run_cnn_chainali_semisupervised_1a.sh \ --supervised-set train_sup \ --unsupervised-set train_unsup_unique \ --sup-chain-dir exp/chain/cnn_chainali_1a_$train_set \ From 0b9330f736095e34351664f203ed21968528fc12 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 16 Mar 2019 16:48:55 -0400 Subject: [PATCH 11/20] adding x-ent and chainali tree --- .../v1/local/chain/run_cnn_chainali_1a.sh | 71 ++++++++++--------- .../local/chain/run_cnn_chainali_oracle_1a.sh | 5 +- .../run_cnn_chainali_semisupervised_1a.sh | 1 + .../v1/local/chain/run_cnn_e2eali_1a.sh | 8 ++- ..._e2e_cnn_1a.sh => run_flatstart_cnn_1a.sh} | 6 +- egs/madcat_ar/v1/run_end2end.sh | 13 ++-- 6 files changed, 57 insertions(+), 47 deletions(-) rename egs/madcat_ar/v1/local/chain/{run_e2e_cnn_1a.sh => run_flatstart_cnn_1a.sh} (97%) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index fa3c43480c1..b9b7ab25608 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -25,13 +25,12 @@ fi affix=_1a_${train_set} chain_model_dir=exp/chain/cnn_e2eali${affix} -#ali_dir=exp/chain/e2eali_$train_set +ali_dir=exp/chain/e2eali_$train_set lat_dir=exp/chain/chainali_${train_set}_lats dir=exp/chain/cnn_chainali${affix} train_data_dir=data/${train_set} -#use end2endali tree tree_dir=exp/chain/tree_chainali_${train_set} -tree_dir=exp/chain/tree_e2eali_${train_set} +xent_regularize=0.1 # the 'lang' directory is created by this script. # If you create such a directory with a non-standard topology # you should probably name it differently. @@ -74,42 +73,47 @@ if [ $stage -le 2 ]; then cp exp/chain/e2eali_${train_set}_lats/splice_opts $lat_dir/splice_opts fi -#if [ $stage -le 3 ]; then -# # Build a tree using our new topology. We know we have alignments for the -# # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use -# # those. The num-leaves is always somewhat less than the num-leaves from -# # the GMM baseline. -# if [ -f $tree_dir/final.mdl ]; then -# echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." -# exit 1; -# fi -# steps/nnet3/chain/build_tree.sh \ -# --frame-subsampling-factor 4 \ -# --alignment-subsampling-factor 1 \ -# --context-opts "--context-width=2 --central-position=1" \ -# --cmd "$cmd" $num_leaves $train_data_dir \ -# $lang $ali_dir $tree_dir -#fi +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 4 \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves $train_data_dir \ + $lang $ali_dir $tree_dir +fi if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 dropout-proportion=0.0 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-dropout-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs @@ -125,6 +129,7 @@ if [ $stage -le 5 ]; then --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights true \ --egs.dir "$common_egs_dir" \ + --chain.xent-regularize $xent_regularize \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ --trainer.num-chunk-per-minibatch 16,8 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh index ecd660178c1..3f6e0eaed8b 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh @@ -30,9 +30,9 @@ chain_model_dir=exp/chain/cnn_chainali_1a_train_sup lat_dir=exp/chain/chainali_${train_set}_lats dir=exp/chain/cnn_chainali${affix} train_data_dir=data/${train_set} -#use end2endali tree +#use chainali tree tree_dir=exp/chain/tree_chainali_${train_set} -tree_dir=exp/chain/tree_e2eali_train_sup +tree_dir=exp/chain/tree_chainali_train_sup # the 'lang' directory is created by this script. # If you create such a directory with a non-standard topology # you should probably name it differently. @@ -159,6 +159,7 @@ if [ $stage -le 7 ]; then --egs.dir "$common_egs_dir" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ + --chain.xent-regularize $xent_regularize \ --trainer.num-chunk-per-minibatch 32,16 \ --trainer.frames-per-iter 1500000 \ --trainer.num-epochs 4 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh index 3f523b854fc..2663add0114 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -237,6 +237,7 @@ if [ $stage -le 15 ]; then --chain.alignment-subsampling-factor=1 \ --chain.left-tolerance 1 \ --chain.right-tolerance 1 \ + --chain.xent-regularize $xent_regularize \ --trainer.srand=0 \ --trainer.optimization.shrink-value=1.0 \ --trainer.num-chunk-per-minibatch=32,16 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index 92afb9b2495..2e4042abec9 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -24,13 +24,13 @@ EOF fi affix=_1a_${train_set} -e2echain_model_dir=exp/chain/e2e_cnn_1a_$train_set +e2echain_model_dir=exp/chain/flatstart_cnn_1a_$train_set ali_dir=exp/chain/flatstartali_$train_set lat_dir=exp/chain/e2eali_${train_set}_lats dir=exp/chain/cnn_e2eali${affix} train_data_dir=data/${train_set} tree_dir=exp/chain/tree_e2eali_${train_set} - +xent_regularize=0.1 # the 'lang' directory is created by this script. # If you create such a directory with a non-standard topology # you should probably name it differently. @@ -94,6 +94,7 @@ fi if [ $stage -le 4 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs @@ -110,6 +111,8 @@ if [ $stage -le 4 ]; then ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs @@ -124,6 +127,7 @@ if [ $stage -le 5 ]; then --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights true \ + --chain.xent-regularize $xent_regularize \ --egs.dir "$common_egs_dir" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ diff --git a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh similarity index 97% rename from egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh rename to egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh index 6b448b04879..4ddf5944e39 100755 --- a/egs/madcat_ar/v1/local/chain/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh @@ -31,9 +31,9 @@ EOF fi affix=1a_$train_set -lang=data/lang_e2e -treedir=exp/chain/e2e_monotree # it's actually just a trivial tree (no tree building) -dir=exp/chain/e2e_cnn_${affix} +lang=data/lang_flatstart +treedir=exp/chain/flatstart_monotree # it's actually just a trivial tree (no tree building) +dir=exp/chain/flatstart_cnn_${affix} if [ $stage -le 0 ]; then # Create a version of the lang/ directory that has one state per phone in the diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index f3019657b2e..d21e1abc779 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -153,7 +153,7 @@ train_set=train_sup # training flat-start system if [ $stage -le 8 ]; then echo "$0: Calling the flat-start chain recipe... $(date)." - local/chain/run_e2e_cnn_1a.sh --train-set train_sup + local/chain/run_flatstart_cnn_1a.sh --train-set train_sup fi # alignments are used in tree @@ -161,24 +161,23 @@ if [ $stage -le 9 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train_sup data/lang_e2e exp/chain/e2e_cnn_1a_$train_set exp/chain/flatstartali_$train_set + data/train_sup data/lang exp/chain/flatstart_cnn_1a_$train_set exp/chain/flatstartali_$train_set fi # training e2eali system if [ $stage -le 10 ]; then echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_e2eali_1a.sh --train-set train_sup --stage 4 + local/chain/run_cnn_e2eali_1a.sh --train-set train_sup fi -# no need for alignments, use same tree from end2endali if [ $stage -le 11 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train_sup data/lang_chain exp/chain/cnn_e2eali_1a_$train_set exp/chain/e2eali_$train_set + data/train_sup data/lang exp/chain/cnn_e2eali_1a_$train_set exp/chain/e2eali_$train_set fi -# training baseline system +# training chainali baseline system (larger network) if [ $stage -le 12 ]; then echo "$0: chain model using the chainali alignments..." local/chain/run_cnn_chainali_1a.sh --stage 2 --train-set train_sup @@ -199,7 +198,7 @@ if [ $stage -le 14 ]; then --unsupervised-set train_unsup_unique \ --sup-chain-dir exp/chain/cnn_chainali_1a_$train_set \ --sup-lat-dir exp/chain/chainali_${train_set}_lats \ - --sup-tree-dir exp/chain/tree_e2eali_${train_set} \ + --sup-tree-dir exp/chain/tree_chainali_${train_set} \ --tdnn-affix _1a_tol1_beam4 \ --exp-root exp/semisup.unsup40k || exit 1 fi From 4323f4422fcebed9f40b8b4bd232ae86f28c5a3c Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 16 Mar 2019 16:52:39 -0400 Subject: [PATCH 12/20] minor change --- egs/madcat_ar/v1/cmd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/madcat_ar/v1/cmd.sh b/egs/madcat_ar/v1/cmd.sh index 3c8eb9f93a5..6080a8bab68 100644 --- a/egs/madcat_ar/v1/cmd.sh +++ b/egs/madcat_ar/v1/cmd.sh @@ -10,4 +10,4 @@ # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. -export cmd="queue.pl" +export cmd="retry.pl queue.pl" From bda6d53b8cefeeed0f8077d82367c438b3cd1f5e Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 16 Mar 2019 17:10:02 -0400 Subject: [PATCH 13/20] semi.madcat --- egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh | 2 +- egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh | 2 +- .../v1/local/chain/run_cnn_chainali_semisupervised_1a.sh | 2 +- egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh | 2 +- egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index b9b7ab25608..6f7f8c23b74 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -167,7 +167,7 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - for decode_set in test; do + for decode_set in test.5k; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh index 3f6e0eaed8b..3638647e5bf 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh @@ -196,7 +196,7 @@ fi if [ $stage -le 9 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - for decode_set in test; do + for decode_set in test.5k; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh index 2663add0114..113583debdf 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -272,7 +272,7 @@ fi if [ $stage -le 18 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - for decode_set in test; do + for decode_set in test.5k; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index 2e4042abec9..15d7d19f517 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -166,7 +166,7 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - for decode_set in test; do + for decode_set in test.5k; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh index 4ddf5944e39..6ee97e1ff16 100755 --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh @@ -128,7 +128,7 @@ if [ $stage -le 4 ]; then fi if [ $stage -le 5 ]; then - for decode_set in test; do + for decode_set in test.5k; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; From c704b42aec93276bdec6647631080d7836a61155 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 16 Mar 2019 17:57:54 -0400 Subject: [PATCH 14/20] adding dropout fixing name --- egs/madcat_ar/v1/local/chain/compare_wer.sh | 8 ++++---- egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh | 2 ++ .../v1/local/chain/run_cnn_chainali_oracle_1a.sh | 2 +- egs/madcat_ar/v1/run_end2end.sh | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/compare_wer.sh b/egs/madcat_ar/v1/local/chain/compare_wer.sh index 7f04061dafb..9f00308658a 100755 --- a/egs/madcat_ar/v1/local/chain/compare_wer.sh +++ b/egs/madcat_ar/v1/local/chain/compare_wer.sh @@ -22,28 +22,28 @@ echo echo -n "# WER " for x in $*; do - wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}') + wer=$(cat $x/decode_test.5k/scoring_kaldi/best_wer | awk '{print $2}') printf "% 10s" $wer done echo echo -n "# WER (rescored) " for x in $*; do - wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') + wer=$(cat $x/decode_test.5k_rescored/scoring_kaldi/best_wer | awk '{print $2}') printf "% 10s" $wer done echo echo -n "# CER " for x in $*; do - cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') + cer=$(cat $x/decode_test.5k/scoring_kaldi/best_cer | awk '{print $2}') printf "% 10s" $cer done echo echo -n "# CER (rescored) " for x in $*; do - cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') + cer=$(cat $x/decode_test.5k_rescored/scoring_kaldi/best_cer | awk '{print $2}') printf "% 10s" $cer done echo diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index 6f7f8c23b74..b870f3f5179 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -9,6 +9,7 @@ num_leaves=500 tdnn_dim=450 lang_decode=data/lang_test lang_rescore=data/lang_rescore_6g +dropout_schedule='0,0@0.20,0.2@0.50,0' # End configuration section. echo "$0 $@" # Print the command line for logging . ./cmd.sh @@ -142,6 +143,7 @@ if [ $stage -le 5 ]; then --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ --trainer.max-param-change 2.0 \ + --trainer.dropout-schedule $dropout_schedule \ --cleanup.remove-egs false \ --feat-dir data/${train_set} \ --tree-dir $tree_dir \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh index 3638647e5bf..45fbd5cf3db 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh @@ -24,7 +24,7 @@ where "nvcc" is installed. EOF fi -affix=_1a_oracle.denfst.ep4.filterwidthheight${train_set} +affix=_1a_oracle_${train_set} chain_model_dir=exp/chain/cnn_chainali_1a_train_sup #ali_dir=exp/chain/chainali_$train_set lat_dir=exp/chain/chainali_${train_set}_lats diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index d21e1abc779..e940b8e7f52 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -200,5 +200,5 @@ if [ $stage -le 14 ]; then --sup-lat-dir exp/chain/chainali_${train_set}_lats \ --sup-tree-dir exp/chain/tree_chainali_${train_set} \ --tdnn-affix _1a_tol1_beam4 \ - --exp-root exp/semisup.unsup40k || exit 1 + --exp-root exp/semisup || exit 1 fi From 705b0b34934bbcaf97ac5676e40669242d8b6667 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 16 Mar 2019 18:14:03 -0400 Subject: [PATCH 15/20] minor change --- .../v1/local/chain/run_cnn_chainali_semisupervised_1a.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh index 113583debdf..26f8f298803 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -28,7 +28,7 @@ unsup_egs_dir= # Supply this to skip unsupervised egs creation unsup_egs_opts= # Extra options to pass to unsupervised egs creation # Neural network opts xent_regularize=0.1 -tdnn_dim=550 +tdnn_dim=450 # End configuration section. echo "$0 $@" # Print the command line for logging @@ -68,7 +68,7 @@ if [ $stage -le 4 ]; then steps/nnet3/decode_semisup.sh --num-threads 4 --nj 45 --cmd "$cmd" --beam 15 \ --frames-per-chunk 340 \ --acwt 1.0 --post-decode-acwt 10.0 --write-compact false \ - --scoring-opts "--min-lmwt 8 --max-lmwt 8" --word-determinize false \ + --scoring-opts "--min-lmwt 10 --max-lmwt 10" --word-determinize false \ $graphdir data/$unsupervised_set $sup_chain_dir/decode_${unsupervised_set} fi @@ -238,7 +238,6 @@ if [ $stage -le 15 ]; then --chain.left-tolerance 1 \ --chain.right-tolerance 1 \ --chain.xent-regularize $xent_regularize \ - --trainer.srand=0 \ --trainer.optimization.shrink-value=1.0 \ --trainer.num-chunk-per-minibatch=32,16 \ --trainer.optimization.momentum=0.0 \ From 6ceefbbfc051fa2135377d2b43a2d075c1dcec8e Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sun, 17 Mar 2019 08:44:33 -0400 Subject: [PATCH 16/20] varying amt of supervised data --- .../local/chain/run_cnn_chainali_oracle_1a.sh | 4 +-- .../run_cnn_chainali_semisupervised_1a.sh | 4 +-- egs/madcat_ar/v1/run_end2end.sh | 35 +++++++++---------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh index 45fbd5cf3db..744f25e573f 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh @@ -164,8 +164,8 @@ if [ $stage -le 7 ]; then --trainer.frames-per-iter 1500000 \ --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 5 \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh index 26f8f298803..d8805813f1b 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -245,8 +245,8 @@ if [ $stage -le 15 ]; then --trainer.max-param-change=2.0 \ --trainer.num-epochs 4 \ --trainer.dropout-schedule $dropout_schedule \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 5 \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index e940b8e7f52..89b3335e762 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -118,31 +118,28 @@ fi if [ $stage -le 5 ]; then echo "$0:Preparing supervised and unsupervised data..." - local/get_unique_utterances.py data/train/text.old > data/train/uttlist.full - head -40000 data/train/uttlist.full > data/train/uttlist.40k - utils/subset_data_dir.sh --utt-list data/train/uttlist.40k data/train data/train_unsup - tail +40000 data/train/uttlist.full > data/train/uttlist.tail.80k - utils/subset_data_dir.sh --utt-list data/train/uttlist.tail.80k data/train data/train_LM - - utils/subset_data_dir.sh data/dev 4000 data/train_sup4k - local/get_unique_utterances.py data/train_sup4k/text.old > data/train_sup4k/uttlist - utils/subset_data_dir.sh --utt-list data/train_sup4k/uttlist data/train_sup4k data/train_sup - + #local/get_unique_utterances.py data/train/text.old > data/train/uttlist.full + #head -40000 data/train/uttlist.full > data/train/uttlist.40k + #utils/subset_data_dir.sh --utt-list data/train/uttlist.40k data/train data/train_unsup + #tail +40000 data/train/uttlist.full > data/train/uttlist.tail.80k + #utils/subset_data_dir.sh --utt-list data/train/uttlist.tail.80k data/train data/train_LM + #utils/subset_data_dir.sh data/test 5000 data/test_5k + + utils/subset_data_dir.sh data/dev 8000 data/train_sup8k + local/get_unique_utterances.py data/train_sup8k/text.old > data/train_sup8k/uttlist + utils/subset_data_dir.sh --utt-list data/train_sup8k/uttlist data/train_sup8k data/train_sup local/remove_sup_utts_from_unsup.py data/train_sup/text.old data/train_unsup/text.old > data/local/unsup_uttlist utils/subset_data_dir.sh --utt-list data/local/unsup_uttlist data/train_unsup data/train_unsup_unique - cp data/train/allowed_lengths.txt data/train_unsup_unique/allowed_lengths.txt cp data/dev/allowed_lengths.txt data/train_sup/allowed_lengths.txt - - utils/subset_data_dir.sh data/test 5000 data/test_5k fi -if [ $stage -le 6 ]; then - echo "$0: Estimating a language model for decoding..." - local/train_lm.unsup.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.train80k.arpa.gz \ - data/local/dict/lexicon.txt data/lang_decode_unsup -fi +#if [ $stage -le 6 ]; then +# echo "$0: Estimating a language model for decoding..." +# local/train_lm.unsup.sh +# utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.train80k.arpa.gz \ +# data/local/dict/lexicon.txt data/lang_decode_unsup +#fi if [ $stage -le 7 ]; then utils/combine_data.sh data/semisup \ From e392de1e5f7a9ca8b3ac4110c0f157b99dcb441d Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sun, 17 Mar 2019 16:34:09 -0400 Subject: [PATCH 17/20] minor change --- .../v1/local/chain/run_flatstart_cnn_1a.sh | 2 +- egs/madcat_ar/v1/run_end2end.sh | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh index 6ee97e1ff16..00a68e3ae26 100755 --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn_1a.sh @@ -104,7 +104,7 @@ if [ $stage -le 3 ]; then --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ --trainer.optimization.num-jobs-initial 2 \ - --trainer.optimization.num-jobs-final 3 \ + --trainer.optimization.num-jobs-final 4 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 89b3335e762..1b2943cb804 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -180,16 +180,8 @@ if [ $stage -le 12 ]; then local/chain/run_cnn_chainali_1a.sh --stage 2 --train-set train_sup fi -# training oracle system -train_set=semisup -if [ $stage -le 13 ]; then - echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_chainali_oracle_1a.sh --train-set semisup --stage 2 -fi - # training semi-supervised system -train_set=train_sup -if [ $stage -le 14 ]; then +if [ $stage -le 13 ]; then local/chain/run_cnn_chainali_semisupervised_1a.sh \ --supervised-set train_sup \ --unsupervised-set train_unsup_unique \ @@ -199,3 +191,10 @@ if [ $stage -le 14 ]; then --tdnn-affix _1a_tol1_beam4 \ --exp-root exp/semisup || exit 1 fi + +# training oracle system +train_set=semisup +if [ $stage -le 14 ]; then + echo "$(date) stage 5: Building a tree and training a regular chain model using the e2e alignments..." + local/chain/run_cnn_chainali_oracle_1a.sh --train-set semisup --stage 2 +fi From 0b66f67c27672408f8c031f627b6fbaec84ac14a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 27 Apr 2019 17:03:30 -0400 Subject: [PATCH 18/20] local/chain/run_cnn_chainali_semisupervised_1a.sh --- egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh | 2 +- egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index b870f3f5179..6e330300045 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -25,7 +25,7 @@ EOF fi affix=_1a_${train_set} -chain_model_dir=exp/chain/cnn_e2eali${affix} +chain_model_dir=exp/chain/cnn_chainali${affix} ali_dir=exp/chain/e2eali_$train_set lat_dir=exp/chain/chainali_${train_set}_lats dir=exp/chain/cnn_chainali${affix} diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh index 744f25e573f..1518409baa3 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_oracle_1a.sh @@ -157,9 +157,9 @@ if [ $stage -le 7 ]; then --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights true \ --egs.dir "$common_egs_dir" \ + --chain.xent-regularize $xent_regularize \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 1 \ - --chain.xent-regularize $xent_regularize \ --trainer.num-chunk-per-minibatch 32,16 \ --trainer.frames-per-iter 1500000 \ --trainer.num-epochs 4 \ From cddbc723a68c90c05b2a21c77979f15925ac6486 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 27 Apr 2019 17:23:05 -0400 Subject: [PATCH 19/20] minor changes --- .../v1/local/chain/run_cnn_chainali_semisupervised_1a.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh index d8805813f1b..87455fa5a42 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_semisupervised_1a.sh @@ -149,7 +149,7 @@ if [ -z "$sup_egs_dir" ]; then if [ $stage -le 12 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + /export/b0{5,6,7}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage fi mkdir -p $sup_egs_dir/ touch $sup_egs_dir/.nodelete # keep egs around when that run dies. @@ -179,7 +179,7 @@ lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattice # creating numerator supervision lattice_prune_beam=4.0 # beam for pruning the lattices prior to getting egs # for unsupervised data -tolerance=1 # frame-tolerance for chain training +tolerance=2 # frame-tolerance for chain training unsup_lat_dir=$sup_chain_dir/decode_$unsupervised_set if [ -z "$unsup_egs_dir" ]; then @@ -188,7 +188,7 @@ if [ -z "$unsup_egs_dir" ]; then if [ $stage -le 13 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + /export/b0{5,6,7}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage fi mkdir -p $unsup_egs_dir touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. From 91147479e3604eabee453703adceccc0ada62b65 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 27 Apr 2019 19:52:17 -0400 Subject: [PATCH 20/20] minor change --- egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index 6e330300045..b870f3f5179 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -25,7 +25,7 @@ EOF fi affix=_1a_${train_set} -chain_model_dir=exp/chain/cnn_chainali${affix} +chain_model_dir=exp/chain/cnn_e2eali${affix} ali_dir=exp/chain/e2eali_$train_set lat_dir=exp/chain/chainali_${train_set}_lats dir=exp/chain/cnn_chainali${affix}