From: drowe67 Date: Thu, 24 Sep 2009 02:48:43 +0000 (+0000) Subject: added jitter to zero phase model, helps with mmmt1 and hts1 clicky artifact X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=dacf7c5570dbbebff0afec6ca806610fb55c375d;p=freetel-svn-tracking.git added jitter to zero phase model, helps with mmmt1 and hts1 clicky artifact git-svn-id: https://svn.code.sf.net/p/freetel/code@63 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2/src/code.sh b/codec2/src/code.sh index 9ab6b0a6..67af13c7 100644 --- a/codec2/src/code.sh +++ b/codec2/src/code.sh @@ -5,8 +5,10 @@ # Run steps to encode a speech sample ../unittest/tnlp ../raw/$1.raw ../unittest/$1_nlp.p -../src/sinenc ../raw/$1.raw %1.mdl 300 ../unittest/$1_nlp.p -../src/sinedec ../raw/$1.raw %1.mdl -o $1_uq.raw -../src/sinedec ../raw/$1.raw %1.mdl --phase 0 -o $1_phase0.raw --postfilter -../src/sinedec ../raw/$1.raw %1.mdl --lpc 10 -o $1_lpc10.raw +../src/sinenc ../raw/$1.raw $1.mdl 300 ../unittest/$1_nlp.p +../src/sinedec ../raw/$1.raw $1.mdl -o $1_uq.raw +../src/sinedec ../raw/$1.raw $1.mdl --phase 0 -o $1_phase0.raw --postfilter +../src/sinedec ../raw/$1.raw $1.mdl --lpc 10 -o $1_lpc10.raw +../src/sinedec ../raw/$1.raw $1.mdl --phase 0 --lpc 10 -o $1_phase0_lpc10.raw --postfilter +../src/sinedec ../raw/$1.raw $1.mdl --phase 0 --lpc 10 --lsp 37 -o $1_lsp.raw --postfilter diff --git a/codec2/src/listen.sh b/codec2/src/listen.sh index 670b191d..3d9e08d3 100755 --- a/codec2/src/listen.sh +++ b/codec2/src/listen.sh @@ -4,6 +4,7 @@ # # Run menu with common sample file options, headphone version -../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw +../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_phase0_lpc10.raw $1_lsp.raw ../raw/$1_speex_8k.raw $2 $3 + diff --git a/codec2/src/listen1.sh b/codec2/src/listen1.sh index c609b189..82e65668 100755 --- a/codec2/src/listen1.sh +++ b/codec2/src/listen1.sh @@ -4,6 +4,6 @@ # # Run menu with common sample 
file options, headphone version -../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw ../raw/$1_speex_8k.raw -d /dev/dsp1 +../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_phase0_lpc10.raw $1_lsp.raw ../raw/$1_speex_8k.raw $2 $3 -d /dev/dsp1 diff --git a/codec2/src/phase.c b/codec2/src/phase.c index e13e99c6..2b59f7f7 100644 --- a/codec2/src/phase.c +++ b/codec2/src/phase.c @@ -329,10 +329,19 @@ void phase_synth_zero_order( if (Lrand > model.L) Lrand = model.L; } - /* update excitation fundamental phase track */ + /* update excitation fundamental phase track, this sets + the position of each pitch pulse during voiced speech */ ex_phase[0] += (*prev_Wo+model.Wo)*N/2.0; ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5); + + /* After much experimentation I found that a few percent of jitter + was effective in reducing "clicky" artifact in hts1 and mmt1. The + peak level of the synthesised speech was reduced to levels closer + to the original speech as well.*/ + + ex_phase[0] += 0.05*TWO_PI*(0.5 - (float)rand()/RAND_MAX); + *prev_Wo = model.Wo; /* now modify this frames phase using zero phase model */ @@ -342,12 +351,17 @@ void phase_synth_zero_order( /* generate excitation */ if (m <= Lrand) { - Ex[m].real = cos(ex_phase[0]*m); + Ex[m].real = cos(ex_phase[0]*m); Ex[m].imag = sin(ex_phase[0]*m); - /* following is an experiment in dispersing pulse energy over time, - didn't really change sound at all, e.g. mmt1 still sounded - "clicky"*/ + /* following is an experiment in dispersing pulse energy over + time, didn't really change sound at all, e.g. mmt1 still + sounded "clicky". I think this is because this provides + just a small phase shift between adjacent harmonics. 
+ However for voiced speech it is the high energy harmonics + that form pitch pulses, so we need a relatively high phase + shift between them to disperse pulse energy */ + //Ex[m].real = cos(ex_phase[0]*m + model.Wo*m*m*0.3); //Ex[m].imag = sin(ex_phase[0]*m + model.Wo*m*m*0.3); @@ -355,7 +369,9 @@ void phase_synth_zero_order( (see octave/glottal.m) in an attempt io make mmt1 and hts1 a little less "clicky", i.e. disperse the pusle energy away from the point of onset. Result was no difference in speech quality, in fact - no difference at all. Could be an implementation error I guess. */ + no difference at all. Could be an implementation error I guess. + Once again - this model doesn't change phases much between adjacent + harmonics, so not much dispersion. */ //b = floor(m*model->Wo*FFT_DEC/TWO_PI + 0.5); //Ex[m].real = cos(ex_phase[0]*m + glottal[b]); //Ex[m].imag = sin(ex_phase[0]*m + glottal[b]); diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c index d855412f..700013c3 100644 --- a/codec2/src/quantise.c +++ b/codec2/src/quantise.c @@ -37,7 +37,7 @@ #define MAX_ORDER 20 #define LPC_FLOOR 0.0002 /* autocorrelation floor */ -#define LSP_DELTA1 0.2 /* grid spacing for LSP root searches */ +#define LSP_DELTA1 0.05 /* grid spacing for LSP root searches */ /* Speex lag window */ @@ -267,8 +267,9 @@ float lpc_model_amplitudes( E += ak[i]*R[i]; if (lsp_quantisation) { - roots = lpc_to_lsp(&ak[1], order, lsp, 10, LSP_DELTA1, NULL); - + roots = lpc_to_lsp(&ak[1], order, lsp, 5, LSP_DELTA1, NULL); + if (roots != order) + printf("LSP roots not found\n"); index = quantise(cb12, &lsp[0], LSP_12_K, LSP_12_M, &se); lsp[0] = cb12[index*LSP_12_K+0]; lsp[1] = cb12[index*LSP_12_K+1]; diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c index d25bcf78..3dab7a59 100644 --- a/codec2/src/sinedec.c +++ b/codec2/src/sinedec.c @@ -83,7 +83,7 @@ int main(int argc, char *argv[]) int lpc_model, order; int lsp, lsp_quantiser; float ak[LPC_MAX_ORDER+1]; - + int dump; int phase, 
phase_model; @@ -92,6 +92,7 @@ int main(int argc, char *argv[]) int postfilt; float bg_est; + if (argc < 3) { printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n"); printf(" [--dump DumpFilePrefix]\n"); @@ -163,6 +164,7 @@ int main(int argc, char *argv[]) if (phase) { phase_model = atoi(argv[phase+1]); assert((phase_model == 0) || (phase_model == 1)); + ex_phase = 0; } bg_est = 0.0; @@ -232,8 +234,8 @@ int main(int argc, char *argv[]) dump_snr(snr); if (phase_model == 0) { /* just to make sure we are not cheating - kill all phases */ - //for(i=0; i