From ebd97e824de24a1f7b25f386a7a98f73a927697c Mon Sep 17 00:00:00 2001 From: drowe67 Date: Fri, 12 Nov 2010 07:49:46 +0000 Subject: [PATCH] added some post processing to voicing estimator, which has reduced some of the voicing errors and clicky artefacts git-svn-id: https://svn.code.sf.net/p/freetel/code@220 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2/src/c2sim.c | 9 ++++--- codec2/src/codec2.c | 5 ++-- codec2/src/sine.c | 59 ++++++++++++++++++++++++++++++++++++++++++--- codec2/src/sine.h | 2 +- 4 files changed, 65 insertions(+), 10 deletions(-) diff --git a/codec2/src/c2sim.c b/codec2/src/c2sim.c index 950d1061..4e54aaff 100644 --- a/codec2/src/c2sim.c +++ b/codec2/src/c2sim.c @@ -145,7 +145,7 @@ int main(int argc, char *argv[]) for(i=0; inlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo); - c2->prev_Wo = TWO_PI/pitch; model->Wo = TWO_PI/pitch; model->L = PI/model->Wo; @@ -338,5 +337,7 @@ void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[]) dft_speech(Sw, c2->Sn, c2->w); two_stage_pitch_refinement(model, Sw); estimate_amplitudes(model, Sw, c2->W); - est_voicing_mbe(model, Sw, c2->W, Sw_, Ew); + est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo); + + c2->prev_Wo = model->Wo; } diff --git a/codec2/src/sine.c b/codec2/src/sine.c index 9a07a4c4..f58882ee 100644 --- a/codec2/src/sine.c +++ b/codec2/src/sine.c @@ -363,8 +363,8 @@ float est_voicing_mbe( COMP W[], COMP Sw_[], /* DFT of all voiced synthesised signal */ /* useful for debugging/dump file */ - COMP Ew[] /* DFT of error */ -) + COMP Ew[], /* DFT of error */ + float prev_Wo) { int i,l,al,bl,m; /* loop variables */ COMP Am; /* amplitude sample for this band */ @@ -373,6 +373,8 @@ float est_voicing_mbe( float error; /* accumulated error between originl and synthesised */ float Wo; float sig, snr; + float elow, ehigh, eratio; + float dF0, sixty; sig = 0.0; for(l=1; l<=model->L/4; l++) { @@ -427,7 +429,58 @@ float est_voicing_mbe( model->voiced = 1; else model->voiced = 0; - + + /* post 
processing, helps clean up some voicing errors ---------------------*/ + + /* + Determine the ratio of low frequency to high frequency energy, + voiced speech tends to be dominated by low frequency energy, + unvoiced by high frequency. This measure can be used to + determine if we have made any gross errors. + */ + + elow = ehigh = 0.0; + for(l=1; l<=model->L/2; l++) { + elow += model->A[l]*model->A[l]; + } + for(l=model->L/2; l<=model->L; l++) { + ehigh += model->A[l]*model->A[l]; + } + eratio = 10.0*log10(elow/ehigh); + dF0 = 0.0; + + /* Look for Type 1 errors, strongly V speech that has been + accidentally declared UV */ + + if (model->voiced == 0) + if (eratio > 10.0) + model->voiced = 1; + + /* Look for Type 2 errors, strongly UV speech that has been + accidentally declared V */ + + if (model->voiced == 1) { + if (eratio < -10.0) + model->voiced = 0; + + /* If pitch is jumping about it's likely this is UV */ + + dF0 = (model->Wo - prev_Wo)*FS/TWO_PI; + if (fabs(dF0) > 15.0) + model->voiced = 0; + + /* A common source of Type 2 errors is the pitch estimator + gives a low (50Hz) estimate for UV speech, which gives a + good match with noise due to the close harmonic spacing. + These errors are much more common than people with 50Hz + pitch, so we have just a small eratio threshold. 
*/ + + sixty = 60.0*TWO_PI/FS; + if ((eratio < -4.0) && (model->Wo <= sixty)) + model->voiced = 0; + } + printf(" v: %d snr: %f eratio: %3.2f %f\n", model->voiced, snr, eratio, dF0); + return snr; } diff --git a/codec2/src/sine.h b/codec2/src/sine.h index 73d928fd..88eee37f 100644 --- a/codec2/src/sine.h +++ b/codec2/src/sine.h @@ -35,7 +35,7 @@ void make_analysis_window(float w[], COMP W[]); void dft_speech(COMP Sw[], float Sn[], float w[]); void two_stage_pitch_refinement(MODEL *model, COMP Sw[]); void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[]); -float est_voicing_mbe(MODEL *model, COMP Sw[], COMP W[], COMP Sw_[],COMP Ew[]); +float est_voicing_mbe(MODEL *model, COMP Sw[], COMP W[], COMP Sw_[],COMP Ew[], float prev_Wo); void make_synthesis_window(float Pn[]); void synthesise(float Sn_[], MODEL *model, float Pn[], int shift); -- 2.25.1