From: drowe67 Date: Sun, 13 Sep 2009 01:44:55 +0000 (+0000) Subject: added postfilter which improves zero phase model performance with bg noise, e.g mmt1_... X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=f8ec49b59f36e14e8c1addd1dcfe4bcd0b94fafb;p=freetel-svn-tracking.git added postfilter which improves zero phase model performance with bg noise, e.g mmt1_phase0 now sounds closer to mmt_uq git-svn-id: https://svn.code.sf.net/p/freetel/code@60 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2/src/Makefile b/codec2/src/Makefile index 467adef8..f7a49d86 100644 --- a/codec2/src/Makefile +++ b/codec2/src/Makefile @@ -5,7 +5,7 @@ SINENC_OBJ = sinenc.o globals.o initenc.o four1.o refine.o spec.o dump.o SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \ quantise.o lpc.o dump.o refine.o ../speex/lsp.o \ ../speex/quant_lsp.o ../speex/bits.o ../speex/lsp_tables_nb.o \ - ../speex/high_lsp_tables.o phase.o + ../speex/high_lsp_tables.o phase.o postfilter.o all: sinenc sinedec diff --git a/codec2/src/code.sh b/codec2/src/code.sh index 1a37c417..7be303c5 100644 --- a/codec2/src/code.sh +++ b/codec2/src/code.sh @@ -8,4 +8,5 @@ ../src/sinenc ../raw/$1.raw %1.mdl 300 ../unittest/$1_nlp.p ../src/sinedec ../raw/$1.raw %1.mdl -o $1_uq.raw ../src/sinedec ../raw/$1.raw %1.mdl --phase 0 -o $1_phase0.raw +../src/sinedec ../raw/$1.raw %1.mdl --lpc 10 -o $1_lpc10.raw diff --git a/codec2/src/dump.c b/codec2/src/dump.c index 9e5ad14c..0f66ae27 100644 --- a/codec2/src/dump.c +++ b/codec2/src/dump.c @@ -47,6 +47,7 @@ static FILE *fsq = NULL; static FILE *fdec = NULL; static FILE *fsnr = NULL; static FILE *fak = NULL; +static FILE *fbg = NULL; static char prefix[MAX_STR]; @@ -86,6 +87,8 @@ void dump_off(){ fclose(fsnr); if (fak != NULL) fclose(fak); + if (fbg != NULL) + fclose(fbg); } void dump_Sn(float Sn[]) { @@ -368,4 +371,20 @@ void dump_dec(COMP Fw[]) { fprintf(fdec,"\n"); } +void dump_bg(float e, float bg_est, float percent_uv) { + char 
s[MAX_STR]; + + if (!dumpon) return; + + if (fbg == NULL) { + sprintf(s,"%s_bg.txt", prefix); + fbg = fopen(s, "wt"); + assert(fbg != NULL); + } + + fprintf(fbg,"%f\t%f\t%f\n", e, bg_est, percent_uv); +} + + + diff --git a/codec2/src/dump.h b/codec2/src/dump.h index 59ccb348..74c20dac 100644 --- a/codec2/src/dump.h +++ b/codec2/src/dump.h @@ -31,9 +31,13 @@ void dump_on(char filename_prefix[]); void dump_off(); + void dump_Sn(float Sn[]); void dump_Sw(COMP Sw[]); void dump_Sw_(COMP Sw_[]); + +/* amplitude modelling */ + void dump_model(MODEL *m); void dump_quantised_model(MODEL *m); void dump_Pw(COMP Pw[]); @@ -53,4 +57,8 @@ void dump_dec(COMP Fw[]); void dump_Fw(COMP Fw[]); void dump_e(float e_hz[]); +/* post filter */ + +void dump_bg(float e, float bg_est, float percent_uv); + #endif diff --git a/codec2/src/listen.sh b/codec2/src/listen.sh index 54c2db1d..670b191d 100755 --- a/codec2/src/listen.sh +++ b/codec2/src/listen.sh @@ -4,6 +4,6 @@ # # Run menu with common sample file options, headphone version -../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw +../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw diff --git a/codec2/src/listen1.sh b/codec2/src/listen1.sh index 281e68ea..a9b156ce 100755 --- a/codec2/src/listen1.sh +++ b/codec2/src/listen1.sh @@ -4,6 +4,6 @@ # # Run menu with common sample file options, headphone version -../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw -d /dev/dsp1 +../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw -d /dev/dsp1 diff --git a/codec2/src/phase.c b/codec2/src/phase.c index 80016f93..25dcf946 100644 --- a/codec2/src/phase.c +++ b/codec2/src/phase.c @@ -328,7 +328,7 @@ void phase_synth_zero_order( if (Lrand < 1) Lrand = 1; if (Lrand > model.L) Lrand = model.L; } - + /* update excitation fundamental phase track */ ex_phase[0] += (*prev_Wo+model.Wo)*N/2.0; @@ -342,6 +342,7 @@ void phase_synth_zero_order( /* generate excitation */ if (m <= Lrand) { + b = 
floor(m*model.Wo*FFT_DEC/TWO_PI + 0.5); Ex[m].real = cos(ex_phase[0]*m); Ex[m].imag = sin(ex_phase[0]*m); @@ -350,6 +351,16 @@ void phase_synth_zero_order( "clicky"*/ //Ex[m].real = cos(ex_phase[0]*m + model.Wo*m*m*0.3); //Ex[m].imag = sin(ex_phase[0]*m + model.Wo*m*m*0.3); + + /* following is an experiment to use the phase of a glottal pulse + (see octave/glottal.m) in an attempt to make mmt1 and hts1 a little + less "clicky", i.e. disperse the pulse energy away from the point + of onset. Result was no difference in speech quality, in fact + no difference at all. Could be an implementation error I guess. */ + //b = floor(m*model->Wo*FFT_DEC/TWO_PI + 0.5); + //Ex[m].real = cos(ex_phase[0]*m + glottal[b]); + //Ex[m].imag = sin(ex_phase[0]*m + glottal[b]); + } else { /* we probably don't need to LPC filter phase in unvoiced case, diff --git a/codec2/src/postfilter.c b/codec2/src/postfilter.c new file mode 100644 index 00000000..6ddfceb0 --- /dev/null +++ b/codec2/src/postfilter.c @@ -0,0 +1,130 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: postfilter.c + AUTHOR......: David Rowe + DATE CREATED: 13/09/09 + + Postfilter to improve sound quality for speech with high levels of + background noise. Unlike mixed-excitation models, it requires no bits + to be transmitted to handle background noise. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include +#include +#include +#include "postfilter.h" +#include "dump.h" + +/*---------------------------------------------------------------------------*\ + + DEFINES + +\*---------------------------------------------------------------------------*/ + +#define BG_THRESH 40.0 /* only consider low level signals for bg_est */ +#define BG_BETA 0.1 /* averaging filter constant */ + +/*---------------------------------------------------------------------------*\ + + postfilter() + + The post filter is designed to help with speech corrupted by + background noise. The zero phase model tends to make speech with + background noise sound "clicky". With high levels of background + noise the low level inter-formant parts of the spectrum will contain + noise rather than speech harmonics, so modelling them as voiced + (i.e. a continuous, non-random phase track) is inaccurate. + + Some codecs (like MBE) have a mixed voicing model that breaks the + spectrum into voiced and unvoiced regions. Several bits/frame + (5-12) are required to transmit the frequency selective voicing + information. Mixed excitation also requires accurate voicing + estimation (parameter estimators always break occasionally under + exceptional conditions). + + In our case we use a post filter approach which requires no + additional bits to be transmitted. The decoder measures the average + level of the background noise during unvoiced frames. If a harmonic + is less than this level it is made unvoiced by randomising its + phase. + + This idea is rather experimental. Some potential problems that may + happen: + + 1/ If someone says "aaaaaaaahhhhhhhhh" will the background estimator track + up to speech level? This would be a bad thing. 
+ + 2/ If background noise suddenly disappears from the source speech does + the estimate drop quickly? What if noise suddenly re-appears? + + 3/ Background noise with a non-flat spectrum. Current algorithm just + considers the spectrum as a whole, but this could be broken up into + bands, each with their own estimator. + + 4/ Males and females with the same level of background noise. Check + performance is the same. Changing Wo affects width of each band, may + affect bg energy estimates. + + 5/ Not sure what happens during long periods of unvoiced speech + e.g. "sshhhhhhh" + +\*---------------------------------------------------------------------------*/ + +void postfilter( + MODEL *model, + int voiced, + float *bg_est +) +{ + int m, uv; + float e; + + /* determine average energy across spectrum; e is seeded with a tiny non-zero value so log10() stays finite on an all-zero (silent) frame, which would otherwise drag *bg_est to -inf for good */ + + e = 1E-12; + for(m=1; m<=model->L; m++) + e += model->A[m]*model->A[m]; + + e = 10.0*log10(e/model->L); + + /* If beneath threshold, update bg estimate. The idea + of the threshold is to prevent updating during high level + speech. */ + + if ((e < BG_THRESH) && !voiced) + *bg_est = *bg_est*(1.0 - BG_BETA) + e*BG_BETA; + + /* now mess with phases during voiced frames to make any harmonics + less than our background estimate unvoiced. + */ + + uv = 0; + if (voiced) + for(m=1; m<=model->L; m++) + if (20.0*log10(model->A[m]) < *bg_est) { + model->phi[m] = TWO_PI*(float)rand()/RAND_MAX; + uv++; + } + + dump_bg(e, *bg_est, 100.0*uv/model->L); + +} diff --git a/codec2/src/postfilter.h b/codec2/src/postfilter.h new file mode 100644 index 00000000..9f7555c9 --- /dev/null +++ b/codec2/src/postfilter.h @@ -0,0 +1,36 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: postfilter.h + AUTHOR......: David Rowe + DATE CREATED: 13/09/09 + + Postfilter header file. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __POSTFILTER__ +#define __POSTFILTER__ + +#include "sine.h" + +void postfilter(MODEL *model, int voiced, float *bg_est); + +#endif diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c index a39e96b9..d25bcf78 100644 --- a/codec2/src/sinedec.c +++ b/codec2/src/sinedec.c @@ -34,6 +34,7 @@ #include "phase.h" #include "lpc.h" #include "synth.h" +#include "postfilter.h" /*---------------------------------------------------------------------------*\ @@ -88,6 +89,9 @@ int main(int argc, char *argv[]) int phase, phase_model; float prev_Wo, ex_phase; + int postfilt; + float bg_est; + if (argc < 3) { printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n"); printf(" [--dump DumpFilePrefix]\n"); @@ -161,6 +165,9 @@ int main(int argc, char *argv[]) assert((phase_model == 0) || (phase_model == 1)); } + bg_est = 0.0; + postfilt = switch_present("--postfilter",argc,argv); + /* Initialise ------------------------------------------------------------*/ init_decoder(); @@ -225,8 +232,8 @@ int main(int argc, char *argv[]) dump_snr(snr); if (phase_model == 0) { /* just to make sure we are not cheating - kill all phases */ - for(i=0; i2.0, &bg_est); + + /* Synthesise speech */ if (fout != NULL) { diff --git a/codec2/src/spec.c b/codec2/src/spec.c index fbad779c..37fbbc5b 100644 --- a/codec2/src/spec.c +++ b/codec2/src/spec.c @@ -85,7 +85,8 
@@ void estimate_amplitudes() model.phi[m] = atan2(Sw[b].imag,Sw[b].real); - /* construct all voiced model spectrum and estimate voicing */ + #ifdef MBE_VOICING_NEEDED + /* construct all voiced model spectrum and estimate voicing using MBE model */ E = 0.0; for(i=am; i