From: drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Date: Sun, 13 Sep 2009 01:44:55 +0000 (+0000)
Subject: added postfilter which improves zero phase model performance with bg noise, e.g mmt1_... 
X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=f8ec49b59f36e14e8c1addd1dcfe4bcd0b94fafb;p=freetel-svn-tracking.git

added postfilter which improves zero phase model performance with bg noise, e.g mmt1_phase0 now sounds closer to mmt_uq

git-svn-id: https://svn.code.sf.net/p/freetel/code@60 01035d8c-6547-0410-b346-abe4f91aad63
---

diff --git a/codec2/src/Makefile b/codec2/src/Makefile
index 467adef8..f7a49d86 100644
--- a/codec2/src/Makefile
+++ b/codec2/src/Makefile
@@ -5,7 +5,7 @@ SINENC_OBJ  = sinenc.o globals.o initenc.o four1.o refine.o spec.o dump.o
 SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \
               quantise.o lpc.o dump.o refine.o ../speex/lsp.o  \
               ../speex/quant_lsp.o ../speex/bits.o ../speex/lsp_tables_nb.o \
-              ../speex/high_lsp_tables.o phase.o
+              ../speex/high_lsp_tables.o phase.o postfilter.o
 
 all: sinenc sinedec
 
diff --git a/codec2/src/code.sh b/codec2/src/code.sh
index 1a37c417..7be303c5 100644
--- a/codec2/src/code.sh
+++ b/codec2/src/code.sh
@@ -8,4 +8,5 @@
 ../src/sinenc ../raw/$1.raw %1.mdl 300 ../unittest/$1_nlp.p
 ../src/sinedec ../raw/$1.raw %1.mdl -o $1_uq.raw
 ../src/sinedec ../raw/$1.raw %1.mdl --phase 0 -o $1_phase0.raw
+../src/sinedec ../raw/$1.raw %1.mdl --lpc 10 -o $1_lpc10.raw
 
diff --git a/codec2/src/dump.c b/codec2/src/dump.c
index 9e5ad14c..0f66ae27 100644
--- a/codec2/src/dump.c
+++ b/codec2/src/dump.c
@@ -47,6 +47,7 @@ static FILE *fsq = NULL;
 static FILE *fdec = NULL;
 static FILE *fsnr = NULL;
 static FILE *fak = NULL;
+static FILE *fbg = NULL;
 
 static char  prefix[MAX_STR];
 
@@ -86,6 +87,8 @@ void dump_off(){
 	fclose(fsnr);
     if (fak != NULL)
 	fclose(fak);
+    if (fbg != NULL)
+	fclose(fbg);
 }
 
 void dump_Sn(float Sn[]) {
@@ -368,4 +371,20 @@ void dump_dec(COMP Fw[]) {
     fprintf(fdec,"\n");    
 }
 
+/* Log one frame of postfilter state to <prefix>_bg.txt (tab separated). */
+void dump_bg(float e, float bg_est, float percent_uv) {
+    char s[MAX_STR];
+
+    if (!dumpon) return;
+    if (fbg == NULL) {      /* file is opened lazily on the first call */
+	/* bounded: prefix is MAX_STR bytes too, so prefix+suffix may not fit */
+	snprintf(s, sizeof(s), "%s_bg.txt", prefix);
+	fbg = fopen(s, "wt");
+	assert(fbg != NULL);
+    }
+    fprintf(fbg,"%f\t%f\t%f\n", e, bg_est, percent_uv);
+}
+
+
+
 
diff --git a/codec2/src/dump.h b/codec2/src/dump.h
index 59ccb348..74c20dac 100644
--- a/codec2/src/dump.h
+++ b/codec2/src/dump.h
@@ -31,9 +31,13 @@
 
 void dump_on(char filename_prefix[]);
 void dump_off();
+
 void dump_Sn(float Sn[]);
 void dump_Sw(COMP Sw[]);
 void dump_Sw_(COMP Sw_[]);
+
+/* amplitude modelling */
+
 void dump_model(MODEL *m);
 void dump_quantised_model(MODEL *m);
 void dump_Pw(COMP Pw[]);
@@ -53,4 +57,8 @@ void dump_dec(COMP Fw[]);
 void dump_Fw(COMP Fw[]);
 void dump_e(float e_hz[]);
 
+/* post filter */
+
+void dump_bg(float e, float bg_est, float percent_uv);
+
 #endif
diff --git a/codec2/src/listen.sh b/codec2/src/listen.sh
index 54c2db1d..670b191d 100755
--- a/codec2/src/listen.sh
+++ b/codec2/src/listen.sh
@@ -4,6 +4,6 @@
 #
 # Run menu with common sample file options, headphone version
 
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw
 
 
diff --git a/codec2/src/listen1.sh b/codec2/src/listen1.sh
index 281e68ea..a9b156ce 100755
--- a/codec2/src/listen1.sh
+++ b/codec2/src/listen1.sh
@@ -4,6 +4,6 @@
 #
 # Run menu with common sample file options, headphone version
 
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw -d /dev/dsp1
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw -d /dev/dsp1
 
 
diff --git a/codec2/src/phase.c b/codec2/src/phase.c
index 80016f93..25dcf946 100644
--- a/codec2/src/phase.c
+++ b/codec2/src/phase.c
@@ -328,7 +328,7 @@ void phase_synth_zero_order(
     if (Lrand < 1) Lrand = 1;
     if (Lrand > model.L) Lrand = model.L;
   }
-
+  
   /* update excitation fundamental phase track */
 
   ex_phase[0] += (*prev_Wo+model.Wo)*N/2.0;
@@ -342,6 +342,7 @@ void phase_synth_zero_order(
     /* generate excitation */
 
     if (m <= Lrand) {
+	b = floor(m*model.Wo*FFT_DEC/TWO_PI + 0.5);
         Ex[m].real = cos(ex_phase[0]*m);
 	Ex[m].imag = sin(ex_phase[0]*m);
 
@@ -350,6 +351,16 @@ void phase_synth_zero_order(
 	   "clicky"*/
         //Ex[m].real = cos(ex_phase[0]*m + model.Wo*m*m*0.3);
 	//Ex[m].imag = sin(ex_phase[0]*m + model.Wo*m*m*0.3);
+
+	/* following is an experiment to use the phase of a glottal pulse
+	   (see octave/glottal.m) in an attempt to make mmt1 and hts1 a little
+	   less "clicky", i.e. disperse the pulse energy away from the point
+	   of onset.  Result was no difference in speech quality, in fact
+	   no difference at all. Could be an implementation error I guess. */
+	//b = floor(m*model->Wo*FFT_DEC/TWO_PI + 0.5);
+        //Ex[m].real = cos(ex_phase[0]*m + glottal[b]);
+	//Ex[m].imag = sin(ex_phase[0]*m + glottal[b]);
+	   
     }
     else {
 	/* we probably don't need to LPC filter phase in unvoiced case,
diff --git a/codec2/src/postfilter.c b/codec2/src/postfilter.c
new file mode 100644
index 00000000..6ddfceb0
--- /dev/null
+++ b/codec2/src/postfilter.c
@@ -0,0 +1,130 @@
+/*---------------------------------------------------------------------------*\
+                                                                             
+  FILE........: postfilter.c
+  AUTHOR......: David Rowe                                                          
+  DATE CREATED: 13/09/09
+                                                                             
+  Postfilter to improve sound quality for speech with high levels of
+  background noise.  Unlike mixed-excitation models requires no bits
+  to be transmitted to handle background noise.
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "postfilter.h"
+#include "dump.h"
+
+/*---------------------------------------------------------------------------*\
+
+                                DEFINES
+
+\*---------------------------------------------------------------------------*/
+
+#define BG_THRESH 40.0     /* only consider low level signals for bg_est  */
+#define BG_BETA    0.1     /* averaging filter constant                   */
+
+/*---------------------------------------------------------------------------*\
+
+  postfilter()
+
+  The post filter is designed to help with speech corrupted by
+  background noise.  The zero phase model tends to make speech with
+  background noise sound "clicky".  With high levels of background
+  noise the low level inter-formant parts of the spectrum will contain
+  noise rather than speech harmonics, so modelling them as voiced
+  (i.e. a continuous, non-random phase track) is inaccurate.
+
+  Some codecs (like MBE) have a mixed voicing model that breaks the
+  spectrum into voiced and unvoiced regions.  Several bits/frame
+  (5-12) are required to transmit the frequency selective voicing
+  information.  Mixed excitation also requires accurate voicing
+  estimation (parameter estimators always break occasionally under
+  exceptional condition).
+
+  In our case we use a post filter approach which requires no
+  additional bits to be transmitted.  The decoder measures the average
+  level of the background noise during unvoiced frames.  If a harmonic
+  is less than this level it is made unvoiced by randomising its
+  phase.
+
+  This idea is rather experimental.  Some potential problems that may
+  happen:
+ 
+  1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track
+     up to speech level?  This would be a bad thing.
+
+  2/ If background noise suddenly disappears from the source speech does
+     estimate drop quickly?  What if noise suddenly re-appears?
+
+  3/ Background noise with a non-flat spectrum.  Current algorithm just
+     considers spectrum as a whole, but this could be broken up into
+     bands, each with their own estimator.
+
+  4/ Males and females with the same level of background noise.  Check
+     performance the same.  Changing Wo affects width of each band, may
+     affect bg energy estimates.
+
+  5/ Not sure what happens during long periods of unvoiced speech
+     e.g. "sshhhhhhh"
+  
+\*---------------------------------------------------------------------------*/
+
+void postfilter(
+  MODEL *model,   /* harmonic model for this frame; phi[] may be modified */
+  int    voiced,  /* non-zero if this frame is considered voiced          */
+  float *bg_est   /* in/out: running background level estimate (dB)       */
+)
+{
+  int   m, uv;
+  float e;
+
+  /* Determine average energy across spectrum.  The sum starts from a
+     tiny positive value so an all-zero (silent) frame gives a finite
+     dB figure: log10(0) is -inf, and once -inf is averaged into
+     *bg_est it never recovers, which would disable the filter. */
+  e = 1E-12;
+  for(m=1; m<=model->L; m++)
+      e += model->A[m]*model->A[m];
+
+  e = 10.0*log10(e/model->L);
+
+  /* If beneath threshold, update bg estimate.  The idea
+     of the threshold is to prevent updating during high level
+     speech. */
+
+  if ((e < BG_THRESH) && !voiced)
+      *bg_est =  *bg_est*(1.0 - BG_BETA) + e*BG_BETA;
+
+  /* now mess with phases during voiced frames to make any harmonics
+     less than our background estimate unvoiced. */
+
+  uv = 0;
+  if (voiced)
+      for(m=1; m<=model->L; m++)
+	  if (20.0*log10(model->A[m]) < *bg_est) {
+	      model->phi[m] = TWO_PI*(float)rand()/RAND_MAX;
+	      uv++;
+	  }
+
+  dump_bg(e, *bg_est, 100.0*uv/model->L);
+}
diff --git a/codec2/src/postfilter.h b/codec2/src/postfilter.h
new file mode 100644
index 00000000..9f7555c9
--- /dev/null
+++ b/codec2/src/postfilter.h
@@ -0,0 +1,36 @@
+/*---------------------------------------------------------------------------*\
+                                                                             
+  FILE........: postfilter.h
+  AUTHOR......: David Rowe                                                          
+  DATE CREATED: 13/09/09
+                                                                             
+  Postfilter header file.
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __POSTFILTER__
+#define __POSTFILTER__
+
+#include "sine.h"
+/* Updates *bg_est; in voiced frames randomises phi[] of harmonics below it. */
+void postfilter(MODEL *model, int voiced, float *bg_est);
+
+#endif
diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c
index a39e96b9..d25bcf78 100644
--- a/codec2/src/sinedec.c
+++ b/codec2/src/sinedec.c
@@ -34,6 +34,7 @@
 #include "phase.h"
 #include "lpc.h"
 #include "synth.h"
+#include "postfilter.h"
 
 /*---------------------------------------------------------------------------*\
                                                                              
@@ -88,6 +89,9 @@ int main(int argc, char *argv[])
   int phase, phase_model;
   float prev_Wo, ex_phase;
 
+  int   postfilt;
+  float bg_est;
+
   if (argc < 3) {
     printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n");
     printf("       [--dump DumpFilePrefix]\n");
@@ -161,6 +165,9 @@ int main(int argc, char *argv[])
       assert((phase_model == 0) || (phase_model == 1));
   }
 
+  bg_est = 0.0;
+  postfilt = switch_present("--postfilter",argc,argv);
+
   /* Initialise ------------------------------------------------------------*/
 
   init_decoder();
@@ -225,8 +232,8 @@ int main(int argc, char *argv[])
 	dump_snr(snr);
 	if (phase_model == 0) {
 	    /* just to make sure we are not cheating - kill all phases */
-	    for(i=0; i<MAX_AMP; i++)
-	    	model.phi[i] = 0;
+	    //for(i=0; i<MAX_AMP; i++)
+	    //	model.phi[i] = 0;
 	    phase_synth_zero_order(snr, H, &prev_Wo, &ex_phase);
 	}
 
@@ -236,6 +243,10 @@ int main(int argc, char *argv[])
         }
     }
 
+    if (postfilt)
+	postfilter(&model, snr>2.0, &bg_est);
+
+
     /* Synthesise speech */
 
     if (fout != NULL) {
diff --git a/codec2/src/spec.c b/codec2/src/spec.c
index fbad779c..37fbbc5b 100644
--- a/codec2/src/spec.c
+++ b/codec2/src/spec.c
@@ -85,7 +85,8 @@ void estimate_amplitudes()
 
     model.phi[m] = atan2(Sw[b].imag,Sw[b].real);
 
-    /* construct all voiced model spectrum and estimate voicing */
+    #ifdef MBE_VOICING_NEEDED
+    /* construct all voiced model spectrum and estimate voicing using MBE model */
 
     E = 0.0;
     for(i=am; i<bm; i++) {
@@ -95,6 +96,7 @@ void estimate_amplitudes()
       E = pow(Sw[i].real - Sw_[i].real, 2.0) + pow(Sw[i].imag - Sw_[i].imag, 2.0);
     }
     model.v[m] = E/den;
+    #endif
   }
 }
 
diff --git a/codec2/src/synth.c b/codec2/src/synth.c
index e9affb45..9f2d8c66 100644
--- a/codec2/src/synth.c
+++ b/codec2/src/synth.c
@@ -95,7 +95,13 @@ void synthesise_mixed(
   track. So in unvoiced frames or in cases where the fundamental
   frequency varies by more that 20%, we don't add the small frequency
   offset.
-  
+
+  Result: when tested there was no difference in output speech quality.
+  The partial unvoiced sound when using zero phase model was found to be
+  due to mis-alignment of the LPC analysis window and accidental addition
+  of a random phase component.  So we are sticking with synthesise_mixed()
+  above for now.
+
 \*---------------------------------------------------------------------------*/
 
 void synthesise_continuous_phase(