From 3e4532e3466bafefe6ef9213ba8df301739f222c Mon Sep 17 00:00:00 2001
From: drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Date: Sun, 4 Oct 2009 01:10:29 +0000
Subject: [PATCH] changed interpolation model in zero phase model, improves
 unvoiced sounds

git-svn-id: https://svn.code.sf.net/p/freetel/code@68 01035d8c-6547-0410-b346-abe4f91aad63
---
 codec2/src/phase.c   | 45 ++++++++++++++++++++------------------------
 codec2/src/phase.h   |  2 +-
 codec2/src/sinedec.c | 29 +++++++++++++++++++---------
 3 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/codec2/src/phase.c b/codec2/src/phase.c
index 2b59f7f7..2fdef26b 100644
--- a/codec2/src/phase.c
+++ b/codec2/src/phase.c
@@ -261,11 +261,9 @@ float phase_model_first_order(
    first harmonic would advance (pi/20)*80 = 4*pi or two complete
    cycles.
 
-   One complication is that two adjacent frames will have different
-   Wo, so we take the average of the two frames to track the
-   excitation phase of the fundamental (first harmonic):
+   We track the excitation phase of the fundamental (first harmonic):
 
-     arg[E[1]] = ((Wo + prev_Wo)/2)*N;
+     arg[E[1]] = Wo*N;
 
    We then relate the phase of the m-th excitation harmonic to the
    phase of the fundamental as:
@@ -299,7 +297,6 @@ float phase_model_first_order(
 void phase_synth_zero_order(
   float  snr,     /* SNR from first order model                */
   COMP   H[],     /* LPC spectra samples                       */
-  float *prev_Wo, /* last frames Wo (we will update this here) */
   float *ex_phase /* excitation phase of fundamental           */
 )
 {
@@ -325,24 +322,21 @@ void phase_synth_zero_order(
   Lrand = model.L;
   if (snr < VTHRESH2) {
     Lrand = floor(model.L*(snr-VTHRESH1)/(VTHRESH2-VTHRESH1));
-    if (Lrand < 1) Lrand = 1;
+    if (Lrand < 1) Lrand = 0;
     if (Lrand > model.L) Lrand = model.L;
   }
-  
-  /* update excitation fundamental phase track, this sets
-     the position of each pitch pulse during voiced speech */
 
-  ex_phase[0] += (*prev_Wo+model.Wo)*N/2.0;
-  ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
-
-  /* After much experimentation I found that a few percent of jitter
-     was effective in reducing "clicky" artifact in hts1 and mmt1. The
-     peaks level of the synthesised speech was reduced to levels closer
-     to the orginal speech as well.*/
+  /*
+     Update the excitation fundamental phase track; this sets the position
+     of each pitch pulse during voiced speech.  After much experimentation
+     I found that using just this frame's Wo improved quality for unvoiced
+     (UV) sounds compared to averaging the Wo of two adjacent frames:
+
+     ex_phase[0] += (*prev_Wo+model.Wo)*N/2;
+  */
 
-  ex_phase[0] += 0.05*TWO_PI*(0.5 - (float)rand()/RAND_MAX);
-  
-  *prev_Wo = model.Wo;
+  ex_phase[0] += (model.Wo)*N;
+  ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
 
   /* now modify this frames phase using zero phase model */
 
@@ -352,7 +346,7 @@ void phase_synth_zero_order(
 
     if (m <= Lrand) {
 	Ex[m].real = cos(ex_phase[0]*m);
-	Ex[m].imag = sin(ex_phase[0]*m);
+        Ex[m].imag = sin(ex_phase[0]*m);
 
 	/* following is an experiment in dispersing pulse energy over
 	   time, didn't really change sound at all, e.g. mmt1 still
@@ -366,7 +360,7 @@ void phase_synth_zero_order(
 	//Ex[m].imag = sin(ex_phase[0]*m + model.Wo*m*m*0.3);
 
 	/* following is an experiment to use the phase of a glottal pulse
-	   (see octave/glottal.m) in an attempt io make mmt1 and hts1 a little
+	   (see octave/glottal.m) in an attempt to make mmt1 and hts1 a little
 	   less "clicky", i.e. disperse the pusle energy away from the point
 	   of onset.  Result was no difference in speech quality, in fact
 	   no difference at all. Could be an implementation error I guess. 
@@ -375,13 +369,14 @@ void phase_synth_zero_order(
 	//b = floor(m*model->Wo*FFT_DEC/TWO_PI + 0.5);
         //Ex[m].real = cos(ex_phase[0]*m + glottal[b]);
 	//Ex[m].imag = sin(ex_phase[0]*m + glottal[b]);
-	   
+
     }
     else {
-	/* we probably don't need to LPC filter phase in unvoiced case,
-	   maybe test this theory some time */
+	/* When testing a few samples I found that the LPC filter
+	   phase is not needed in the unvoiced case, but there is no
+	   harm in keeping it. */
 	float phi = TWO_PI*(float)rand()/RAND_MAX;
-	Ex[m].real = cos(phi);
+        Ex[m].real = cos(phi);
 	Ex[m].imag = sin(phi);
     }
 
diff --git a/codec2/src/phase.h b/codec2/src/phase.h
index e7c58300..996ed3fc 100644
--- a/codec2/src/phase.h
+++ b/codec2/src/phase.h
@@ -33,7 +33,7 @@
 
 void aks_to_H(MODEL *model, float aks[], float G, COMP H[], int order);
 float phase_model_first_order(float aks[], COMP H[], float *n_min, COMP *min_Am);
-void phase_synth_zero_order(float snr, COMP H[], float *prev_Wo, float *ex_phase);
+void phase_synth_zero_order(float snr, COMP H[], float *ex_phase);
 void phase_synth_first_order(float snr, COMP H[], float n_min, COMP min_Am);
 
 #endif
diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c
index b63547e5..3785d916 100644
--- a/codec2/src/sinedec.c
+++ b/codec2/src/sinedec.c
@@ -87,12 +87,14 @@ int main(int argc, char *argv[])
   int dump;
   
   int phase, phase_model;
-  float prev_Wo, ex_phase;
+  float ex_phase[1];
 
   int   postfilt;
   float bg_est;
 
-
+  int   hand_snr;
+  FILE *fsnr;
+  
   if (argc < 3) {
     printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n");
     printf("       [--dump DumpFilePrefix]\n");
@@ -155,8 +157,6 @@ int main(int argc, char *argv[])
 
   lsp = switch_present("--lsp",argc,argv);
   lsp_quantiser = 0;
-  if (lsp) 
-      lsp_quantiser = atoi(argv[lsp+1]);
 
   /* phase_model 0: zero phase
      phase_model 1: 1st order polynomial */
@@ -164,7 +164,13 @@ int main(int argc, char *argv[])
   if (phase) {
       phase_model = atoi(argv[phase+1]);
       assert((phase_model == 0) || (phase_model == 1));
-      ex_phase = 0;
+      ex_phase[0] = 0;
+  }
+
+  hand_snr = switch_present("--hand_snr",argc,argv);
+  if (hand_snr) {
+      fsnr = fopen(argv[hand_snr+1],"rt");
+      assert(fsnr != NULL);
   }
 
   bg_est = 0.0;
@@ -204,11 +210,11 @@ int main(int argc, char *argv[])
     if (phase) {
 	float Wn[M];		        /* windowed speech samples */
 	float Rk[PHASE_LPC_ORD+1];	/* autocorrelation coeffs  */
-	float ak_phase[PHASE_LPC_ORD+1];/* LPCs                    */
+        float ak_phase[PHASE_LPC_ORD+1];/* LPCs                    */
         COMP  H[MAX_AMP];               /* LPC freq domain samples */
 	float n_min;
 	COMP  min_Am;
-	
+  	
 	dump_phase(&model.phi[0]);
 
 	/* Determine LPCs for phase modelling.  Note that we may also
@@ -235,7 +241,9 @@ int main(int argc, char *argv[])
 	    /* just to make sure we are not cheating - kill all phases */
 	    for(i=0; i<MAX_AMP; i++)
 	    	model.phi[i] = 0;
-	    phase_synth_zero_order(snr, H, &prev_Wo, &ex_phase);
+	    if (hand_snr)
+		fscanf(fsnr,"%f\n",&snr);
+	    phase_synth_zero_order(snr, H, ex_phase);
 	}
 
 	if (phase_model == 1) {
@@ -251,7 +259,7 @@ int main(int argc, char *argv[])
     /* optional LPC model amplitudes */
 
     if (lpc_model) {
-	snr = lpc_model_amplitudes(Sn, &model, order, lsp_quantiser, ak);
+	snr = lpc_model_amplitudes(Sn, &model, order, lsp, ak);
 	sum_snr += snr;
         dump_quantised_model(&model);
     }
@@ -285,6 +293,9 @@ int main(int argc, char *argv[])
   if (dump)
       dump_off();
 
+  if (hand_snr)
+    fclose(fsnr);
+
   return 0;
 }
 
-- 
2.25.1