From: drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Date: Sun, 18 Oct 2009 23:07:54 +0000 (+0000)
Subject: interp function working with minor artifacts, about to clean up
X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=16f791e6d5c97dad096f725b81e43aa0b6c9bbf6;p=freetel-svn-tracking.git

interp function working with minor artifacts, about to clean up

git-svn-id: https://svn.code.sf.net/p/freetel/code@76 01035d8c-6547-0410-b346-abe4f91aad63
---

diff --git a/codec2/src/Makefile b/codec2/src/Makefile
index f7a49d86..03036bc0 100644
--- a/codec2/src/Makefile
+++ b/codec2/src/Makefile
@@ -5,7 +5,7 @@ SINENC_OBJ  = sinenc.o globals.o initenc.o four1.o refine.o spec.o dump.o
 SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \
               quantise.o lpc.o dump.o refine.o ../speex/lsp.o  \
               ../speex/quant_lsp.o ../speex/bits.o ../speex/lsp_tables_nb.o \
-              ../speex/high_lsp_tables.o phase.o postfilter.o
+              ../speex/high_lsp_tables.o phase.o postfilter.o interp.o
 
 all: sinenc sinedec
 
diff --git a/codec2/src/interp.c b/codec2/src/interp.c
new file mode 100644
index 00000000..6aad899c
--- /dev/null
+++ b/codec2/src/interp.c
@@ -0,0 +1,104 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: interp.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 9/10/09
+
+  Interpolation of 20ms frames to 10ms frames.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "interp.h"
+#include <string.h>
+
+/*---------------------------------------------------------------------------*\
+
+  interp()
+
+  Given two frames described by model parameters 20ms apart, determines the
+  model parameters of the 10ms frame between them.  Only synth is written
+  when *transition is set to 0; only a and b are written when it is set to
+  1.  Phases of synth are not interpolated, they must be set external to
+  this function.
+
+\*---------------------------------------------------------------------------*/
+
+void interp(
+  MODEL *prev,      /* previous frames model params                  */
+  MODEL *next,      /* next frames model params                      */
+  MODEL *synth,     /* interp model params for cont frame            */
+  MODEL *a,         /* prev frame extended into this frame           */
+  MODEL *b,         /* next frame extended into this frame           */
+  int   *transition /* non-zero if this is a transition frame, this
+		       information is used for synthesis             */
+)
+{
+    int m;
+
+    if (fabs(next->Wo - prev->Wo) < 0.1*next->Wo) {
+	/* If the Wo of adjacent frames is within 10% we synthesise a
+	   continuous track through this frame by linear interpolation
+	   of the amplitudes and Wo.  This is typical of a strongly
+	   voiced frame. */
+
+	*transition = 0;
+
+	synth->Wo = (next->Wo + prev->Wo)/2.0;
+
+	/* only harmonics 1..L present in both frames are averaged */
+	if (next->L > prev->L)
+	    synth->L = prev->L;
+	else
+	    synth->L = next->L;
+	for(m=1; m<=synth->L; m++) {
+	    synth->A[m] = (prev->A[m] + next->A[m])/2.0;
+	}
+    }
+    else {
+	/* Transition frame: adjacent frames have different Wo and L
+	   so set up two sets of model parameters based on prev and
+	   next.  We then synthesise both of them and add them
+	   together in the time domain.
+
+	   The transition case is typical of unvoiced speech or
+	   background noise or a voiced/unvoiced transition. */
+
+	*transition = 1;
+
+	/* a is prev extended forward into this frame, b is next
+	   extended backward into this frame.  Amplitudes are halved
+	   as the two are summed; phases are adjusted to time-shift
+	   each model forward or backward N samples.  sizeof(*a) keeps
+	   the copy size tied to the destination, not to a global. */
+
+	memcpy(a, prev, sizeof(*a));
+	memcpy(b, next, sizeof(*b));
+	for(m=1; m<=a->L; m++) {
+	    a->A[m] /= 2.0;
+	    a->phi[m] += a->Wo*m*N;
+	}
+	for(m=1; m<=b->L; m++) {
+	    b->A[m] /= 2.0;
+	    b->phi[m] -= b->Wo*m*N;
+	}
+    }
+}
+
diff --git a/codec2/src/interp.h b/codec2/src/interp.h
new file mode 100644
index 00000000..39a9175c
--- /dev/null
+++ b/codec2/src/interp.h
@@ -0,0 +1,37 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: interp.h
+  AUTHOR......: David Rowe
+  DATE CREATED: 9/10/09
+
+  Interpolation of 20ms frames to 10ms frames.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __INTERP__
+#define __INTERP__
+
+#include "sine.h"
+/* Determines the model parameters of the 10ms frame between prev and next. */
+void interp(MODEL *prev, MODEL *next, MODEL *synth, MODEL *a, MODEL *b, 
+	    int *transition);
+
+#endif /* __INTERP__ */
diff --git a/codec2/src/listen.sh b/codec2/src/listen.sh
index 338e0712..1a1676c5 100755
--- a/codec2/src/listen.sh
+++ b/codec2/src/listen.sh
@@ -4,7 +4,7 @@
 #
 # Run menu with common sample file options, headphone version
 
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_speex_8k.raw $2 $3
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_g729a.raw $2 $3
 
 
 
diff --git a/codec2/src/listen1.sh b/codec2/src/listen1.sh
index cdf03ca8..e3af29ae 100755
--- a/codec2/src/listen1.sh
+++ b/codec2/src/listen1.sh
@@ -4,6 +4,6 @@
 #
 # Run menu with common sample file options, headphone version
 
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_speex_8k.raw $2 $3 -d /dev/dsp1
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_g729a.raw $2 $3 -d /dev/dsp1
 
 
diff --git a/codec2/src/phase.c b/codec2/src/phase.c
index 8f695411..85105c55 100644
--- a/codec2/src/phase.c
+++ b/codec2/src/phase.c
@@ -305,10 +305,11 @@ void phase_synth_zero_order(
   int    voiced
 )
 {
-  int   m;
+  int   m,i;
   float new_phi;
   COMP  Ex[MAX_AMP];		/* excitation samples */
   COMP  A_[MAX_AMP];		/* synthesised harmonic samples */
+  float maxA;
 
   /* 
      Update excitation fundamental phase track, this sets the position
@@ -374,6 +375,26 @@ void phase_synth_zero_order(
     new_phi = atan2(A_[m].imag, A_[m].real+1E-12);
     model.phi[m] = new_phi;
   }
+
+  #ifdef CLICKY
+  /* Adding a random component to low energy harmonic phase seems to
+     improve low pitch speakers.  Adding a small random component to
+     low energy harmonic amplitudes also helps low pitch speakers after
+     LPC modelling (see LPC modelling/amplitude quantisation code).
+  */
+
+  maxA = 0.0;
+  for(i=1; i<=model.L; i++) {
+      if (model.A[i] > maxA) {
+	  maxA = model.A[i];
+      }
+  }
+  for(i=1; i<=model.L; i++) {
+      if (model.A[i] < 0.1*maxA) {
+	  model.phi[i] += 0.2*TWO_PI*(float)rand()/RAND_MAX;
+      }
+  }
+  #endif
 }
 
 /*---------------------------------------------------------------------------*\
diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c
index b9de0a1d..1f1ea094 100644
--- a/codec2/src/quantise.c
+++ b/codec2/src/quantise.c
@@ -36,7 +36,7 @@
 
 #define MAX_ORDER    20
 #define LSP_DELTA1 0.01         /* grid spacing for LSP root searches */
-#define MAX_CB       10         /* max number of codebooks */
+#define MAX_CB       20         /* max number of codebooks */
 
 /* describes each codebook  */
 
@@ -48,6 +48,7 @@ typedef struct {
 
 /* lsp_q describes entire quantiser made up of several codebooks */
 
+#ifdef OLDER
 /* 10+10+6+6 = 32 bit LSP difference split VQ */
 
 LSP_CB lsp_q[] = {
@@ -57,6 +58,21 @@ LSP_CB lsp_q[] = {
     {2,     64, "../unittest/lspd910.txt"},
     {0,    0, ""}
 };
+#endif
+
+LSP_CB lsp_q[] = {   /* entry i is used to quantise LSP i in lpc_model_amplitudes() */
+    {1,   16, "../unittest/lsp1.txt"},   /* NOTE(review): fields look like {k, m, file} - confirm against LSP_CB */
+    {1,   16, "../unittest/lsp2.txt"},
+    {1,   16, "../unittest/lsp3.txt"},
+    {1,   16, "../unittest/lsp4.txt"},
+    {1,   16, "../unittest/lsp5.txt"},
+    {1,   16, "../unittest/lsp6.txt"},
+    {1,   16, "../unittest/lsp7.txt"},
+    {1,    8, "../unittest/lsp8.txt"},
+    {1,    8, "../unittest/lsp9.txt"},
+    {1,    4, "../unittest/lsp10.txt"},
+    {0,    0, ""}   /* presumably the end-of-table marker - confirm against the loader */
+};
 
 /* ptr to each codebook */
 
@@ -306,6 +322,7 @@ float lpc_model_amplitudes(
   int   i,j;
   float snr;	
   float lsp[MAX_ORDER];
+  float lsp_hz[MAX_ORDER];
   float lsp_[MAX_ORDER];
   float lspd[MAX_ORDER];
   int   roots;            /* number of LSP roots found */
@@ -315,6 +332,8 @@ float lpc_model_amplitudes(
   float *cb;
   float wt[MAX_ORDER];
 
+  float maxA, dB;
+
   for(i=0; i<M; i++)
     Wn[i] = Sn[i]*w[i];
   autocorrelate(Wn,R,M,order);
@@ -329,6 +348,43 @@ float lpc_model_amplitudes(
     if (roots != order)
 	printf("LSP roots not found\n");
 
+    for(i=0; i<order; i++)
+	lsp_hz[i] = (4000.0/PI)*lsp[i];
+    
+    for(i=0; i<10; i++) {
+	k = lsp_q[i].k;
+	m = lsp_q[i].m;
+	cb = plsp_cb[i];
+	index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
+	lsp_hz[i] = cb[index*k];
+    }
+    
+    /*
+    for(i=0; i<order; i++)
+	lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX);
+    */
+
+    for(i=0; i<order; i++)
+	lsp[i] = (PI/4000.0)*lsp_hz[i];
+
+    for(i=1; i<5; i++) {
+	if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
+	    lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
+    }
+
+    /* as quantiser gaps increased, larger BW expansion was required
+       to prevent twinkly noises */
+    for(i=5; i<8; i++) {
+	if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
+	    lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
+    }
+    for(i=8; i<order; i++) {
+	if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
+	    lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
+    }
+
+    //#define OLD_VQ
+#ifdef OLD_VQ
     lspd[0] = lsp[0];
     for(i=1; i<order; i++)
 	lspd[i] = lsp[i] - lsp[i-1];
@@ -366,7 +422,9 @@ float lpc_model_amplitudes(
 	i++;
 	assert(i < MAX_CB);
     }
-    
+#else
+    l = 0;
+#endif    
     /* used during development: copy remaining LSPs from orig if we haven't
        quantised all of them */
     for(j=l; j<order; j++)
@@ -378,6 +436,27 @@ float lpc_model_amplitudes(
 
   aks_to_M2(ak,order,model,E,&snr);   /* {ak} -> {Am} LPC decode */
 
+  #ifdef CLICKY
+  /* Adding a random component to low energy harmonic phase seems to
+     improve low pitch speakers.  Adding a small random component to
+     low energy harmonic amplitudes also helps low pitch speakers after
+     LPC modelling (see LPC modelling/amplitude quantisation code).
+  */
+
+  maxA = 0.0;
+  for(i=1; i<=model->L; i++) {
+      if (model->A[i] > maxA) {
+	  maxA = model->A[i];
+      }
+  }
+  for(i=1; i<=model->L; i++) {
+      if (model->A[i] < 0.1*maxA) {
+	  dB = 3.0 - 6.0*(float)rand()/RAND_MAX;
+	  model->A[i] *= pow(10.0, dB/20.0);
+      }
+  }
+  #endif
+
   return snr;
 }
 
@@ -416,8 +495,7 @@ void aks_to_M2(
 
   for(i=0; i<FFT_DEC; i++) {
     Pw[i].real = 0.0;
-    Pw[i].imag = 0.0;
-  }
+    Pw[i].imag = 0.0;  }
 
   for(i=0; i<=order; i++)
     Pw[i].real = ak[i];
diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c
index 815f6984..b826c686 100644
--- a/codec2/src/sinedec.c
+++ b/codec2/src/sinedec.c
@@ -35,6 +35,7 @@
 #include "lpc.h"
 #include "synth.h"
 #include "postfilter.h"
+#include "interp.h"
 
 /*---------------------------------------------------------------------------*\
                                                                              
@@ -218,7 +219,7 @@ int main(int argc, char *argv[])
     dump_Sn(Sn);
     dft_speech(); dump_Sw(Sw);   
 
-    //dump_model(&model);
+    dump_model(&model);
 
     /* optional phase modelling - make sure this happens before LPC
        modelling of {Am} as first order model fit doesn't work well
@@ -258,8 +259,10 @@ int main(int argc, char *argv[])
 	    /* just to make sure we are not cheating - kill all phases */
 	    for(i=0; i<MAX_AMP; i++)
 	    	model.phi[i] = 0;
-	    if (hand_snr)
+	    if (hand_snr) {
 		fscanf(fsnr,"%f\n",&snr);
+		voiced = snr > 2.0;
+	    }
 	    phase_synth_zero_order(voiced, H, ex_phase, voiced);
 	}
 
@@ -270,7 +273,7 @@ int main(int argc, char *argv[])
         if (postfilt)
 	    postfilter(&model, voiced, &bg_est);
 
-        dump_phase_(&model.phi[0]);
+        //dump_phase_(&model.phi[0]);
     }
 
     /* optional LPC model amplitudes */
@@ -281,7 +284,55 @@ int main(int argc, char *argv[])
         dump_quantised_model(&model);
     }
 
- #define DEC
+    //#define MAKE_CLICKY
+#ifdef MAKE_CLICKY
+    {
+	float maxA = 0.0;
+	float dB;
+	int   max_m;
+
+	for(i=1; i<=model.L; i++) {
+	    if (model.A[i] > maxA) {
+		maxA = model.A[i];
+		max_m = i;
+	    }
+	}
+	for(i=1; i<=model.L; i++) {
+	    if (model.A[i] > 0.1*maxA) {
+		model.A[i] = 0.0;
+	    }
+	}
+
+    }
+#endif
+	
+
+    //#define REDUCE_CLICKY
+#ifdef REDUCE_CLICKY
+    {
+	float maxA = 0.0;
+	float dB;
+	int   max_m;
+
+	for(i=1; i<=model.L; i++) {
+	    if (model.A[i] > maxA) {
+		maxA = model.A[i];
+		max_m = i;
+	    }
+	}
+	for(i=1; i<=model.L; i++) {
+	    if (model.A[i] < 0.1*maxA) {
+		model.phi[i] += 0.2*TWO_PI*(float)rand()/RAND_MAX;
+		dB = 3.0 - 6.0*(float)rand()/RAND_MAX;
+		model.A[i] *= pow(10.0, dB/20.0);
+	    }
+	}
+
+    }
+#endif
+
+
+    //#define DEC
  #ifdef DEC
    /* Decimate to 20ms frame rate.  In the code we only send
       off frames to the receiver.  To simulate this on odd
@@ -330,48 +381,45 @@ int main(int argc, char *argv[])
 	/* even frame so we need to synthesise the model parameters by
 	   interpolating between adjacent frames */
 
-	model_synth = model_2;
-        voiced_synth = voiced && voiced_2;
 	if (fabs(model_1.Wo - model_3.Wo) < 0.1*model_1.Wo) {
 	    /* If the Wo of adjacent frames is within 10% we synthesise a 
 	       continuous track through this frame by linear interpolation
 	       of the amplitudes and Wo.  This is typical of a strongly 
 	       voiced frame.
 	    */
+
 	    transition = 0;
 
-	    /* continuous track through this frame */
-	    #define T
-	    #ifdef T
-		model_synth.Wo = (model_1.Wo + model_3.Wo)/2.0;
-		if (model_1.L > model_3.L)
-		    model_synth.L = model_3.L;
-		else
-		    model_synth.L = model_1.L;
-	    #endif
-		for(i=1; i<=model_synth.L; i++) {
-		    model_synth.A[i] = (model_3.A[i] + model_1.A[i])/2.0;
-		    /* cheat on phases for now, these were constructed using
-		       LPC model from actual speech for this frame - fix later */
-		    model_synth.phi[i] = model_2.phi[i];
-		}
-		vf++;
+	    model_synth.Wo = (model_1.Wo + model_3.Wo)/2.0;
+	    if (model_1.L > model_3.L)
+		model_synth.L = model_3.L;
+	    else
+		model_synth.L = model_1.L;
+	    for(i=1; i<=model_synth.L; i++) {
+		model_synth.A[i] = (model_3.A[i] + model_1.A[i])/2.0;
+		/* cheat on phases for now, these were constructed using
+		   LPC model from actual speech for this frame - fix later */
+		model_synth.phi[i] = model_2.phi[i];
+	    }
 	}
 	else {
 	    /* 
 	       transition frame, adjacent frames have different Wo and
 	       L so set up two sets of model parameters based on
 	       previous and next frame.  We then synthesise both of
-	       them and add them together in the time domain.  Note
-	       the adjustments to phase to shift the timing of the
-	       model parameters forward or back N samples.  
+	       them and add them together in the time domain.  
 
-	       This case is typical of unvoiced speech or background noise
-	       of a voiced to unvoiced transition.
+	       This case is typical of unvoiced speech or background
+	       noise or a voiced/unvoiced transition.
 	    */
 
 	    transition = 1;
 
+	    /* model_a is the previous frames extended forward into
+	       this frame, model_b is the next frame extended backward
+	       into this frame.  Note the adjustments to phase to
+	       time-shift the model forward or backward N samples. */
+
 	    memcpy(&model_a, &model_3, sizeof(model));
 	    memcpy(&model_b, &model_1, sizeof(model));
 	    for(i=1; i<=model_a.L; i++) {
@@ -392,6 +440,30 @@ int main(int argc, char *argv[])
     model_2 = model_1;
     model_1 = model;
     model = model_synth;
+#endif
+    //dump_quantised_model(&model);
+
+#define INTERP
+#ifdef INTERP
+    if (frames%2) {
+
+	/* odd frames use the original model parameters */
+
+	model_synth = model_2;
+	transition = 0;
+
+    }
+    else {
+	interp(&model_3, &model_1, &model_synth, &model_a, &model_b, &transition);
+	for(i=1; i<=model_synth.L; i++)
+	    model_synth.phi[i] = model_2.phi[i];
+    }
+
+    model_3 = model_2;
+    model_2 = model_1;
+    model_1 = model;
+    model = model_synth;
+	
 #endif
 
     /* Synthesise speech */
@@ -420,8 +492,6 @@ int main(int argc, char *argv[])
     }    
   }
 
-  //printf("gmin = %f\n", get_gmin());
-  printf("vf = %d\n", vf);
   if (fout != NULL)
     fclose(fout);
 
diff --git a/codec2/src/synth.c b/codec2/src/synth.c
index 105acbf1..04edd8bc 100644
--- a/codec2/src/synth.c
+++ b/codec2/src/synth.c
@@ -109,7 +109,8 @@ void synthesise_mixed(
   partial unvoiced sound when using zero phase model was found to be
   due mis-alignment of the LPC analysis window and accidental addition
   of a random phase component.  So we are sticking with synthesise_mixed()
-  above for now.
+  above for now.  I am leaving this function here for the moment as it 
+  might be useful one day.
 
 \*---------------------------------------------------------------------------*/