interp function working with minor artifacts, about to clean up

author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Sun, 18 Oct 2009 23:07:54 +0000 (23:07 +0000)

committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Sun, 18 Oct 2009 23:07:54 +0000 (23:07 +0000)
author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Sun, 18 Oct 2009 23:07:54 +0000 (23:07 +0000)
committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Sun, 18 Oct 2009 23:07:54 +0000 (23:07 +0000)
diff --git a/codec2/src/Makefile b/codec2/src/Makefile

index f7a49d8650a7d58236e96ebd2e2177d76845f800..03036bc0e46a8bfe7bb95a3989705d0114442cd1 100644 (file)
--- a/codec2/src/Makefile
+++ b/codec2/src/Makefile
@@ -5,7 +5,7 @@ SINENC_OBJ  = sinenc.o globals.o initenc.o four1.o refine.o spec.o dump.o
  SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \
                quantise.o lpc.o dump.o refine.o ../speex/lsp.o  \
                ../speex/quant_lsp.o ../speex/bits.o ../speex/lsp_tables_nb.o \
-              ../speex/high_lsp_tables.o phase.o postfilter.o
+              ../speex/high_lsp_tables.o phase.o postfilter.o interp.o
  
  all: sinenc sinedec
  
diff --git a/codec2/src/interp.c b/codec2/src/interp.c

new file mode 100644 (file)

index 0000000..6aad899
--- /dev/null
+++ b/codec2/src/interp.c
@@ -0,0 +1,104 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: interp.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 9/10/09
+
+  Interpolation of 20ms frames to 10ms frames.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "interp.h"
+#include <string.h>
+
+/*---------------------------------------------------------------------------*\
+
+  interp()
+        
+  Given two frames described by model parameters 20ms apart, determines the
+  model parameters of the 10ms frame between them.  Note that phases are
+  not interpolated, they must be set external to this function.
+
+\*---------------------------------------------------------------------------*/
+
+void interp(
+  MODEL *prev,      /* in:  previous frame's model params               */
+  MODEL *next,      /* in:  next frame's model params                   */
+  MODEL *synth,     /* out: interp model params, continuous case only   */
+  MODEL *a,         /* out: prev extended into this frame, trans. only  */
+  MODEL *b,         /* out: next extended into this frame, trans. only  */
+  int   *transition /* out: 0 = continuous frame (synth written),
+                       1 = transition frame (a and b written)           */
+)
+{
+    int m;
+    /* NOTE(review): fabs() needs <math.h>, assumed via interp.h - confirm */
+    if (fabs(next->Wo - prev->Wo) < 0.1*next->Wo) {
+
+       /* If the Wo of adjacent frames is within 10% we synthesise a
+          continuous track through this frame by linear interpolation
+          of the amplitudes and Wo.  This is typical of a strongly
+          voiced frame.
+       */
+
+       *transition = 0;
+       /* use the smaller L so A[m] is averaged only up to both frames' L */
+       synth->Wo = (next->Wo + prev->Wo)/2.0;
+       if (next->L > prev->L)
+           synth->L = prev->L;
+       else
+           synth->L = next->L;
+       for(m=1; m<=synth->L; m++) {
+           synth->A[m] = (prev->A[m] + next->A[m])/2.0;
+       }
+    }
+    else {
+       /*
+          transition frame, adjacent frames have different Wo and L
+          so set up two sets of model parameters based on prev and
+          next.  We then synthesise both of them and add them
+          together in the time domain.
+
+          The transition case is typical of unvoiced speech or
+          background noise or a voiced/unvoiced transition.
+       */
+
+       *transition = 1;
+
+       /* a is prev extended forward into this frame, b is next
+          extended backward into this frame.  Amplitudes are halved
+          and phases shifted by +/- Wo*m*N to time-shift each model
+          forward or backward N samples. */
+       /* size taken from the destination, not from an outside `model` */
+       memcpy(a, prev, sizeof *a);
+       memcpy(b, next, sizeof *b);
+       for(m=1; m<=a->L; m++) {
+           a->A[m] /= 2.0;
+           a->phi[m] += a->Wo*m*N;
+       }
+       for(m=1; m<=b->L; m++) {
+           b->A[m] /= 2.0;
+           b->phi[m] -= b->Wo*m*N;
+       }
+    }
+
+}
+
diff --git a/codec2/src/interp.h b/codec2/src/interp.h

new file mode 100644 (file)

index 0000000..39a9175
--- /dev/null
+++ b/codec2/src/interp.h
@@ -0,0 +1,37 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: interp.h
+  AUTHOR......: David Rowe
+  DATE CREATED: 9/10/09
+
+  Interpolation of 20ms frames to 10ms frames.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __INTERP__
+#define __INTERP__
+
+#include "sine.h" /* presumably provides MODEL used below - confirm */
+/* Model params of the 10ms frame between prev and next; see interp.c */
+void interp(MODEL *prev, MODEL *next, MODEL *synth, MODEL *a, MODEL *b, 
+           int *transition);
+
+#endif /* __INTERP__ */
diff --git a/codec2/src/listen.sh b/codec2/src/listen.sh

index 338e0712a00b3850bc182e24ad60bb29bf12c3cb..1a1676c51afda4096d389819a5ea65c3a43d0f8a 100755 (executable)
--- a/codec2/src/listen.sh
+++ b/codec2/src/listen.sh
@@ -4,7 +4,7 @@
  #
  # Run menu with common sample file options, headphone version
  
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_speex_8k.raw $2 $3
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_g729a.raw $2 $3
  
  
  
diff --git a/codec2/src/listen1.sh b/codec2/src/listen1.sh

index cdf03ca8167cf23e03484b301096622625b2443d..e3af29ae3acd1324f771421443a509ec0b2e1ad5 100755 (executable)
--- a/codec2/src/listen1.sh
+++ b/codec2/src/listen1.sh
@@ -4,6 +4,6 @@
  #
  # Run menu with common sample file options, headphone version
  
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_speex_8k.raw $2 $3 -d /dev/dsp1
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_g729a.raw $2 $3 -d /dev/dsp1
  
  
diff --git a/codec2/src/phase.c b/codec2/src/phase.c

index 8f6954110329d22834e31c011bab57db53f39077..85105c55afc5a21eb35da376b1232707197b6afe 100644 (file)
--- a/codec2/src/phase.c
+++ b/codec2/src/phase.c
@@ -305,10 +305,11 @@ void phase_synth_zero_order(
    int    voiced
  )
  {
-  int   m;
+  int   m,i;
    float new_phi;
    COMP  Ex[MAX_AMP];           /* excitation samples */
    COMP  A_[MAX_AMP];           /* synthesised harmonic samples */
+  float maxA;
  
    /* 
       Update excitation fundamental phase track, this sets the position
@@ -374,6 +375,26 @@ void phase_synth_zero_order(
      new_phi = atan2(A_[m].imag, A_[m].real+1E-12);
      model.phi[m] = new_phi;
    }
+
+  #ifdef CLICKY
+  /* Adding a random component to low energy harmonic phase seems to
+     improve low pitch speakers.  Adding a small random component to
+     low energy harmonic amplitudes also helps low pitch speakers after
+     LPC modelling (see LPC modelling/amplitude quantisation code).
+  */
+
+  maxA = 0.0;
+  for(i=1; i<=model.L; i++) {
+      if (model.A[i] > maxA) {
+         maxA = model.A[i];
+      }
+  }
+  for(i=1; i<=model.L; i++) {
+      if (model.A[i] < 0.1*maxA) {
+         model.phi[i] += 0.2*TWO_PI*(float)rand()/RAND_MAX;
+      }
+  }
+  #endif
  }
  
  /*---------------------------------------------------------------------------*\
diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c

index b9de0a1df25d072ec72e4436291b95a07963f542..1f1ea0945e7d18d4dad8bda09396fa28127b1a90 100644 (file)
--- a/codec2/src/quantise.c
+++ b/codec2/src/quantise.c
@@ -36,7 +36,7 @@
  
  #define MAX_ORDER    20
  #define LSP_DELTA1 0.01         /* grid spacing for LSP root searches */
-#define MAX_CB       10         /* max number of codebooks */
+#define MAX_CB       20         /* max number of codebooks */
  
  /* describes each codebook  */
  
@@ -48,6 +48,7 @@ typedef struct {
  
  /* lsp_q describes entire quantiser made up of several codebooks */
  
+#ifdef OLDER
  /* 10+10+6+6 = 32 bit LSP difference split VQ */
  
  LSP_CB lsp_q[] = {
@@ -57,6 +58,21 @@ LSP_CB lsp_q[] = {
      {2,     64, "../unittest/lspd910.txt"},
      {0,    0, ""}
  };
+#endif
+
+LSP_CB lsp_q[] = {
+    {1,   16, "../unittest/lsp1.txt"},
+    {1,   16, "../unittest/lsp2.txt"},
+    {1,   16, "../unittest/lsp3.txt"},
+    {1,   16, "../unittest/lsp4.txt"},
+    {1,   16, "../unittest/lsp5.txt"},
+    {1,   16, "../unittest/lsp6.txt"},
+    {1,   16, "../unittest/lsp7.txt"},
+    {1,    8, "../unittest/lsp8.txt"},
+    {1,    8, "../unittest/lsp9.txt"},
+    {1,    4, "../unittest/lsp10.txt"},
+    {0,    0, ""}
+};
  
  /* ptr to each codebook */
  
@@ -306,6 +322,7 @@ float lpc_model_amplitudes(
    int   i,j;
    float snr;   
    float lsp[MAX_ORDER];
+  float lsp_hz[MAX_ORDER];
    float lsp_[MAX_ORDER];
    float lspd[MAX_ORDER];
    int   roots;            /* number of LSP roots found */
@@ -315,6 +332,8 @@ float lpc_model_amplitudes(
    float *cb;
    float wt[MAX_ORDER];
  
+  float maxA, dB;
+
    for(i=0; i<M; i++)
      Wn[i] = Sn[i]*w[i];
    autocorrelate(Wn,R,M,order);
@@ -329,6 +348,43 @@ float lpc_model_amplitudes(
      if (roots != order)
         printf("LSP roots not found\n");
  
+    for(i=0; i<order; i++)
+       lsp_hz[i] = (4000.0/PI)*lsp[i];
+    
+    for(i=0; i<10; i++) {
+       k = lsp_q[i].k;
+       m = lsp_q[i].m;
+       cb = plsp_cb[i];
+       index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
+       lsp_hz[i] = cb[index*k];
+    }
+    
+    /*
+    for(i=0; i<order; i++)
+       lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX);
+    */
+
+    for(i=0; i<order; i++)
+       lsp[i] = (PI/4000.0)*lsp_hz[i];
+
+    for(i=1; i<5; i++) {
+       if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
+           lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
+    }
+
+    /* as quantiser gaps increased, larger BW expansion was required
+       to prevent twinkly noises */
+    for(i=5; i<8; i++) {
+       if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
+           lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
+    }
+    for(i=8; i<order; i++) {
+       if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
+           lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
+    }
+
+    //#define OLD_VQ
+#ifdef OLD_VQ
      lspd[0] = lsp[0];
      for(i=1; i<order; i++)
         lspd[i] = lsp[i] - lsp[i-1];
@@ -366,7 +422,9 @@ float lpc_model_amplitudes(
         i++;
         assert(i < MAX_CB);
      }
-    
+#else
+    l = 0;
+#endif    
      /* used during development: copy remaining LSPs from orig if we haven't
         quantised all of them */
      for(j=l; j<order; j++)
@@ -378,6 +436,27 @@ float lpc_model_amplitudes(
  
    aks_to_M2(ak,order,model,E,&snr);   /* {ak} -> {Am} LPC decode */
  
+  #ifdef CLICKY
+  /* Adding a random component to low energy harmonic phase seems to
+     improve low pitch speakers.  Adding a small random component to
+     low energy harmonic amplitudes also helps low pitch speakers after
+     LPC modelling (see LPC modelling/amplitude quantisation code).
+  */
+
+  maxA = 0.0;
+  for(i=1; i<=model->L; i++) {
+      if (model->A[i] > maxA) {
+         maxA = model->A[i];
+      }
+  }
+  for(i=1; i<=model->L; i++) {
+      if (model->A[i] < 0.1*maxA) {
+         dB = 3.0 - 6.0*(float)rand()/RAND_MAX;
+         model->A[i] *= pow(10.0, dB/20.0);
+      }
+  }
+  #endif
+
    return snr;
  }
  
@@ -416,8 +495,7 @@ void aks_to_M2(
  
    for(i=0; i<FFT_DEC; i++) {
      Pw[i].real = 0.0;
-    Pw[i].imag = 0.0;
-  }
+    Pw[i].imag = 0.0;  }
  
    for(i=0; i<=order; i++)
      Pw[i].real = ak[i];
diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c

index 815f69845013c247c6d887abdb910a85038be1a9..b826c68634e472a8a1db27daf50bedf675daa94c 100644 (file)
--- a/codec2/src/sinedec.c
+++ b/codec2/src/sinedec.c
@@ -35,6 +35,7 @@
  #include "lpc.h"
  #include "synth.h"
  #include "postfilter.h"
+#include "interp.h"
  
  /*---------------------------------------------------------------------------*\
                                                                               
@@ -218,7 +219,7 @@ int main(int argc, char *argv[])
      dump_Sn(Sn);
      dft_speech(); dump_Sw(Sw);   
  
-    //dump_model(&model);
+    dump_model(&model);
  
      /* optional phase modelling - make sure this happens before LPC
         modelling of {Am} as first order model fit doesn't work well
@@ -258,8 +259,10 @@ int main(int argc, char *argv[])
             /* just to make sure we are not cheating - kill all phases */
             for(i=0; i<MAX_AMP; i++)
                 model.phi[i] = 0;
-           if (hand_snr)
+           if (hand_snr) {
                 fscanf(fsnr,"%f\n",&snr);
+               voiced = snr > 2.0;
+           }
             phase_synth_zero_order(voiced, H, ex_phase, voiced);
         }
  
@@ -270,7 +273,7 @@ int main(int argc, char *argv[])
          if (postfilt)
             postfilter(&model, voiced, &bg_est);
  
-        dump_phase_(&model.phi[0]);
+        //dump_phase_(&model.phi[0]);
      }
  
      /* optional LPC model amplitudes */
@@ -281,7 +284,55 @@ int main(int argc, char *argv[])
          dump_quantised_model(&model);
      }
  
- #define DEC
+    //#define MAKE_CLICKY
+#ifdef MAKE_CLICKY
+    {
+       float maxA = 0.0;
+       float dB;
+       int   max_m;
+
+       for(i=1; i<=model.L; i++) {
+           if (model.A[i] > maxA) {
+               maxA = model.A[i];
+               max_m = i;
+           }
+       }
+       for(i=1; i<=model.L; i++) {
+           if (model.A[i] > 0.1*maxA) {
+               model.A[i] = 0.0;
+           }
+       }
+
+    }
+#endif
+       
+
+    //#define REDUCE_CLICKY
+#ifdef REDUCE_CLICKY
+    {
+       float maxA = 0.0;
+       float dB;
+       int   max_m;
+
+       for(i=1; i<=model.L; i++) {
+           if (model.A[i] > maxA) {
+               maxA = model.A[i];
+               max_m = i;
+           }
+       }
+       for(i=1; i<=model.L; i++) {
+           if (model.A[i] < 0.1*maxA) {
+               model.phi[i] += 0.2*TWO_PI*(float)rand()/RAND_MAX;
+               dB = 3.0 - 6.0*(float)rand()/RAND_MAX;
+               model.A[i] *= pow(10.0, dB/20.0);
+           }
+       }
+
+    }
+#endif
+
+
+    //#define DEC
   #ifdef DEC
     /* Decimate to 20ms frame rate.  In the code we only send
        off frames to the receiver.  To simulate this on odd
@@ -330,48 +381,45 @@ int main(int argc, char *argv[])
         /* even frame so we need to synthesise the model parameters by
            interpolating between adjacent frames */
  
-       model_synth = model_2;
-        voiced_synth = voiced && voiced_2;
         if (fabs(model_1.Wo - model_3.Wo) < 0.1*model_1.Wo) {
             /* If the Wo of adjacent frames is within 10% we synthesise a 
                continuous track through this frame by linear interpolation
                of the amplitudes and Wo.  This is typical of a strongly 
                voiced frame.
             */
+
             transition = 0;
  
-           /* continuous track through this frame */
-           #define T
-           #ifdef T
-               model_synth.Wo = (model_1.Wo + model_3.Wo)/2.0;
-               if (model_1.L > model_3.L)
-                   model_synth.L = model_3.L;
-               else
-                   model_synth.L = model_1.L;
-           #endif
-               for(i=1; i<=model_synth.L; i++) {
-                   model_synth.A[i] = (model_3.A[i] + model_1.A[i])/2.0;
-                   /* cheat on phases for now, these were constructed using
-                      LPC model from actual speech for this frame - fix later */
-                   model_synth.phi[i] = model_2.phi[i];
-               }
-               vf++;
+           model_synth.Wo = (model_1.Wo + model_3.Wo)/2.0;
+           if (model_1.L > model_3.L)
+               model_synth.L = model_3.L;
+           else
+               model_synth.L = model_1.L;
+           for(i=1; i<=model_synth.L; i++) {
+               model_synth.A[i] = (model_3.A[i] + model_1.A[i])/2.0;
+               /* cheat on phases for now, these were constructed using
+                  LPC model from actual speech for this frame - fix later */
+               model_synth.phi[i] = model_2.phi[i];
+           }
         }
         else {
             /* 
                transition frame, adjacent frames have different Wo and
                L so set up two sets of model parameters based on
                previous and next frame.  We then synthesise both of
-              them and add them together in the time domain.  Note
-              the adjustments to phase to shift the timing of the
-              model parameters forward or back N samples.  
+              them and add them together in the time domain.  
  
-              This case is typical of unvoiced speech or background noise
-              of a voiced to unvoiced transition.
+              This case is typical of unvoiced speech or background
+              noise or a voiced/unvoiced transition.
             */
  
             transition = 1;
  
+           /* model_a is the previous frame extended forward into
+              this frame, model_b is the next frame extended backward
+              into this frame.  Note the adjustments to phase to
+              time-shift the model forward or backward N samples. */
+
             memcpy(&model_a, &model_3, sizeof(model));
             memcpy(&model_b, &model_1, sizeof(model));
             for(i=1; i<=model_a.L; i++) {
@@ -392,6 +440,30 @@ int main(int argc, char *argv[])
      model_2 = model_1;
      model_1 = model;
      model = model_synth;
+#endif
+    //dump_quantised_model(&model);
+
+#define INTERP
+#ifdef INTERP
+    if (frames%2) {
+
+       /* odd frames use the original model parameters */
+
+       model_synth = model_2;
+       transition = 0;
+
+    }
+    else {
+       interp(&model_3, &model_1, &model_synth, &model_a, &model_b, &transition);
+       for(i=1; i<=model_synth.L; i++)
+           model_synth.phi[i] = model_2.phi[i];
+    }
+
+    model_3 = model_2;
+    model_2 = model_1;
+    model_1 = model;
+    model = model_synth;
+       
  #endif
  
      /* Synthesise speech */
@@ -420,8 +492,6 @@ int main(int argc, char *argv[])
      }    
    }
  
-  //printf("gmin = %f\n", get_gmin());
-  printf("vf = %d\n", vf);
    if (fout != NULL)
      fclose(fout);
  
diff --git a/codec2/src/synth.c b/codec2/src/synth.c

index 105acbf196583d7154951e5570157f14baab0771..04edd8bc0c0a5d0517e8e8b95226d0de4c2f535b 100644 (file)
--- a/codec2/src/synth.c
+++ b/codec2/src/synth.c
@@ -109,7 +109,8 @@ void synthesise_mixed(
    partial unvoiced sound when using zero phase model was found to be
    due mis-alignment of the LPC analysis window and accidental addition
    of a random phase component.  So we are sticking with synthesise_mixed()
-  above for now.
+  above for now.  I am leaving this function here for the moment as it 
+  might be useful one day.
  
  \*---------------------------------------------------------------------------*/
author	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Sun, 18 Oct 2009 23:07:54 +0000 (23:07 +0000)
committer	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Sun, 18 Oct 2009 23:07:54 +0000 (23:07 +0000)
codec2/src/Makefile		patch \| blob \| history
codec2/src/interp.c	[new file with mode: 0644]	patch \| blob
codec2/src/interp.h	[new file with mode: 0644]	patch \| blob
codec2/src/listen.sh		patch \| blob \| history
codec2/src/listen1.sh		patch \| blob \| history
codec2/src/phase.c		patch \| blob \| history
codec2/src/quantise.c		patch \| blob \| history
codec2/src/sinedec.c		patch \| blob \| history
codec2/src/synth.c		patch \| blob \| history