Added jitter to the zero phase model; helps with the "clicky" artifact in mmt1 and hts1

author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Thu, 24 Sep 2009 02:48:43 +0000 (02:48 +0000)

committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Thu, 24 Sep 2009 02:48:43 +0000 (02:48 +0000)
author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Thu, 24 Sep 2009 02:48:43 +0000 (02:48 +0000)
committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Thu, 24 Sep 2009 02:48:43 +0000 (02:48 +0000)
diff --git a/codec2/src/code.sh b/codec2/src/code.sh

index 9ab6b0a650d2a313fd81a2f2309705a7ed6d2fd0..67af13c7f4dec65ae3743cbd062cfc41ba13b589 100644 (file)
--- a/codec2/src/code.sh
+++ b/codec2/src/code.sh
@@ -5,8 +5,10 @@
  # Run steps to encode a speech sample
  
  ../unittest/tnlp ../raw/$1.raw ../unittest/$1_nlp.p
-../src/sinenc ../raw/$1.raw %1.mdl 300 ../unittest/$1_nlp.p
-../src/sinedec ../raw/$1.raw %1.mdl -o $1_uq.raw
-../src/sinedec ../raw/$1.raw %1.mdl --phase 0 -o $1_phase0.raw --postfilter
-../src/sinedec ../raw/$1.raw %1.mdl --lpc 10 -o $1_lpc10.raw
+../src/sinenc ../raw/$1.raw $1.mdl 300 ../unittest/$1_nlp.p
+../src/sinedec ../raw/$1.raw $1.mdl -o $1_uq.raw
+../src/sinedec ../raw/$1.raw $1.mdl --phase 0 -o $1_phase0.raw --postfilter
+../src/sinedec ../raw/$1.raw $1.mdl --lpc 10 -o $1_lpc10.raw
+../src/sinedec ../raw/$1.raw $1.mdl --phase 0 --lpc 10 -o $1_phase0_lpc10.raw --postfilter
+../src/sinedec ../raw/$1.raw $1.mdl --phase 0 --lpc 10 --lsp 37 -o $1_lsp.raw --postfilter
  
diff --git a/codec2/src/listen.sh b/codec2/src/listen.sh

index 670b191dcd9a78db5a347515952899870fd3ba55..3d9e08d3e1040b878a3fbfa7ff470e50ca1cf90f 100755 (executable)
--- a/codec2/src/listen.sh
+++ b/codec2/src/listen.sh
@@ -4,6 +4,7 @@
  #
  # Run menu with common sample file options, headphone version
  
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_phase0_lpc10.raw $1_lsp.raw ../raw/$1_speex_8k.raw $2 $3
+
  
  
diff --git a/codec2/src/listen1.sh b/codec2/src/listen1.sh

index c609b18900a0aee01d6b5bff127a40418af35131..82e65668ccf493b5e1732aca27d29c6f51c74c7c 100755 (executable)
--- a/codec2/src/listen1.sh
+++ b/codec2/src/listen1.sh
@@ -4,6 +4,6 @@
  #
  # Run menu with common sample file options, headphone version
  
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw ../raw/$1_speex_8k.raw -d /dev/dsp1
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_phase0_lpc10.raw $1_lsp.raw ../raw/$1_speex_8k.raw $2 $3 -d /dev/dsp1
  
  
diff --git a/codec2/src/phase.c b/codec2/src/phase.c

index e13e99c613b4a0a2ee02152e67250e2032fe990c..2b59f7f709ddbcc787f9ef75922b15e373641922 100644 (file)
--- a/codec2/src/phase.c
+++ b/codec2/src/phase.c
@@ -329,10 +329,19 @@ void phase_synth_zero_order(
      if (Lrand > model.L) Lrand = model.L;
    }
    
-  /* update excitation fundamental phase track */
+  /* update excitation fundamental phase track, this sets
+     the position of each pitch pulse during voiced speech */
  
    ex_phase[0] += (*prev_Wo+model.Wo)*N/2.0;
    ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
+
+  /* After much experimentation I found that a few percent of jitter
+     was effective in reducing "clicky" artifact in hts1 and mmt1. The
+     peak level of the synthesised speech was reduced to levels closer
+     to the original speech as well. */
+
+  ex_phase[0] += 0.05*TWO_PI*(0.5 - (float)rand()/RAND_MAX);
+  
    *prev_Wo = model.Wo;
  
    /* now modify this frames phase using zero phase model */
@@ -342,12 +351,17 @@ void phase_synth_zero_order(
      /* generate excitation */
  
      if (m <= Lrand) {
-        Ex[m].real = cos(ex_phase[0]*m);
+       Ex[m].real = cos(ex_phase[0]*m);
         Ex[m].imag = sin(ex_phase[0]*m);
  
-       /* following is an experiment in dispersing pulse energy over time,
-          didn't really change sound at all, e.g. mmt1 still sounded
-          "clicky"*/
+       /* following is an experiment in dispersing pulse energy over
+          time, didn't really change sound at all, e.g. mmt1 still
+          sounded "clicky".  I think this is because this provides
+          just a small phase shift between adjacent harmonics.
+          However for voiced speech it is the high energy harmonics
+          that form pitch pulses, so we need a relatively high phase
+          shift between them to disperse pulse energy */
+
          //Ex[m].real = cos(ex_phase[0]*m + model.Wo*m*m*0.3);
         //Ex[m].imag = sin(ex_phase[0]*m + model.Wo*m*m*0.3);
  
@@ -355,7 +369,9 @@ void phase_synth_zero_order(
            (see octave/glottal.m) in an attempt io make mmt1 and hts1 a little
            less "clicky", i.e. disperse the pusle energy away from the point
            of onset.  Result was no difference in speech quality, in fact
-          no difference at all. Could be an implementation error I guess. */
+          no difference at all. Could be an implementation error I guess. 
+          Once again - this model doesn't change phases much between adjacent
+          harmonics, so not much dispersion. */
         //b = floor(m*model->Wo*FFT_DEC/TWO_PI + 0.5);
          //Ex[m].real = cos(ex_phase[0]*m + glottal[b]);
         //Ex[m].imag = sin(ex_phase[0]*m + glottal[b]);
diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c

index d855412fab118def1dd66fe8aae40f985b824e33..700013c32a1777ac9cdc8042cb3bf55c3c198016 100644 (file)
--- a/codec2/src/quantise.c
+++ b/codec2/src/quantise.c
@@ -37,7 +37,7 @@
  #define MAX_ORDER 20
  
  #define LPC_FLOOR 0.0002        /* autocorrelation floor */
-#define LSP_DELTA1 0.2          /* grid spacing for LSP root searches */
+#define LSP_DELTA1 0.05         /* grid spacing for LSP root searches */
  
  /* Speex lag window */
  
@@ -267,8 +267,9 @@ float lpc_model_amplitudes(
        E += ak[i]*R[i];
    
    if (lsp_quantisation) {
-    roots = lpc_to_lsp(&ak[1], order, lsp, 10, LSP_DELTA1, NULL);
-
+    roots = lpc_to_lsp(&ak[1], order, lsp, 5, LSP_DELTA1, NULL);
+    if (roots != order)
+       printf("LSP roots not found\n");
      index = quantise(cb12, &lsp[0], LSP_12_K, LSP_12_M, &se);
      lsp[0] = cb12[index*LSP_12_K+0];
      lsp[1] = cb12[index*LSP_12_K+1];
diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c

index d25bcf78b6fdf9f3a3b28f7b32a93ff60d925ebd..3dab7a594effd6f3be3d11908cd09fd4871ac06a 100644 (file)
--- a/codec2/src/sinedec.c
+++ b/codec2/src/sinedec.c
@@ -83,7 +83,7 @@ int main(int argc, char *argv[])
    int lpc_model, order;
    int lsp, lsp_quantiser;
    float ak[LPC_MAX_ORDER+1];
-
+  
    int dump;
    
    int phase, phase_model;
@@ -92,6 +92,7 @@ int main(int argc, char *argv[])
    int   postfilt;
    float bg_est;
  
+
    if (argc < 3) {
      printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n");
      printf("       [--dump DumpFilePrefix]\n");
@@ -163,6 +164,7 @@ int main(int argc, char *argv[])
    if (phase) {
        phase_model = atoi(argv[phase+1]);
        assert((phase_model == 0) || (phase_model == 1));
+      ex_phase = 0;
    }
  
    bg_est = 0.0;
@@ -232,8 +234,8 @@ int main(int argc, char *argv[])
         dump_snr(snr);
         if (phase_model == 0) {
             /* just to make sure we are not cheating - kill all phases */
-           //for(i=0; i<MAX_AMP; i++)
-           //  model.phi[i] = 0;
+           for(i=0; i<MAX_AMP; i++)
+               model.phi[i] = 0;
             phase_synth_zero_order(snr, H, &prev_Wo, &ex_phase);
         }
author	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Thu, 24 Sep 2009 02:48:43 +0000 (02:48 +0000)
committer	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Thu, 24 Sep 2009 02:48:43 +0000 (02:48 +0000)
codec2/src/code.sh		patch \| blob \| history
codec2/src/listen.sh		patch \| blob \| history
codec2/src/listen1.sh		patch \| blob \| history
codec2/src/phase.c		patch \| blob \| history
codec2/src/quantise.c		patch \| blob \| history
codec2/src/sinedec.c		patch \| blob \| history