original phases working with 8000 Hz and 16000 Hz, sounds OK. Can see better ways...

author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Sun, 4 Jun 2017 07:18:43 +0000 (07:18 +0000)

committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Sun, 4 Jun 2017 07:18:43 +0000 (07:18 +0000)
author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Sun, 4 Jun 2017 07:18:43 +0000 (07:18 +0000)
committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Sun, 4 Jun 2017 07:18:43 +0000 (07:18 +0000)
diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c

index 35f8f60923836b0e6877b767f8fd79eee3e080ac..af8eaecf9e68c42c230549efa6a7fa22663083ad 100644 (file)
--- a/codec2-dev/src/c2sim.c
+++ b/codec2-dev/src/c2sim.c
@@ -64,34 +64,10 @@ void print_help(const struct option *long_options, int num_opts, char* argv[]);
  
  int main(int argc, char *argv[])
  {
-    C2CONST c2const = c2const_create(8000);
-    int   n_samp = c2const.n_samp;
-    int   m_pitch = c2const.m_pitch;
  
-    FILE *fout = NULL; /* output speech file                    */
-    FILE *fin;         /* input speech file                     */
-    short buf[N_SAMP]; /* input/output buffer                   */
-    float buf_float[N_SAMP];
-    float Sn[m_pitch]; /* float input speech samples            */
-    float Sn_pre[m_pitch];     /* pre-emphasised input speech samples   */
-    COMP  Sw[FFT_ENC]; /* DFT of Sn[]                           */
-    codec2_fft_cfg  fft_fwd_cfg;
-    codec2_fftr_cfg  fftr_fwd_cfg;
-    codec2_fftr_cfg  fftr_inv_cfg;
-    float w[m_pitch];          /* time domain hamming window            */
-    COMP  W[FFT_ENC];  /* DFT of w[]                            */
-    MODEL model;
-    float Pn[2*N_SAMP];        /* trapezoidal synthesis window          */
-    float Sn_[2*N_SAMP];       /* synthesised speech */
-    int   i,m;         /* loop variable                         */
-    int   frames;
-    float prev_Wo, prev__Wo, prev_uq_Wo;
-    float pitch;
-    char  out_file[MAX_STR];
-    char  ampexp_arg[MAX_STR];
-    char  phaseexp_arg[MAX_STR];
-    float snr;
-    float sum_snr;
+    int Fs = 8000;
+    int set_fs;
+
      int orderi;
      int lpc_model = 0, order = LPC_ORD;
      int lsp = 0, lspd = 0, lspvq = 0;
@@ -101,63 +77,39 @@ int main(int argc, char *argv[])
      int lspanssi = 0,
      #endif
      int prede = 0;
-    float pre_mem = 0.0, de_mem = 0.0;
-    float ak[order];
-    // COMP  Sw_[FFT_ENC];
-    // COMP  Ew[FFT_ENC];
-
-    int phase0 = 0;
-    float ex_phase[MAX_AMP+1];
-
      int   postfilt;
-    float bg_est = 0.0;
-
      int   hand_voicing = 0, phaseexp = 0, ampexp = 0, hi = 0, simlpcpf = 0, lspmelread = 0;
      int   lpcpf = 0;
      FILE *fvoicing = 0;
      FILE *flspmel = 0;
-
-    MODEL prev_model;
      int dec;
      int decimate = 1;
-    float lsps[order];
-    float e, prev_e;
-    int   lsp_indexes[order];
-    float lsps_[order];
-    float Woe_[2];
-
-    float lsps_dec[4][LPC_ORD], e_dec[4], weight, weight_inc, ak_dec[4][LPC_ORD];
-    MODEL model_dec[4], prev_model_dec;
-    float prev_lsps_dec[order], prev_e_dec;
-
-    void *nlp_states;
-    float hpf_states[2];
+    int   amread, Woread;
+    int   awread;
+    int   hmread;
+    int   phase0 = 0;
      int   scalar_quant_Wo_e = 0;
      int   scalar_quant_Wo_e_low = 0;
      int   vector_quant_Wo_e = 0;
      int   dump_pitch_e = 0;
-    FILE *fjvm = NULL;
-    #ifdef DUMP
-    int   dump;
-    #endif
-    #if 0
-    struct PEXP *pexp = NULL;
-    struct AEXP *aexp = NULL;
-    #endif
      float gain = 1.0;
      int   bpf_en = 0;
      int   bpfb_en = 0;
-    float bpf_buf[BPF_N+N_SAMP];
-    float lspmelvq_mse = 0.0;
-    int   amread, Woread;
      FILE *fam = NULL, *fWo = NULL;
-    int   awread;
      FILE *faw = NULL;
-    int   hmread;
      FILE *fhm = NULL;
+    FILE *fjvm = NULL;
+    #ifdef DUMP
+    int   dump;
+    #endif
+    char  ampexp_arg[MAX_STR];
+    char  phaseexp_arg[MAX_STR];
+    char  out_file[MAX_STR];
+    FILE *fout = NULL; /* output speech file */
  
      char* opt_string = "ho:";
      struct option long_options[] = {
+        { "Fs", required_argument, &set_fs, 1 },
          { "lpc", required_argument, &lpc_model, 1 },
          { "lspjnd", no_argument, &lspjnd, 1 },
          { "lspmel", no_argument, &lspmel, 1 },
@@ -200,36 +152,6 @@ int main(int argc, char *argv[])
          { NULL, no_argument, NULL, 0 }
      };
      int num_opts=sizeof(long_options)/sizeof(struct option);
-    COMP Aw[FFT_ENC];
-    COMP H[MAX_AMP];
-
-    for(i=0; i<m_pitch; i++) {
-       Sn[i] = 1.0;
-       Sn_pre[i] = 1.0;
-    }
-    for(i=0; i<2*N_SAMP; i++)
-       Sn_[i] = 0;
-
-    prev_uq_Wo = prev_Wo = prev__Wo = c2const.Wo_min;
-
-    prev_model.Wo = c2const.Wo_max;
-    prev_model.L = floor(PI/prev_model.Wo);
-    for(i=1; i<=prev_model.L; i++) {
-       prev_model.A[i] = 0.0;
-       prev_model.phi[i] = 0.0;
-    }
-    for(i=1; i<=MAX_AMP; i++) {
-       //ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX;
-       ex_phase[i] = 0.0;
-    }
-    e = prev_e = 1;
-    hpf_states[0] = hpf_states[1] = 0.0;
-
-    nlp_states = nlp_create(m_pitch);
-
-    if (argc < 2) {
-        print_help(long_options, num_opts, argv);
-    }
  
      /*----------------------------------------------------------------*\
  
@@ -245,7 +167,13 @@ int main(int argc, char *argv[])
              break;
          switch (opt) {
           case 0:
-            if(strcmp(long_options[option_index].name, "lpc") == 0) {
+            if(strcmp(long_options[option_index].name, "Fs") == 0) {
+                Fs= atoi(optarg);
+                if((Fs != 8000) && (Fs != 16000)) {
+                    fprintf(stderr, "Error Fs must be 8000 or 16000\n");
+                    exit(1);
+                }
+            } else if(strcmp(long_options[option_index].name, "lpc") == 0) {
                  orderi = atoi(optarg);
                  if((orderi < 4) || (orderi > order)) {
                      fprintf(stderr, "Error in LPC order (4 to %d): %s\n", order, optarg);
@@ -396,6 +324,7 @@ int main(int argc, char *argv[])
  
      /* Input file */
  
+    FILE *fin;         /* input speech file                     */
      if (strcmp(argv[optind], "-")  == 0) fin = stdin;
      else if ((fin = fopen(argv[optind],"rb")) == NULL) {
         fprintf(stderr, "Error opening input speech file: %s: %s.\n",
@@ -403,6 +332,93 @@ int main(int argc, char *argv[])
         exit(1);
      }
  
+    C2CONST c2const = c2const_create(Fs);
+    int   n_samp = c2const.n_samp;
+    int   m_pitch = c2const.m_pitch;
+
+    short buf[N_SAMP]; /* input/output buffer                   */
+    float buf_float[N_SAMP];
+    float Sn[m_pitch]; /* float input speech samples            */
+    float Sn_pre[m_pitch];     /* pre-emphasised input speech samples   */
+    COMP  Sw[FFT_ENC]; /* DFT of Sn[]                           */
+    codec2_fft_cfg  fft_fwd_cfg;
+    codec2_fftr_cfg  fftr_fwd_cfg;
+    codec2_fftr_cfg  fftr_inv_cfg;
+    float w[m_pitch];          /* time domain hamming window            */
+    COMP  W[FFT_ENC];  /* DFT of w[]                            */
+    MODEL model;
+    float Pn[2*N_SAMP];        /* trapezoidal synthesis window          */
+    float Sn_[2*N_SAMP];       /* synthesised speech */
+    int   i,m;         /* loop variable                         */
+    int   frames;
+    float prev_f0;
+    float pitch;
+    float snr;
+    float sum_snr;
+
+    float pre_mem = 0.0, de_mem = 0.0;
+    float ak[order];
+    // COMP  Sw_[FFT_ENC];
+    // COMP  Ew[FFT_ENC];
+
+    float ex_phase[MAX_AMP+1];
+
+    float bg_est = 0.0;
+
+
+    MODEL prev_model;
+    float lsps[order];
+    float e, prev_e;
+    int   lsp_indexes[order];
+    float lsps_[order];
+    float Woe_[2];
+
+    float lsps_dec[4][LPC_ORD], e_dec[4], weight, weight_inc, ak_dec[4][LPC_ORD];
+    MODEL model_dec[4], prev_model_dec;
+    float prev_lsps_dec[order], prev_e_dec;
+
+    void *nlp_states;
+    float hpf_states[2];
+    #if 0
+    struct PEXP *pexp = NULL;
+    struct AEXP *aexp = NULL;
+    #endif
+    float bpf_buf[BPF_N+N_SAMP];
+    float lspmelvq_mse = 0.0;
+
+    COMP Aw[FFT_ENC];
+    COMP H[MAX_AMP];
+
+ 
+    for(i=0; i<m_pitch; i++) {
+       Sn[i] = 1.0;
+       Sn_pre[i] = 1.0;
+    }
+    for(i=0; i<2*N_SAMP; i++)
+       Sn_[i] = 0;
+
+    prev_f0 = 1/P_MAX_S;
+
+    prev_model.Wo = c2const.Wo_max;
+    prev_model.L = floor(PI/prev_model.Wo);
+    for(i=1; i<=prev_model.L; i++) {
+       prev_model.A[i] = 0.0;
+       prev_model.phi[i] = 0.0;
+    }
+    for(i=1; i<=MAX_AMP; i++) {
+       //ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX;
+       ex_phase[i] = 0.0;
+    }
+    e = prev_e = 1;
+    hpf_states[0] = hpf_states[1] = 0.0;
+
+    nlp_states = nlp_create(&c2const);
+
+    if (argc < 2) {
+        print_help(long_options, num_opts, argv);
+    }
+
+
      ex_phase[0] = 0;
      Woe_[0] = Woe_[1] = 1.0;
  
@@ -500,7 +516,7 @@ int main(int argc, char *argv[])
  
         \*------------------------------------------------------------*/
  
-        nlp(nlp_states,Sn,N_SAMP,c2const.p_min,c2const.p_max,&pitch,Sw,W,&prev_uq_Wo);
+        nlp(nlp_states, Sn, N_SAMP, &pitch, Sw, W, &prev_f0);
         model.Wo = TWO_PI/pitch;
  
          dft_speech(&c2const, fft_fwd_cfg, Sw, Sn, w);
diff --git a/codec2-dev/src/codec2.c b/codec2-dev/src/codec2.c

index 482a66ffc9c36deccf58d97ce5d45d712ef5ff42..8de9ec7beaeaaba8bff140377a29a6381fe4b021 100644 (file)
--- a/codec2-dev/src/codec2.c
+++ b/codec2-dev/src/codec2.c
@@ -148,7 +148,7 @@ struct CODEC2 * codec2_create(int mode)
      make_synthesis_window(&c2->c2const, c2->Pn);
      c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL);
      quantise_init();
-    c2->prev_Wo_enc = 0.0;
+    c2->prev_f0_enc = 1/P_MAX_S;
      c2->bg_est = 0.0;
      c2->ex_phase = 0.0;
  
@@ -163,7 +163,7 @@ struct CODEC2 * codec2_create(int mode)
      }
      c2->prev_e_dec = 1;
  
-    c2->nlp = nlp_create(m_pitch);
+    c2->nlp = nlp_create(&c2->c2const);
      if (c2->nlp == NULL) {
         return NULL;
      }
@@ -2103,7 +2103,7 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
  
      /* Estimate pitch */
  
-    nlp(c2->nlp,c2->Sn,n_samp,c2->c2const.p_min,c2->c2const.p_max,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
+    nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc);
      PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, "    nlp");
  
      model->Wo = TWO_PI/pitch;
@@ -2116,7 +2116,6 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
      estimate_amplitudes(model, Sw, c2->W, 0);
      PROFILE_SAMPLE_AND_LOG(estamps, two_stage, "    est_amps");
      est_voicing_mbe(&c2->c2const, model, Sw, c2->W);
-    c2->prev_Wo_enc = model->Wo;
      PROFILE_SAMPLE_AND_LOG2(estamps, "    est_voicing");
      #ifdef DUMP
      dump_model(model);
diff --git a/codec2-dev/src/codec2_internal.h b/codec2-dev/src/codec2_internal.h

index 151158738818e5d553f0ca01e7e036b69a33dd09..48fcb9945510431e2ca3bf22aed3e3ffc965b67f 100644 (file)
--- a/codec2-dev/src/codec2_internal.h
+++ b/codec2-dev/src/codec2_internal.h
@@ -53,7 +53,7 @@ struct CODEC2 {
      float        *Sn_;                    /* [2*n_samp] synthesised output speech      */
      float         ex_phase;                /* excitation model phase track              */
      float         bg_est;                  /* background noise estimate for post filter */
-    float         prev_Wo_enc;             /* previous frame's pitch estimate           */
+    float         prev_f0_enc;             /* previous frame's f0    estimate           */
      MODEL         prev_model_dec;          /* previous frame's model parameters         */
      float         prev_lsps_dec[LPC_ORD];  /* previous frame's LSPs                     */
      float         prev_e_dec;              /* previous frame's LPC energy               */
diff --git a/codec2-dev/src/defines.h b/codec2-dev/src/defines.h

index 968c0bbec03d16fdcce549f344516189259edf8c..ec94ed8e38a6e34eb44d0dd5c940582d9158cea1 100644 (file)
--- a/codec2-dev/src/defines.h
+++ b/codec2-dev/src/defines.h
@@ -43,7 +43,7 @@
  #define PI         3.141592654 /* mathematical constant                */
  #endif
  #define TWO_PI     6.283185307 /* mathematical constant                */
-#define MAX_STR    256          /* maximum string size                  */
+#define MAX_STR    2048         /* maximum string size                  */
  
  #define FFT_ENC    512         /* size of FFT used for encoder         */
  #define FFT_DEC    512         /* size of FFT used in decoder          */
diff --git a/codec2-dev/src/nlp.c b/codec2-dev/src/nlp.c

index 36037b432d75ece4a977d5ff410abd4f0ab7be05..8c8d5f1c71226a5b1d0418667fec90d94b489bd8 100644 (file)
--- a/codec2-dev/src/nlp.c
+++ b/codec2-dev/src/nlp.c
@@ -31,6 +31,7 @@
  #include "codec2_fft.h"
  #undef PROFILE
  #include "machdep.h"
+#include "os.h"
  
  #include <assert.h>
  #include <math.h>
@@ -42,7 +43,7 @@
  
  \*---------------------------------------------------------------------------*/
  
-#define PMAX_M      600                /* maximum NLP analysis window size     */
+#define PMAX_M      320                /* maximum NLP analysis window size     */
  #define COEFF       0.95       /* notch filter parameter               */
  #define PE_FFT_SIZE 512                /* DFT size for pitch estimation        */
  #define DEC         5          /* decimation factor                    */
@@ -54,7 +55,11 @@
  #define NLP_NTAP 48            /* Decimation LPF order */
  #undef  POST_PROCESS_MBE        /* choose post processor                */
  
-//#undef DUMP
+/* 8 to 16 kHz sample rate conversion */
+
+#define FDMDV_OS                 2                            /* oversampling rate                   */
+#define FDMDV_OS_TAPS_16K       48                            /* number of OS filter taps at 16kHz   */
+#define FDMDV_OS_TAPS_8K        (FDMDV_OS_TAPS_16K/FDMDV_OS)  /* number of OS filter taps at 8kHz    */
  
  /*---------------------------------------------------------------------------*\
  
@@ -116,12 +121,15 @@ const float nlp_fir[] = {
  };
  
  typedef struct {
+    int           Fs;                /* sample rate in Hz            */
      int           m;
      float         w[PMAX_M/DEC];     /* DFT window                   */
      float         sq[PMAX_M];       /* squared speech samples       */
      float         mem_x,mem_y;       /* memory for notch filter      */
      float         mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
-    codec2_fft_cfg  fft_cfg;           /* kiss FFT config              */
+    codec2_fft_cfg  fft_cfg;         /* kiss FFT config              */
+    float        *Sn16k;            /* Fs=16kHz input speech vector */
+    FILE         *f;
  } NLP;
  
  #ifdef POST_PROCESS_MBE
@@ -130,7 +138,8 @@ float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COM
  #endif
  float post_process_sub_multiples(COMP Fw[],
                                  int pmin, int pmax, float gmax, int gmax_bin,
-                                float *prev_Wo);
+                                float *prev_f0);
+static void fdmdv_16_to_8(float out8k[], float in16k[], int n);
  
  /*---------------------------------------------------------------------------*\
  
@@ -140,20 +149,41 @@ float post_process_sub_multiples(COMP Fw[],
  
  \*---------------------------------------------------------------------------*/
  
-void *nlp_create(
-int    m                       /* analysis window size */
-)
+void *nlp_create(C2CONST *c2const)
  {
      NLP *nlp;
      int  i;
-
-    assert(m <= PMAX_M);
+    int  m = c2const->m_pitch;
+    int  Fs = c2const->Fs;
  
      nlp = (NLP*)malloc(sizeof(NLP));
      if (nlp == NULL)
         return NULL;
  
+    assert((Fs == 8000) || (Fs == 16000));
+    nlp->Fs = Fs;
+
      nlp->m = m;
+
+    /* if running at 16kHz allocate storage for decimating filter memory */
+
+    if (Fs == 16000) {
+        nlp->Sn16k = (float*)malloc(sizeof(float)*(FDMDV_OS_TAPS_16K + c2const->n_samp));
+        for(i=0; i<FDMDV_OS_TAPS_16K; i++) {
+           nlp->Sn16k[i] = 0.0;
+        }
+        if (nlp->Sn16k == NULL) {
+            free(nlp);
+            return NULL;
+        }
+
+        /* most processing occurs at 8 kHz sample rate so halve m */
+
+        m /= 2;
+    }
+
+    assert(m <= PMAX_M);
+    
      for(i=0; i<m/DEC; i++) {
         nlp->w[i] = 0.5 - 0.5*cosf(2*PI*i/(m/DEC-1));
      }
@@ -186,6 +216,9 @@ void nlp_destroy(void *nlp_state)
      nlp = (NLP*)nlp_state;
  
      codec2_fft_free(nlp->fft_cfg);
+    if (nlp->Fs == 16000) {
+        free(nlp->Sn16k);
+    }
      free(nlp_state);
  }
  
@@ -215,28 +248,26 @@ void nlp_destroy(void *nlp_state)
  
    References:
  
-    [1] http://www.itr.unisa.edu.au/~steven/thesis/dgr.pdf Chapter 4
+    [1] http://rowetel.com/downloads/1997_rowe_phd_thesis.pdf Chapter 4
  
  \*---------------------------------------------------------------------------*/
  
  float nlp(
    void *nlp_state,
-  float  Sn[],                 /* input speech vector */
-  int    n,                    /* frames shift (no. new samples in Sn[]) */
-  int    pmin,                  /* minimum pitch value */
-  int    pmax,                 /* maximum pitch value */
-  float *pitch,                        /* estimated pitch period in samples */
-  COMP   Sw[],                  /* Freq domain version of Sn[] */
-  COMP   W[],                   /* Freq domain window */
-  float *prev_Wo
+  float  Sn[],                 /* input speech vector                                */
+  int    n,                    /* frames shift (no. new samples in Sn[])             */
+  float *pitch,                        /* estimated pitch period in samples at current Fs    */
+  COMP   Sw[],                  /* Freq domain version of Sn[]                        */
+  COMP   W[],                   /* Freq domain window                                 */
+  float *prev_f0                /* previous pitch f0 in Hz, memory for pitch tracking */
  )
  {
      NLP   *nlp;
-    float  notch;                  /* current notch filter output    */
+    float  notch;                  /* current notch filter output          */
      COMP   Fw[PE_FFT_SIZE];        /* DFT of squared signal (input/output) */
      float  gmax;
      int    gmax_bin;
-    int    m, i,j;
+    int    m, i, j;
      float  best_f0;
      PROFILE_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem);
  
@@ -244,12 +275,43 @@ float nlp(
      nlp = (NLP*)nlp_state;
      m = nlp->m;
  
-    PROFILE_SAMPLE(start);
-
      /* Square, notch filter at DC, and LP filter vector */
  
-    for(i=m-n; i<m; i++)           /* square latest speech samples */
-       nlp->sq[i] = Sn[i]*Sn[i];
+    /* If running at 16 kHz decimate to 8 kHz, as NLP was designed for
+       Fs = 8kHz. The decimating filter introduces about 3ms of delay,
+       that shouldn't be a problem as pitch changes slowly. */
+
+    if (nlp->Fs == 8000) {
+        /* Square latest input samples */
+
+        for(i=m-n; i<m; i++) {
+         nlp->sq[i] = Sn[i]*Sn[i];
+        }
+    }
+    else {
+        assert(nlp->Fs == 16000);
+
+        /* re-sample at 8 KHz */
+
+        for(i=0; i<n; i++) {
+            nlp->Sn16k[FDMDV_OS_TAPS_16K+i] = Sn[m-n+i];
+        }
+
+        m /= 2; n /= 2;
+
+        float Sn8k[n];
+        fdmdv_16_to_8(Sn8k, &nlp->Sn16k[FDMDV_OS_TAPS_16K], n);
+
+        /* Square latest input samples */
+
+        for(i=m-n, j=0; i<m; i++, j++) {
+           nlp->sq[i] = Sn8k[j]*Sn8k[j];
+        }
+        assert(j <= n);
+    }
+    //fprintf(stderr, "n: %d m: %d\n", n, m);
+
+    PROFILE_SAMPLE(start);
  
      for(i=m-n; i<m; i++) {     /* notch filter at DC */
         notch = nlp->sq[i] - nlp->mem_x;
@@ -309,6 +371,11 @@ float nlp(
      dump_Fw(Fw);
      #endif
  
+    /* todo: express everything in f0, as pitch in samples is dep on Fs */
+
+    int pmin = floor(SAMPLE_RATE*P_MIN_S);
+    int pmax = floor(SAMPLE_RATE*P_MAX_S);
+
      /* find global peak */
  
      gmax = 0.0;
@@ -323,9 +390,9 @@ float nlp(
      PROFILE_SAMPLE_AND_LOG(peakpick, magsq, "      peak pick");
  
      #ifdef POST_PROCESS_MBE
-    best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_Wo);
+    best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_f0);
      #else
-    best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo);
+    best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0);
      #endif
  
      PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick,  "      post process");
@@ -335,14 +402,16 @@ float nlp(
      for(i=0; i<m-n; i++)
         nlp->sq[i] = nlp->sq[i+n];
  
-    /* return pitch and F0 estimate */
+    /* return pitch period in samples and F0 estimate */
  
-    *pitch = (float)SAMPLE_RATE/best_f0;
+    *pitch = (float)nlp->Fs/best_f0;
  
      PROFILE_SAMPLE_AND_LOG2(shiftmem,  "      shift mem");
  
      PROFILE_SAMPLE_AND_LOG2(start,  "      nlp int");
  
+    *prev_f0 = best_f0;
+
      return(best_f0);
  }
  
@@ -369,7 +438,7 @@ float nlp(
  
  float post_process_sub_multiples(COMP Fw[],
                                  int pmin, int pmax, float gmax, int gmax_bin,
-                                float *prev_Wo)
+                                float *prev_f0)
  {
      int   min_bin, cmax_bin;
      int   mult;
@@ -383,7 +452,7 @@ float post_process_sub_multiples(COMP Fw[],
      mult = 2;
      min_bin = PE_FFT_SIZE*DEC/pmax;
      cmax_bin = gmax_bin;
-    prev_f0_bin = *prev_Wo*(4000.0/PI)*(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
+    prev_f0_bin = *prev_f0*(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
  
      while(gmax_bin/mult >= min_bin) {
  
@@ -593,3 +662,41 @@ float test_candidate_mbe(
  }
  
  #endif
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: fdmdv_16_to_8()
+  AUTHOR......: David Rowe
+  DATE CREATED: 9 May 2012
+
+  Changes the sample rate of a signal from 16 to 8 kHz.
+
+  n is the number of samples at the 8 kHz rate, there are FDMDV_OS*n
+  samples at the 16 kHz rate.  A memory of FDMDV_OS_TAPS_16K samples
+  is required before in16k[0] (see t16_8.c unit test as example).
+
+  Low pass filter the 16 kHz signal at 4 kHz using the same filter as
+  the upsampler, then just output every FDMDV_OS-th filtered sample.
+
+  Note: this function copied from fdmdv.c, included in nlp.c as a convenience
+  to avoid linking with another source file.
+
+\*---------------------------------------------------------------------------*/
+
+static void fdmdv_16_to_8(float out8k[], float in16k[], int n)
+{
+    float acc;
+    int   i,j,k;
+
+    for(i=0, k=0; k<n; i+=FDMDV_OS, k++) {      /* one output sample per FDMDV_OS input samples */
+       acc = 0.0;
+       for(j=0; j<FDMDV_OS_TAPS_16K; j++)
+           acc += fdmdv_os_filter[j]*in16k[i-j]; /* i-j goes negative: reads the filter memory stored before in16k[0] */
+        out8k[k] = acc;
+    }
+
+    /* update filter memory: copy the FDMDV_OS_TAPS_16K samples ending at in16k[n*FDMDV_OS-1] down to in16k[-FDMDV_OS_TAPS_16K..-1] */
+
+    for(i=-FDMDV_OS_TAPS_16K; i<0; i++)
+       in16k[i] = in16k[i + n*FDMDV_OS];
+}
diff --git a/codec2-dev/src/nlp.h b/codec2-dev/src/nlp.h

index 6e03236c008a1bf1e6e416927e7e781902179976..28a59c3ba78df2fced54a5c32d6f2189fa266d5d 100644 (file)
--- a/codec2-dev/src/nlp.h
+++ b/codec2-dev/src/nlp.h
@@ -30,9 +30,9 @@
  
  #include "comp.h"
  
-void *nlp_create(int m);
+void *nlp_create(C2CONST *c2const);
  void nlp_destroy(void *nlp_state);
-float nlp(void *nlp_state, float Sn[], int n, int pmin, int pmax,
-         float *pitch, COMP Sw[], COMP W[], float *prev_Wo);
+float nlp(void *nlp_state, float Sn[], int n, 
+         float *pitch_samples, COMP Sw[], COMP W[], float *prev_f0);
  
  #endif
diff --git a/codec2-dev/src/os.h b/codec2-dev/src/os.h

index ee2502862b9b38fa117513a8d6b6cca5cdc83abb..14b47135106b4e7d090ad32cad6ef43f6e9498c2 100644 (file)
--- a/codec2-dev/src/os.h
+++ b/codec2-dev/src/os.h
@@ -1,6 +1,6 @@
  /* Generate using fir1(47,1/2) in Octave */
  
-const float fdmdv_os_filter[]= {
+static const float fdmdv_os_filter[]= {
      -0.0008215855034550382,
      -0.0007833023901802921,
       0.001075563790768233,
diff --git a/codec2-dev/src/sine.c b/codec2-dev/src/sine.c

index f22b8995d943db8b9792b440c1d56c99c64a2f66..9ec134fa57273e107d3479fb4671164586022a13 100644 (file)
--- a/codec2-dev/src/sine.c
+++ b/codec2-dev/src/sine.c
@@ -80,6 +80,7 @@ C2CONST c2const_create(int Fs) {
      fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
      fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
      fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
+    fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
  
      return c2const;
  }
diff --git a/codec2-dev/unittest/tnlp.c b/codec2-dev/unittest/tnlp.c

index b75b369b1f0e4cae85522cb3b5ff95c6e8b2d785..f5ea4b6ef70894f62a3b0f178d00c7473d40b294 100644 (file)
--- a/codec2-dev/unittest/tnlp.c
+++ b/codec2-dev/unittest/tnlp.c
@@ -25,11 +25,6 @@
    along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
  
-#define N 80           /* frame size */
-#define M 320          /* pitch analysis window size */
-#define PITCH_MIN 20
-#define PITCH_MAX 160
-#define TNLP
  
  #include <stdlib.h>
  #include <stdio.h>
@@ -46,11 +41,11 @@ int   frames;
  
  /*---------------------------------------------------------------------------*\
  
- switch_present()
+   switch_present()
  
- Searches the command line arguments for a "switch".  If the switch is
- found, returns the command line argument where it ws found, else returns
- NULL.
+   Searches the command line arguments for a "switch".  If the switch is
+   found, returns the command line argument where it was found, else returns
+   NULL.
  
  \*---------------------------------------------------------------------------*/
  
@@ -74,30 +69,36 @@ int switch_present(sw,argc,argv)
  
  \*---------------------------------------------------------------------------*/
  
-int main(argc,argv)
-int argc;
-char *argv[];
+int main(int argc, char *argv[])
  {
+    if (argc < 3) {
+       printf("\nusage: tnlp InputRawSpeechFile Outputf0PitchTextFile "
+              "[--dump DumpFile] [--Fs SampleRateHz]\n");
+        exit(1);
+    }
+    
+    int Fs = 8000;
+    if (switch_present("--Fs",argc,argv)) {
+        Fs = atoi(argv[argc+1]);
+    }
+
+    C2CONST c2const = c2const_create(Fs);
+    int   n = c2const.n_samp;
+    int   m = c2const.m_pitch;
      FILE *fin,*fout;
-    short buf[N];
-    float Sn[M];               /* float input speech samples */
+    short buf[n];
+    float Sn[m];               /* float input speech samples */
      kiss_fft_cfg  fft_fwd_cfg;
      COMP  Sw[FFT_ENC];         /* DFT of Sn[] */
-    float w[M];                        /* time domain hamming window */
+    float w[m];                        /* time domain hamming window */
      COMP  W[FFT_ENC];          /* DFT of w[] */
-    float pitch;
+    float pitch_samples;
      int   i;
-    float prev_Wo;
+    float f0, prev_f0;
      void  *nlp_states;
-#ifdef DUMP
+    #ifdef DUMP
      int   dump;
-#endif
-
-    if (argc < 3) {
-       printf("\nusage: tnlp InputRawSpeechFile OutputPitchTextFile "
-              "[--dump DumpFile]\n");
-        exit(1);
-    }
+    #endif
  
      /* Input file */
  
@@ -113,46 +114,48 @@ char *argv[];
        exit(1);
      }
  
-#ifdef DUMP
+    #ifdef DUMP
      dump = switch_present("--dump",argc,argv);
      if (dump)
        dump_on(argv[dump+1]);
-#else
-/// TODO
-/// #warning "Compile with -DDUMP if you expect to dump anything."
-#endif
+    #else
+    /// TODO
+    /// #warning "Compile with -DDUMP if you expect to dump anything."
+    #endif
  
-    nlp_states = nlp_create(M);
+    for(i=0; i<m; i++) {
+      Sn[i] = 0.0;
+    }
+
+    nlp_states = nlp_create(&c2const);
      fft_fwd_cfg = kiss_fft_alloc(FFT_ENC, 0, NULL, NULL);
-    make_analysis_window(fft_fwd_cfg, w, W);
+    make_analysis_window(&c2const, fft_fwd_cfg, w, W);
  
      frames = 0;
-    prev_Wo = 0;
-    while(fread(buf,sizeof(short),N,fin)) {
-      printf("%d\n", frames++);
-
+    prev_f0 = 1/P_MAX_S;
+    while(fread(buf, sizeof(short), n, fin)) {
        /* Update input speech buffers */
  
-      for(i=0; i<M-N; i++)
-        Sn[i] = Sn[i+N];
-      for(i=0; i<N; i++)
-        Sn[i+M-N] = buf[i];
-      dft_speech(fft_fwd_cfg, Sw, Sn, w);
-#ifdef DUMP
-      dump_Sn(Sn); dump_Sw(Sw);
-#endif
+      for(i=0; i<m-n; i++)
+        Sn[i] = Sn[i+n];
+      for(i=0; i<n; i++)
+        Sn[i+m-n] = buf[i];
+      dft_speech(&c2const, fft_fwd_cfg, Sw, Sn, w);
+      #ifdef DUMP
+      dump_Sn(m, Sn); dump_Sw(Sw);
+      #endif
  
-      nlp(nlp_states,Sn,N,PITCH_MIN,PITCH_MAX,&pitch,Sw,W, &prev_Wo);
-      prev_Wo = TWO_PI/pitch;
+      f0 = nlp(nlp_states, Sn, n, &pitch_samples, Sw, W, &prev_f0);
  
-      fprintf(fout,"%f\n",pitch);
+      fprintf(stderr,"%d %f %f\n", frames++, f0, pitch_samples);
+      fprintf(fout,"%f %f\n", f0, pitch_samples);
      }
  
      fclose(fin);
      fclose(fout);
-#ifdef DUMP
+    #ifdef DUMP
      if (dump) dump_off();
-#endif
+    #endif
      nlp_destroy(nlp_states);
  
      return 0;
author	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Sun, 4 Jun 2017 07:18:43 +0000 (07:18 +0000)
committer	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Sun, 4 Jun 2017 07:18:43 +0000 (07:18 +0000)
codec2-dev/src/c2sim.c		patch \| blob \| history
codec2-dev/src/codec2.c		patch \| blob \| history
codec2-dev/src/codec2_internal.h		patch \| blob \| history
codec2-dev/src/defines.h		patch \| blob \| history
codec2-dev/src/nlp.c		patch \| blob \| history
codec2-dev/src/nlp.h		patch \| blob \| history
codec2-dev/src/os.h		patch \| blob \| history
codec2-dev/src/sine.c		patch \| blob \| history
codec2-dev/unittest/tnlp.c		patch \| blob \| history