Removed statics from nlp (thanks Mathieu), and cleaned up some warnings I found when...

author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Tue, 31 Aug 2010 02:46:26 +0000 (02:46 +0000)

committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Tue, 31 Aug 2010 02:46:26 +0000 (02:46 +0000)
author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Tue, 31 Aug 2010 02:46:26 +0000 (02:46 +0000)
committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Tue, 31 Aug 2010 02:46:26 +0000 (02:46 +0000)
diff --git a/codec2/README.txt b/codec2/README.txt

index e3b14143ec14536b56aaaf1e65d58c5671a5bdf5..7cf3a395ffd8f9658f64381a94860f13f819ec43 100644 (file)
--- a/codec2/README.txt
+++ b/codec2/README.txt
@@ -19,7 +19,8 @@ Programs
  --------
  
  1/ c2enc encodes a file of speech sample to a file of encoded bits.
-One bit is stored in the LSB or each byte.
+One bit is stored in the LSB of each byte. Note this is unpacked,
+i.e. 51 bits/frame consumes 51 bytes/frame on disk.
  
  2/ c2dec decodes a file of bits to a file of speech samples.
  
diff --git a/codec2/src/c2dec.c b/codec2/src/c2dec.c

index 1d2cf99442cc698e5d4071ae1e1f77960081e76a..e22757a13d53fc6c4d5dfe317493497bb83adc2d 100644 (file)
--- a/codec2/src/c2dec.c
+++ b/codec2/src/c2dec.c
@@ -7,6 +7,10 @@
    Decodes a file of bits to a file of raw speech samples using codec2. Demo
    program for codec2.
  
+  NOTE: the bit file is not packed, 51 bits/frame actually consumes 51
+  bytes/frame on disk.  If you are using this for a real world
+  application you may want to pack the 51 bytes into 7 bytes.
+
  \*---------------------------------------------------------------------------*/
  
  /*
@@ -39,7 +43,6 @@ int main(int argc, char *argv[])
      FILE *fout;
      short buf[CODEC2_SAMPLES_PER_FRAME];
      char  bits[CODEC2_BITS_PER_FRAME];
-    int   i;
  
      if (argc != 3) {
         printf("usage: %s InputBitFile OutputRawSpeechFile\n", argv[0]);
@@ -62,8 +65,6 @@ int main(int argc, char *argv[])
  
      while(fread(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fin) ==
           CODEC2_BITS_PER_FRAME) {
-       //for(i=0; i<CODEC2_BITS_PER_FRAME; i++)
-       //    printf("bit[%d] = %d\n", i, bits[i]);
         codec2_decode(codec2, buf, bits);
         fwrite(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fout);
      }
diff --git a/codec2/src/c2enc.c b/codec2/src/c2enc.c

index a2a9e956df60751168c6091c44210ff8abb6bebd..f1171c2425f0fe3fc9b74125b2321810a8e4850c 100644 (file)
--- a/codec2/src/c2enc.c
+++ b/codec2/src/c2enc.c
@@ -8,6 +8,10 @@
    of bits (each bit is stored in the LSB or each output byte). Demo
    program for codec2.
  
+  NOTE: the bit file is not packed, 51 bits/frame actually consumes 51
+  bytes/frame on disk.  If you are using this for a real world
+  application you may want to pack the 51 bytes into 7 bytes.
+
  \*---------------------------------------------------------------------------*/
  
  /*
@@ -40,7 +44,6 @@ int main(int argc, char *argv[])
      FILE *fout;
      short buf[CODEC2_SAMPLES_PER_FRAME];
      char  bits[CODEC2_BITS_PER_FRAME];
-    int   i;
  
      if (argc != 3) {
         printf("usage: %s InputRawspeechFile OutputBitFile\n", argv[0]);
@@ -64,8 +67,6 @@ int main(int argc, char *argv[])
      while(fread(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fin) ==
           CODEC2_SAMPLES_PER_FRAME) {
         codec2_encode(codec2, bits, buf);
-       //for(i=0; i<CODEC2_BITS_PER_FRAME; i++)
-       //    printf("bit[%d] = %d\n", i, bits[i]);
         fwrite(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fout);
      }
  
diff --git a/codec2/src/c2sim.c b/codec2/src/c2sim.c

index 4b96c23a9d8699936f265412db05c0c78395758b..08748b912f58d15a8fd7c49aaa9a678c4e2a4bb5 100644 (file)
--- a/codec2/src/c2sim.c
+++ b/codec2/src/c2sim.c
@@ -118,6 +118,8 @@ int main(int argc, char *argv[])
    MODEL prev_model, interp_model;
    int decimate;
  
+  void *nlp_states;
+
    for(i=0; i<M; i++)
        Sn[i] = 1.0;
    for(i=0; i<2*N; i++)
@@ -135,6 +137,8 @@ int main(int argc, char *argv[])
        ex_phase[i] = 0.0;
    }
  
+  nlp_states = nlp_create();
+
    if (argc < 2) {
        printf("\nCodec2 - 2400 bit/s speech codec - Simulation Program\n");
        printf("            http://rowetel.com/codec2.html\n\n");
@@ -225,7 +229,7 @@ int main(int argc, char *argv[])
   
      /* Estimate pitch */
  
-    nlp(Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
+    nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
      prev_Wo = TWO_PI/pitch;
      model.Wo = TWO_PI/pitch;
  
@@ -378,6 +382,8 @@ int main(int argc, char *argv[])
    if (hand_voicing)
      fclose(fvoicing);
  
+  nlp_destroy(nlp_states);
+
    return 0;
  }
  
diff --git a/codec2/src/codec2.c b/codec2/src/codec2.c

index 70c5ed71f776843de93bf3e3a73ecff12b91a55c..f498cea65f4dc425b66cfa86972b91bd1d296b0b 100644 (file)
--- a/codec2/src/codec2.c
+++ b/codec2/src/codec2.c
@@ -54,6 +54,7 @@ typedef struct {
      float  ex_phase;     /* excitation model phase track              */
      float  bg_est;       /* background noise estimate for post filter */
      MODEL  prev_model;   /* model parameters from 20ms ago            */
+    void  *nlp;          /* pitch predictor states                    */
  } CODEC2;
  
  /*---------------------------------------------------------------------------*\
@@ -77,7 +78,11 @@ void synthesise_one_frame(CODEC2 *c2, short speech[], MODEL *model,float ak[]);
    AUTHOR......: David Rowe                           
    DATE CREATED: 21/8/2010 
  
-  Create and initialise an instance of the codec.
+  Create and initialise an instance of the codec.  Returns a pointer
+  to the codec states or NULL on failure.  One set of states is
+  sufficient for a full duplex codec (i.e. an encoder and decoder).
+  You don't need separate states for encoders and decoders.  See
+  c2enc.c and c2dec.c for examples.
  
  \*---------------------------------------------------------------------------*/
  
@@ -87,6 +92,8 @@ void *codec2_create()
      int     i,l;
  
      c2 = (CODEC2*)malloc(sizeof(CODEC2));
+    if (c2 == NULL)
+       return NULL;
  
      for(i=0; i<M; i++)
         c2->Sn[i] = 1.0;
@@ -103,6 +110,12 @@ void *codec2_create()
         c2->prev_model.A[l] = 0.0;
      c2->prev_model.Wo = TWO_PI/P_MAX;
  
+    c2->nlp = nlp_create();
+    if (c2->nlp == NULL) {
+       free (c2);
+       return NULL;
+    }
+
      return (void*)c2;
  }
  
@@ -118,7 +131,11 @@ void *codec2_create()
  
  void codec2_destroy(void *codec2_state)
  {
+    CODEC2 *c2;
+    
      assert(codec2_state != NULL);
+    c2 = (CODEC2*)codec2_state;
+    nlp_destroy(c2->nlp);
      free(codec2_state);
  }
  
@@ -306,7 +323,7 @@ void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[])
  
      /* Estimate pitch */
  
-    nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
+    nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
      c2->prev_Wo = TWO_PI/pitch;
      model->Wo = TWO_PI/pitch;
      model->L = PI/model->Wo;
diff --git a/codec2/src/nlp.c b/codec2/src/nlp.c

index b4c96189e7ac14c6673d584db28502f7a42e0b70..193ca92109f0e25a8ff1900c464ba4be7b938051 100644 (file)
--- a/codec2/src/nlp.c
+++ b/codec2/src/nlp.c
@@ -27,10 +27,13 @@
  */
  
  #include "defines.h"
-#include "dump.h"
  #include "nlp.h"
+#include "dump.h"
+#include "four1.h"
+
  #include <assert.h>
  #include <math.h>
+#include <stdlib.h>
  
  /*---------------------------------------------------------------------------*\
                                                                               
@@ -47,6 +50,7 @@
  #define T           0.1         /* threshold for local minima candidate */
  #define F0_MAX      500
  #define CNLP        0.3                /* post processor constant              */
+#define NLP_NTAP 48            /* Decimation LPF order */
  
  /*---------------------------------------------------------------------------*\
                                                                              
@@ -107,11 +111,57 @@ float nlp_fir[] = {
    -1.0818124e-03
  };
  
+typedef struct {
+    float sq[PMAX_M];       /* squared speech samples */
+    float mem_x,mem_y;       /* memory for notch filter */
+    float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
+} NLP;
+
  float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax);
  float post_process_sub_multiples(COMP Fw[], 
                                  int pmin, int pmax, float gmax, int gmax_bin,
                                  float *prev_Wo);
-extern int frames;
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  nlp_create()                                                                  
+                                                                             
+  Initialisation function for NLP pitch estimator.
+
+\*---------------------------------------------------------------------------*/
+
+void *nlp_create()
+{
+    NLP *nlp;
+    int  i;
+
+    nlp = (NLP*)malloc(sizeof(NLP));
+    if (nlp == NULL)
+       return NULL;
+
+    for(i=0; i<PMAX_M; i++)
+       nlp->sq[i] = 0.0;
+    nlp->mem_x = 0.0;
+    nlp->mem_y = 0.0;
+    for(i=0; i<NLP_NTAP; i++)
+       nlp->mem_fir[i] = 0.0;
+
+    return (void*)nlp;
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  nlp_destroy()
+                                                                             
+  Destroy function for NLP pitch estimator, frees the states from nlp_create().
+
+\*---------------------------------------------------------------------------*/
+
+void nlp_destroy(void *nlp_state)
+{
+    assert(nlp_state != NULL);
+    free(nlp_state);
+}
  
  /*---------------------------------------------------------------------------*\
                                                                               
@@ -144,6 +194,7 @@ extern int frames;
  \*---------------------------------------------------------------------------*/
  
  float nlp(
+  void *nlp_state, 
    float  Sn[],                 /* input speech vector */
    int    n,                    /* frames shift (no. new samples in Sn[]) */
    int    m,                    /* analysis window size */
@@ -154,78 +205,81 @@ float nlp(
    float *prev_Wo
  )
  {
-  static float sq[PMAX_M];     /* squared speech samples */
-  float  notch;                        /* current notch filter output */
-  static float mem_x,mem_y;     /* memory for notch filter */
-  static float mem_fir[NLP_NTAP];/* decimation FIR filter memory */
-  COMP   Fw[PE_FFT_SIZE];      /* DFT of squared signal */
-  float  gmax;
-  int    gmax_bin;
-  int   i,j;
-  float best_f0;
-
-  /* Square, notch filter at DC, and LP filter vector */
-
-  for(i=m-n; i<M; i++)                 /* square latest speech samples */
-    sq[i] = Sn[i]*Sn[i];
-
-  for(i=m-n; i<m; i++) {       /* notch filter at DC */
-    notch = sq[i] - mem_x;
-    notch += COEFF*mem_y;
-    mem_x = sq[i];
-    mem_y = notch;
-    sq[i] = notch;
-  }
-
-  for(i=m-n; i<m; i++) {       /* FIR filter vector */
-
-    for(j=0; j<NLP_NTAP-1; j++)
-      mem_fir[j] = mem_fir[j+1];
-    mem_fir[NLP_NTAP-1] = sq[i];
-
-    sq[i] = 0.0;
-    for(j=0; j<NLP_NTAP; j++)
-      sq[i] += mem_fir[j]*nlp_fir[j];
-  }
-
-  /* Decimate and DFT */
-
-  for(i=0; i<PE_FFT_SIZE; i++) {
-    Fw[i].real = 0.0;
-    Fw[i].imag = 0.0;
-  }
-  for(i=0; i<m/DEC; i++) {
-    Fw[i].real = sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
-  }
-  dump_dec(Fw);
-  four1(&Fw[-1].imag,PE_FFT_SIZE,1);
-  for(i=0; i<PE_FFT_SIZE; i++)
-    Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
-
-  dump_sq(sq);
-  dump_Fw(Fw);
-
-  /* find global peak */
-
-  gmax = 0.0;
-  for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
-    if (Fw[i].real > gmax) {
-      gmax = Fw[i].real;
-      gmax_bin = i;
+    NLP   *nlp;
+    float  notch;                  /* current notch filter output */
+    COMP   Fw[PE_FFT_SIZE];        /* DFT of squared signal */
+    float  gmax;
+    int    gmax_bin;
+    int   i,j;
+    float best_f0;
+
+    assert(nlp_state != NULL);
+    nlp = (NLP*)nlp_state;
+
+    /* Square, notch filter at DC, and LP filter vector */
+
+    for(i=m-n; i<M; i++)           /* square latest speech samples */
+       nlp->sq[i] = Sn[i]*Sn[i];
+
+    for(i=m-n; i<m; i++) {     /* notch filter at DC */
+       notch = nlp->sq[i] - nlp->mem_x;
+       notch += COEFF*nlp->mem_y;
+       nlp->mem_x = nlp->sq[i];
+       nlp->mem_y = notch;
+       nlp->sq[i] = notch;
+    }
+
+    for(i=m-n; i<m; i++) {     /* FIR filter vector */
+
+       for(j=0; j<NLP_NTAP-1; j++)
+           nlp->mem_fir[j] = nlp->mem_fir[j+1];
+       nlp->mem_fir[NLP_NTAP-1] = nlp->sq[i];
+
+       nlp->sq[i] = 0.0;
+       for(j=0; j<NLP_NTAP; j++)
+           nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
+    }
+
+    /* Decimate and DFT */
+
+    for(i=0; i<PE_FFT_SIZE; i++) {
+       Fw[i].real = 0.0;
+       Fw[i].imag = 0.0;
+    }
+    for(i=0; i<m/DEC; i++) {
+       Fw[i].real = nlp->sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
+    }
+    dump_dec(Fw);
+    four1(&Fw[-1].imag,PE_FFT_SIZE,1);
+    for(i=0; i<PE_FFT_SIZE; i++)
+       Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
+
+    dump_sq(nlp->sq);
+    dump_Fw(Fw);
+
+    /* find global peak */
+
+    gmax = 0.0;
+    gmax_bin = PE_FFT_SIZE*DEC/pmax;
+    for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
+       if (Fw[i].real > gmax) {
+           gmax = Fw[i].real;
+           gmax_bin = i;
+       }
      }
-  }
  
-  best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo);
+    best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, 
+                                        prev_Wo);
  
-  /* Shift samples in buffer to make room for new samples */
+    /* Shift samples in buffer to make room for new samples */
  
-  for(i=0; i<m-n; i++)
-    sq[i] = sq[i+n];
+    for(i=0; i<m-n; i++)
+       nlp->sq[i] = nlp->sq[i+n];
  
-  /* return pitch and F0 estimate */
+    /* return pitch and F0 estimate */
  
-  *pitch = (float)SAMPLE_RATE/best_f0;
-  return(best_f0);  
+    *pitch = (float)SAMPLE_RATE/best_f0;
+    return(best_f0);  
  }
  
  /*---------------------------------------------------------------------------*\
@@ -284,6 +338,7 @@ float post_process_sub_multiples(COMP Fw[],
             thresh = CNLP*gmax;
  
         lmax = 0;
+       lmax_bin = bmin;
         for (b=bmin; b<=bmax; b++)              /* look for maximum in interval */
             if (Fw[b].real > lmax) {
                 lmax = Fw[b].real;
diff --git a/codec2/src/nlp.h b/codec2/src/nlp.h

index 8bc7a755541439a738aec8312e2c5c1091539308..eaaae970529d6616cd54646e79c4dd037cbd9b33 100644 (file)
--- a/codec2/src/nlp.h
+++ b/codec2/src/nlp.h
@@ -29,11 +29,10 @@
  #ifndef __NLP__
  #define __NLP__
  
-#include "sine.h"
-
-#define NLP_NTAP 48     /* Decimation LPF order */
-
-float nlp(float Sn[], int n, int m, int pmin, int pmax, float *pitch, COMP Sw[], float *prev_Wo);
+void *nlp_create();
+void nlp_destroy(void *nlp_state);
+float nlp(void *nlp_state, float Sn[], int n, int m, int pmin, int pmax, 
+         float *pitch, COMP Sw[], float *prev_Wo);
  float test_candidate_mbe(COMP Sw[], float f0, COMP Sw_[]);
  
  #endif
diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c

index 25fba89ec02bed04b7315b8e5e614be198415ac9..4cc3815f30c7a672931b3b43a008d472fef3e626 100644 (file)
--- a/codec2/src/quantise.c
+++ b/codec2/src/quantise.c
@@ -834,7 +834,6 @@ void encode_amplitudes(int    lsp_indexes[],
      float lsps[LPC_ORD];
      float ak[LPC_ORD+1];
      float e;
-    int   i;
  
      e = speech_to_uq_lsps(lsps, ak, Sn, w, LPC_ORD);
      encode_lsps(lsp_indexes, lsps, LPC_ORD);
diff --git a/codec2/src/sine.c b/codec2/src/sine.c

index 5f876340fcb9513c9a50320aa1019b5a44a38fa7..9263151b7b6d85d1e024ed3458408f6ce729546b 100644 (file)
--- a/codec2/src/sine.c
+++ b/codec2/src/sine.c
@@ -272,6 +272,7 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float
    /* Initialisation */
    
    model->L = PI/model->Wo;     /* use initial pitch est. for L */
+  Wom = model->Wo;
    Em = 0.0;
    r = TWO_PI/FFT_ENC;
    
diff --git a/codec2/unittest/tnlp.c b/codec2/unittest/tnlp.c

index 0f4020c816673f96a76934ca33dc15143c94ab9d..4abf69c4efa3c3a288be41200e982c4a01037d38 100644 (file)
--- a/codec2/unittest/tnlp.c
+++ b/codec2/unittest/tnlp.c
@@ -39,6 +39,7 @@
  
  #include "defines.h"
  #include "dump.h"
+#include "sine.h"
  #include "nlp.h"
  
  int   frames;
@@ -87,7 +88,8 @@ char *argv[];
      int   i; 
      int   dump;
      float prev_Wo;
-    
+    void  *nlp_states;
+
      if (argc < 3) {
         printf("\nusage: tnlp InputRawSpeechFile OutputPitchTextFile "
                "[--dump DumpFile]\n");
@@ -112,6 +114,7 @@ char *argv[];
      if (dump) 
        dump_on(argv[dump+1]);
  
+    nlp_states = nlp_create();
      make_analysis_window(w,W);
  
      frames = 0;
@@ -128,7 +131,7 @@ char *argv[];
        dft_speech(Sw, Sn, w);
        dump_Sn(Sn); dump_Sw(Sw); 
  
-      nlp(Sn,N,M,PITCH_MIN,PITCH_MAX,&pitch,Sw,&prev_Wo);
+      nlp(nlp_states,Sn,N,M,PITCH_MIN,PITCH_MAX,&pitch,Sw,&prev_Wo);
        prev_Wo = TWO_PI/pitch;
  
        fprintf(fout,"%f\n",pitch);
@@ -137,6 +140,7 @@ char *argv[];
      fclose(fin);
      fclose(fout);
      if (dump) dump_off();
+    nlp_destroy(nlp_states);
  
      return 0;
  }
author	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Tue, 31 Aug 2010 02:46:26 +0000 (02:46 +0000)
committer	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Tue, 31 Aug 2010 02:46:26 +0000 (02:46 +0000)
codec2/README.txt		patch \| blob \| history
codec2/src/c2dec.c		patch \| blob \| history
codec2/src/c2enc.c		patch \| blob \| history
codec2/src/c2sim.c		patch \| blob \| history
codec2/src/codec2.c		patch \| blob \| history
codec2/src/nlp.c		patch \| blob \| history
codec2/src/nlp.h		patch \| blob \| history
codec2/src/quantise.c		patch \| blob \| history
codec2/src/sine.c		patch \| blob \| history
codec2/unittest/tnlp.c		patch \| blob \| history