--------
1/ c2enc encodes a file of speech sample to a file of encoded bits.
-One bit is stored in the LSB or each byte.
+One bit is stored in the LSB of each byte. Note this is unpacked,
+i.e. 51 bits/frame consumes 51 bytes/frame on disk.
2/ c2dec decodes a file of bits to a file of speech samples.
Decodes a file of bits to a file of raw speech samples using codec2. Demo
program for codec2.
+ NOTE: the bit file is not packed, 51 bits/frame actually consumes 51
+ bytes/frame on disk. If you are using this for a real world
+ application you may want to pack the 51 bytes into 7 bytes.
+
\*---------------------------------------------------------------------------*/
/*
FILE *fout;
short buf[CODEC2_SAMPLES_PER_FRAME];
char bits[CODEC2_BITS_PER_FRAME];
- int i;
if (argc != 3) {
printf("usage: %s InputBitFile OutputRawSpeechFile\n", argv[0]);
while(fread(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fin) ==
CODEC2_BITS_PER_FRAME) {
- //for(i=0; i<CODEC2_BITS_PER_FRAME; i++)
- // printf("bit[%d] = %d\n", i, bits[i]);
codec2_decode(codec2, buf, bits);
fwrite(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fout);
}
of bits (each bit is stored in the LSB or each output byte). Demo
program for codec2.
+ NOTE: the bit file is not packed, 51 bits/frame actually consumes 51
+ bytes/frame on disk. If you are using this for a real world
+ application you may want to pack the 51 bytes into 7 bytes.
+
\*---------------------------------------------------------------------------*/
/*
FILE *fout;
short buf[CODEC2_SAMPLES_PER_FRAME];
char bits[CODEC2_BITS_PER_FRAME];
- int i;
if (argc != 3) {
printf("usage: %s InputRawspeechFile OutputBitFile\n", argv[0]);
while(fread(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fin) ==
CODEC2_SAMPLES_PER_FRAME) {
codec2_encode(codec2, bits, buf);
- //for(i=0; i<CODEC2_BITS_PER_FRAME; i++)
- // printf("bit[%d] = %d\n", i, bits[i]);
fwrite(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fout);
}
MODEL prev_model, interp_model;
int decimate;
+ void *nlp_states;
+
for(i=0; i<M; i++)
Sn[i] = 1.0;
for(i=0; i<2*N; i++)
ex_phase[i] = 0.0;
}
+ nlp_states = nlp_create();
+
if (argc < 2) {
printf("\nCodec2 - 2400 bit/s speech codec - Simulation Program\n");
printf(" http://rowetel.com/codec2.html\n\n");
/* Estimate pitch */
- nlp(Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
+ nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
prev_Wo = TWO_PI/pitch;
model.Wo = TWO_PI/pitch;
if (hand_voicing)
fclose(fvoicing);
+ nlp_destroy(nlp_states);
+
return 0;
}
float ex_phase; /* excitation model phase track */
float bg_est; /* background noise estimate for post filter */
MODEL prev_model; /* model parameters from 20ms ago */
+ void *nlp; /* pitch predictor states */
} CODEC2;
/*---------------------------------------------------------------------------*\
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
- Create and initialise an instance of the codec.
+ Create and initialise an instance of the codec. Returns a pointer
+ to the codec states or NULL on failure. One set of states is
+ sufficient for a full duplex codec (i.e. an encoder and decoder).
+ You don't need separate states for encoders and decoders. See
+ c2enc.c and c2dec.c for examples.
\*---------------------------------------------------------------------------*/
int i,l;
c2 = (CODEC2*)malloc(sizeof(CODEC2));
+ if (c2 == NULL)
+ return NULL;
for(i=0; i<M; i++)
c2->Sn[i] = 1.0;
c2->prev_model.A[l] = 0.0;
c2->prev_model.Wo = TWO_PI/P_MAX;
+ c2->nlp = nlp_create();
+ if (c2->nlp == NULL) {
+ free (c2);
+ return NULL;
+ }
+
return (void*)c2;
}
void codec2_destroy(void *codec2_state) /* releases a codec instance and the pitch estimator state it holds */
{
+ CODEC2 *c2;
+
assert(codec2_state != NULL); /* a NULL handle is treated as a caller error */
+ c2 = (CODEC2*)codec2_state;
+ nlp_destroy(c2->nlp); /* free the pitch estimator state first, while c2 is still valid */
free(codec2_state); /* then free the codec state block itself */
}
/* Estimate pitch */
- nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
+ nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
c2->prev_Wo = TWO_PI/pitch;
model->Wo = TWO_PI/pitch;
model->L = PI/model->Wo;
*/
#include "defines.h"
-#include "dump.h"
#include "nlp.h"
+#include "dump.h"
+#include "four1.h"
+
#include <assert.h>
#include <math.h>
+#include <stdlib.h>
/*---------------------------------------------------------------------------*\
#define T 0.1 /* threshold for local minima candidate */
#define F0_MAX 500
#define CNLP 0.3 /* post processor constant */
+#define NLP_NTAP 48 /* Decimation LPF order */
/*---------------------------------------------------------------------------*\
-1.0818124e-03
};
+typedef struct { /* state carried between nlp() calls; allocated by nlp_create() */
+ float sq[PMAX_M]; /* squared speech samples */
+ float mem_x,mem_y; /* memory for notch filter */
+ float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
+} NLP;
+
float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax);
float post_process_sub_multiples(COMP Fw[],
int pmin, int pmax, float gmax, int gmax_bin,
float *prev_Wo);
-extern int frames;
+
+/*---------------------------------------------------------------------------*\
+
+ nlp_create()
+
+ Initialisation function for NLP pitch estimator.
+
+\*---------------------------------------------------------------------------*/
+
+void *nlp_create() /* allocates and zeroes one NLP state block; returns NULL if malloc fails */
+{
+ NLP *nlp;
+ int i;
+
+ nlp = (NLP*)malloc(sizeof(NLP));
+ if (nlp == NULL)
+ return NULL; /* allocation failed; caller must check for NULL */
+
+ for(i=0; i<PMAX_M; i++) /* clear the squared-sample buffer */
+ nlp->sq[i] = 0.0;
+ nlp->mem_x = 0.0; /* clear notch filter memory */
+ nlp->mem_y = 0.0;
+ for(i=0; i<NLP_NTAP; i++) /* clear decimation FIR filter memory */
+ nlp->mem_fir[i] = 0.0;
+
+ return (void*)nlp; /* handed back as an opaque pointer; release with nlp_destroy() */
+}
+
+/*---------------------------------------------------------------------------*\
+
+ nlp_destroy()
+
+ Frees the NLP pitch estimator state allocated by nlp_create().
+
+\*---------------------------------------------------------------------------*/
+
+void nlp_destroy(void *nlp_state) /* frees a state block obtained from nlp_create() */
+{
+ assert(nlp_state != NULL); /* a NULL handle is treated as a caller error */
+ free(nlp_state); /* the state is a single malloc'd block, so one free releases it */
+}
/*---------------------------------------------------------------------------*\
\*---------------------------------------------------------------------------*/
float nlp(
+ void *nlp_state,
float Sn[], /* input speech vector */
int n, /* frames shift (no. new samples in Sn[]) */
int m, /* analysis window size */
float *prev_Wo
)
{
- static float sq[PMAX_M]; /* squared speech samples */
- float notch; /* current notch filter output */
- static float mem_x,mem_y; /* memory for notch filter */
- static float mem_fir[NLP_NTAP];/* decimation FIR filter memory */
- COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal */
- float gmax;
- int gmax_bin;
- int i,j;
- float best_f0;
-
- /* Square, notch filter at DC, and LP filter vector */
-
- for(i=m-n; i<M; i++) /* square latest speech samples */
- sq[i] = Sn[i]*Sn[i];
-
- for(i=m-n; i<m; i++) { /* notch filter at DC */
- notch = sq[i] - mem_x;
- notch += COEFF*mem_y;
- mem_x = sq[i];
- mem_y = notch;
- sq[i] = notch;
- }
-
- for(i=m-n; i<m; i++) { /* FIR filter vector */
-
- for(j=0; j<NLP_NTAP-1; j++)
- mem_fir[j] = mem_fir[j+1];
- mem_fir[NLP_NTAP-1] = sq[i];
-
- sq[i] = 0.0;
- for(j=0; j<NLP_NTAP; j++)
- sq[i] += mem_fir[j]*nlp_fir[j];
- }
-
- /* Decimate and DFT */
-
- for(i=0; i<PE_FFT_SIZE; i++) {
- Fw[i].real = 0.0;
- Fw[i].imag = 0.0;
- }
- for(i=0; i<m/DEC; i++) {
- Fw[i].real = sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
- }
- dump_dec(Fw);
- four1(&Fw[-1].imag,PE_FFT_SIZE,1);
- for(i=0; i<PE_FFT_SIZE; i++)
- Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
-
- dump_sq(sq);
- dump_Fw(Fw);
-
- /* find global peak */
-
- gmax = 0.0;
- for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
- if (Fw[i].real > gmax) {
- gmax = Fw[i].real;
- gmax_bin = i;
+ NLP *nlp;
+ float notch; /* current notch filter output */
+ COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal */
+ float gmax;
+ int gmax_bin;
+ int i,j;
+ float best_f0;
+
+ assert(nlp_state != NULL);
+ nlp = (NLP*)nlp_state;
+
+ /* Square, notch filter at DC, and LP filter vector */
+
+ for(i=m-n; i<M; i++) /* square latest speech samples */
+ nlp->sq[i] = Sn[i]*Sn[i];
+
+ for(i=m-n; i<m; i++) { /* notch filter at DC */
+ notch = nlp->sq[i] - nlp->mem_x;
+ notch += COEFF*nlp->mem_y;
+ nlp->mem_x = nlp->sq[i];
+ nlp->mem_y = notch;
+ nlp->sq[i] = notch;
+ }
+
+ for(i=m-n; i<m; i++) { /* FIR filter vector */
+
+ for(j=0; j<NLP_NTAP-1; j++)
+ nlp->mem_fir[j] = nlp->mem_fir[j+1];
+ nlp->mem_fir[NLP_NTAP-1] = nlp->sq[i];
+
+ nlp->sq[i] = 0.0;
+ for(j=0; j<NLP_NTAP; j++)
+ nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
+ }
+
+ /* Decimate and DFT */
+
+ for(i=0; i<PE_FFT_SIZE; i++) {
+ Fw[i].real = 0.0;
+ Fw[i].imag = 0.0;
+ }
+ for(i=0; i<m/DEC; i++) {
+ Fw[i].real = nlp->sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
+ }
+ dump_dec(Fw);
+ four1(&Fw[-1].imag,PE_FFT_SIZE,1);
+ for(i=0; i<PE_FFT_SIZE; i++)
+ Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
+
+ dump_sq(nlp->sq);
+ dump_Fw(Fw);
+
+ /* find global peak */
+
+ gmax = 0.0;
+ gmax_bin = PE_FFT_SIZE*DEC/pmax;
+ for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
+ if (Fw[i].real > gmax) {
+ gmax = Fw[i].real;
+ gmax_bin = i;
+ }
}
- }
- best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo);
+ best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin,
+ prev_Wo);
- /* Shift samples in buffer to make room for new samples */
+ /* Shift samples in buffer to make room for new samples */
- for(i=0; i<m-n; i++)
- sq[i] = sq[i+n];
+ for(i=0; i<m-n; i++)
+ nlp->sq[i] = nlp->sq[i+n];
- /* return pitch and F0 estimate */
+ /* return pitch and F0 estimate */
- *pitch = (float)SAMPLE_RATE/best_f0;
- return(best_f0);
+ *pitch = (float)SAMPLE_RATE/best_f0;
+ return(best_f0);
}
/*---------------------------------------------------------------------------*\
thresh = CNLP*gmax;
lmax = 0;
+ lmax_bin = bmin;
for (b=bmin; b<=bmax; b++) /* look for maximum in interval */
if (Fw[b].real > lmax) {
lmax = Fw[b].real;
#ifndef __NLP__
#define __NLP__
-#include "sine.h"
-
-#define NLP_NTAP 48 /* Decimation LPF order */
-
-float nlp(float Sn[], int n, int m, int pmin, int pmax, float *pitch, COMP Sw[], float *prev_Wo);
+void *nlp_create();
+void nlp_destroy(void *nlp_state);
+float nlp(void *nlp_state, float Sn[], int n, int m, int pmin, int pmax,
+ float *pitch, COMP Sw[], float *prev_Wo);
float test_candidate_mbe(COMP Sw[], float f0, COMP Sw_[]);
#endif
float lsps[LPC_ORD];
float ak[LPC_ORD+1];
float e;
- int i;
e = speech_to_uq_lsps(lsps, ak, Sn, w, LPC_ORD);
encode_lsps(lsp_indexes, lsps, LPC_ORD);
/* Initialisation */
model->L = PI/model->Wo; /* use initial pitch est. for L */
+ Wom = model->Wo;
Em = 0.0;
r = TWO_PI/FFT_ENC;
#include "defines.h"
#include "dump.h"
+#include "sine.h"
#include "nlp.h"
int frames;
int i;
int dump;
float prev_Wo;
-
+ void *nlp_states;
+
if (argc < 3) {
printf("\nusage: tnlp InputRawSpeechFile OutputPitchTextFile "
"[--dump DumpFile]\n");
if (dump)
dump_on(argv[dump+1]);
+ nlp_states = nlp_create();
make_analysis_window(w,W);
frames = 0;
dft_speech(Sw, Sn, w);
dump_Sn(Sn); dump_Sw(Sw);
- nlp(Sn,N,M,PITCH_MIN,PITCH_MAX,&pitch,Sw,&prev_Wo);
+ nlp(nlp_states,Sn,N,M,PITCH_MIN,PITCH_MAX,&pitch,Sw,&prev_Wo);
prev_Wo = TWO_PI/pitch;
fprintf(fout,"%f\n",pitch);
fclose(fin);
fclose(fout);
if (dump) dump_off();
+ nlp_destroy(nlp_states);
return 0;
}