From 5eba9ab7a48d25c40b85e43b6a373ae76f6faf49 Mon Sep 17 00:00:00 2001 From: drowe67 Date: Tue, 8 Sep 2009 04:42:10 +0000 Subject: [PATCH] added MBE pitch post processor to nlp.c, still debugging git-svn-id: https://svn.code.sf.net/p/freetel/code@43 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2/src/defines.h | 3 +- codec2/src/dump.c | 47 ++++++++- codec2/src/dump.h | 2 + codec2/src/globals.c | 2 +- codec2/src/lpc.c | 2 +- codec2/src/lpc.h | 2 + codec2/src/nlp.c | 225 ++++++++++++++++++++++++++++-------------- codec2/src/nlp.h | 39 ++++++++ codec2/src/phase.c | 1 + codec2/src/quantise.c | 6 +- codec2/src/quantise.h | 2 +- codec2/src/refine.c | 16 ++- codec2/src/sinedec.c | 45 +++++---- codec2/src/sinenc.c | 23 ++--- 14 files changed, 292 insertions(+), 123 deletions(-) create mode 100644 codec2/src/nlp.h diff --git a/codec2/src/defines.h b/codec2/src/defines.h index 13898e2e..c07e7a8f 100644 --- a/codec2/src/defines.h +++ b/codec2/src/defines.h @@ -51,7 +51,8 @@ /* Encoder defines */ -#define AW_ENC 512 /* maximum encoder analysis window size */ +#define NW 220 /* analysis window size */ +#define AW_ENC 512 /* maximum encoder analysis window size */ #define FFT_ENC 512 /* size of FFT used for encoder analysis */ /* Decoder defines */ diff --git a/codec2/src/dump.c b/codec2/src/dump.c index 1b8195ed..1295074b 100644 --- a/codec2/src/dump.c +++ b/codec2/src/dump.c @@ -41,6 +41,8 @@ static FILE *fpw = NULL; static FILE *flsp = NULL; static FILE *fphase = NULL; static FILE *fphase_ = NULL; +static FILE *ffw = NULL; +static FILE *fe = NULL; static char prefix[MAX_STR]; @@ -68,6 +70,10 @@ void dump_off(){ fclose(fphase); if (fphase_ != NULL) fclose(fphase_); + if (ffw != NULL) + fclose(ffw); + if (fe != NULL) + fclose(fe); } void dump_Sn(float Sn[]) { @@ -85,10 +91,10 @@ void dump_Sn(float Sn[]) { /* split across two lines to avoid max line length problems */ /* reconstruct in Octave */ - for(i=0; i + /*---------------------------------------------------------------------------*\ DEFINES \*---------------------------------------------------------------------------*/ -#define PMAX_M 600 /* maximum NLP analysis window size */ -#define COEFF 0.95 /* noth filter parameter */ -#define NTAP 48 /* Decimation LPF order */ -#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */ -#define DEC 5 /* decimation factor */ +#define PMAX_M 600 /* maximum NLP analysis window size */ +#define COEFF 0.95 /* notch filter parameter */ +#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */ +#define DEC 5 /* decimation factor */ #define SAMPLE_RATE 8000 -#define PI 3.141592654 /* mathematical constant */ -#define CNLP 0.5 /* post processor constant */ +#define PI 3.141592654 /* mathematical constant */ +#define T 0.1 /* threshold for local minima candidate */ +#define F0_MAX 500 /*---------------------------------------------------------------------------*\ @@ -100,44 +105,46 @@ float nlp_fir[] = { -1.0818124e-03 }; +float test_candidate_mbe(COMP Sw[], float f0); +extern int frames; + /*---------------------------------------------------------------------------*\ void nlp() - Determines the pitch in samples using the NLP algorithm. Returns the - fundamental in Hz. + Determines the pitch in samples using the NLP algorithm. Returns the + fundamental in Hz. Note that the actual pitch estimate is for the + centre of the M sample Sn[] vector, not the current N sample input + vector. This is (I think) a delay of 2.5 frames with N=80 samples. + You should align further analysis using this pitch estimate to be + centred on the middle of Sn[]. \*---------------------------------------------------------------------------*/ -float nlp(Sn,n,m,d,pmin,pmax,pitch) -float Sn[]; /* input speech vector */ -int n; /* frames shift (no. new samples in Sn[]) */ -int m; /* analysis window size */ -int d; /* additional delay (used for testing) */ -int pmin; /* minimum pitch value */ -int pmax; /* maximum pitch value */ -float *pitch; /* estimated pitch */ +float nlp( + float Sn[], /* input speech vector */ + int n, /* frames shift (no. new samples in Sn[]) */ + int m, /* analysis window size */ + int d, /* additional delay (used for testing) */ + int pmin, /* minimum pitch value */ + int pmax, /* maximum pitch value */ + float *pitch, /* estimated pitch period in samples */ + COMP Sw[] /* Freq domain version of Sn[] */ +) { static float sq[PMAX_M]; /* squared speech samples */ - float notch; /* current notch filter output */ + float notch; /* current notch filter output */ static float mem_x,mem_y; /* memory for notch filter */ - static float mem_fir[NTAP]; /* decimation FIR filter memory */ - COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal */ - - int gmax_bin; /* DFT bin where global maxima occurs */ - float gmax; /* global maxima value */ - float lmax; /* current local maxima value */ - int lmax_bin; /* bin of current local maxima */ - float cmax; /* chosen local maxima value */ - int cmax_bin; /* bin of chosen local maxima */ - - int mult; /* current submultiple */ - int min_bin; /* lowest possible bin */ - int bmin,bmax; /* range of local maxima search */ - float thresh; /* threshold for submultiple selection */ - - float F0; /* fundamental frequency */ - int i,j,b; + static float mem_fir[NLP_NTAP];/* decimation FIR filter memory */ + COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal */ + float gmax; + + float candidate_f0; + float f0,best_f0; /* fundamental frequency */ + float e,e_min; /* MBE cost function */ + int i,j; + float e_hz[F0_MAX]; + int bin; /* Square, notch filter at DC, and LP filter vector */ @@ -154,12 +161,12 @@ float *pitch; /* estimated pitch */ for(i=m-n+d; i gmax) { gmax = Fw[i].real; - gmax_bin = i; } } - /* Now post process estimate by searching submultiples */ - - mult = 2; - min_bin = PE_FFT_SIZE*DEC/pmax; - thresh = CNLP*gmax; - cmax_bin = gmax_bin; - - while(gmax_bin/mult >= min_bin) { - - b = gmax_bin/mult; /* determine search interval */ - bmin = 0.8*b; - bmax = 1.2*b; - if (bmin < min_bin) - bmin = min_bin; - - lmax = 0; - for (b=bmin; b<=bmax; b++) /* look for maximum in interval */ - if (Fw[b].real > lmax) { - lmax = Fw[b].real; - lmax_bin = b; - } - - if (lmax > thresh) - if (lmax > Fw[lmax_bin-1].real && lmax > Fw[lmax_bin+1].real) { - cmax = lmax; - cmax_bin = lmax_bin; - } - - mult++; - } + /* Now look for local maxima. Each local maxima is a candidate + that we test using the MBE pitch estimation algotithm */ - F0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC); - *pitch = SAMPLE_RATE/F0; + for(i=0; i Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) { + + /* local maxima found, lets test if it's big enough */ + + if (Fw[i].real > T*gmax) { + + /* OK, sample MBE cost function over +/- 10Hz range in 2.5Hz steps */ + + candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC); + if (frames == 29) + printf("candidate F0: %f\n", candidate_f0); + for(f0=candidate_f0-20; f0<=candidate_f0+20; f0+= 2.5) { + e = test_candidate_mbe(Sw, f0); + bin = floor(f0); assert((bin > 0) && (bin < F0_MAX)); + e_hz[bin] = e; + if (frames == 29) + printf("f0: %f e: %f e_min: %f best_f0: %f\n", + f0, e, e_min, best_f0); + if (e < e_min) { + e_min = e; + best_f0 = f0; + } + } + + } + } + } + dump_e(e_hz); /* Shift samples in buffer to make room for new samples */ for(i=0; i 2) { - fscanf(fp,"%f\n",&pitch); - if (pitch > P_MAX) pitch = P_MAX; - if (pitch < P_MIN) pitch = P_MIN; - } - else - pitch = P_MIN; + fscanf(fp,"%f\n",&pitch); /* construct analysis window */ -- 2.25.1