/* Encoder defines */
-#define AW_ENC 512 /* maximum encoder analysis window size */
+#define NW 220 /* analysis window size */
+#define AW_ENC 512 /* maximum encoder analysis window size */
#define FFT_ENC 512 /* size of FFT used for encoder analysis */
/* Decoder defines */
static FILE *flsp = NULL;
static FILE *fphase = NULL;
static FILE *fphase_ = NULL;
+static FILE *ffw = NULL;
+static FILE *fe = NULL;
static char prefix[MAX_STR];
fclose(fphase);
if (fphase_ != NULL)
fclose(fphase_);
+ if (ffw != NULL)
+ fclose(ffw);
+ if (fe != NULL)
+ fclose(fe);
}
void dump_Sn(float Sn[]) {
/* split across two lines to avoid max line length problems */
/* reconstruct in Octave */
- for(i=0; i<AW_ENC/2; i++)
+ for(i=0; i<M/2; i++)
fprintf(fsn,"%f\t",Sn[i]);
fprintf(fsn,"\n");
- for(i=AW_ENC/2; i<AW_ENC; i++)
+ for(i=M/2; i<M; i++)
fprintf(fsn,"%f\t",Sn[i]);
fprintf(fsn,"\n");
}
fprintf(flsp,"\n");
}
+void dump_Fw(COMP Fw[]) {
+ int i;
+ char s[MAX_STR];
+
+ if (!dumpon) return;
+
+ if (ffw == NULL) {
+ sprintf(s,"%s_fw.txt", prefix);
+ ffw = fopen(s, "wt");
+ assert(ffw != NULL);
+ }
+
+ for(i=0; i<256; i++)
+ fprintf(ffw,"%f\t",Fw[i].real);
+ fprintf(ffw,"\n");
+}
+
+void dump_e(float e_hz[]) {
+ int i;
+ char s[MAX_STR];
+
+ if (!dumpon) return;
+
+ if (fe == NULL) {
+ sprintf(s,"%s_e.txt", prefix);
+ fe = fopen(s, "wt");
+ assert(fe != NULL);
+ }
+
+ for(i=0; i<500/2; i++)
+ fprintf(fe,"%f\t",e_hz[i]);
+ fprintf(fe,"\n");
+ for(i=500/2; i<500; i++)
+ fprintf(fe,"%f\t",e_hz[i]);
+ fprintf(fe,"\n");
+}
+
void dump_lsp(float lsp[]);
void dump_phase(float phase[]);
void dump_phase_(float phase[]);
+void dump_Fw(COMP Fw[]);
+void dump_e(float e_hz[]);
#endif
/* Globals used in encoder and decoder */
int frames; /* number of frames processed so far */
-float Sn[M+AW_ENC/2]; /* float input speech samples */
+float Sn[M]; /* float input speech samples */
MODEL model; /* model parameters for the current frame */
int Nw; /* number of samples in analysis window */
float sig; /* energy of current frame */
{
int i;
- for(i=0; i<=order; i++)
+ for(i=1; i<=order; i++)
akw[i] = ak[i]*pow(gamma,(float)i);
}
#ifndef __LPC__
#define __LPC__
+#define LPC_MAX_ORDER 20
+
void hanning_window(float Sn[], float Wn[], int Nsam);
void autocorrelate(float Sn[], float Rn[], int Nsam, int order);
void levinson_durbin(float R[], float lpcs[], int order);
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include "nlp.h"
+#include "sine.h"
+#include "dump.h"
+#include <assert.h>
+
/*---------------------------------------------------------------------------*\
DEFINES
\*---------------------------------------------------------------------------*/
-#define PMAX_M 600 /* maximum NLP analysis window size */
-#define COEFF 0.95 /* noth filter parameter */
-#define NTAP 48 /* Decimation LPF order */
-#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */
-#define DEC 5 /* decimation factor */
+#define PMAX_M 600 /* maximum NLP analysis window size */
+#define COEFF 0.95 /* notch filter parameter */
+#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */
+#define DEC 5 /* decimation factor */
#define SAMPLE_RATE 8000
-#define PI 3.141592654 /* mathematical constant */
-#define CNLP 0.5 /* post processor constant */
+#define PI 3.141592654 /* mathematical constant */
+#define T 0.1 /* threshold for local minima candidate */
+#define F0_MAX 500
/*---------------------------------------------------------------------------*\
-1.0818124e-03
};
+float test_candidate_mbe(COMP Sw[], float f0);
+extern int frames;
+
/*---------------------------------------------------------------------------*\
void nlp()
- Determines the pitch in samples using the NLP algorithm. Returns the
- fundamental in Hz.
+ Determines the pitch in samples using the NLP algorithm. Returns the
+ fundamental in Hz. Note that the actual pitch estimate is for the
+ centre of the M sample Sn[] vector, not the current N sample input
+ vector. This is (I think) a delay of 2.5 frames with N=80 samples.
+ You should align further analysis using this pitch estimate to be
+ centred on the middle of Sn[].
\*---------------------------------------------------------------------------*/
-float nlp(Sn,n,m,d,pmin,pmax,pitch)
-float Sn[]; /* input speech vector */
-int n; /* frames shift (no. new samples in Sn[]) */
-int m; /* analysis window size */
-int d; /* additional delay (used for testing) */
-int pmin; /* minimum pitch value */
-int pmax; /* maximum pitch value */
-float *pitch; /* estimated pitch */
+float nlp(
+ float Sn[], /* input speech vector */
+ int n, /* frames shift (no. new samples in Sn[]) */
+ int m, /* analysis window size */
+ int d, /* additional delay (used for testing) */
+ int pmin, /* minimum pitch value */
+ int pmax, /* maximum pitch value */
+ float *pitch, /* estimated pitch period in samples */
+ COMP Sw[] /* Freq domain version of Sn[] */
+)
{
static float sq[PMAX_M]; /* squared speech samples */
- float notch; /* current notch filter output */
+ float notch; /* current notch filter output */
static float mem_x,mem_y; /* memory for notch filter */
- static float mem_fir[NTAP]; /* decimation FIR filter memory */
- COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal */
-
- int gmax_bin; /* DFT bin where global maxima occurs */
- float gmax; /* global maxima value */
- float lmax; /* current local maxima value */
- int lmax_bin; /* bin of current local maxima */
- float cmax; /* chosen local maxima value */
- int cmax_bin; /* bin of chosen local maxima */
-
- int mult; /* current submultiple */
- int min_bin; /* lowest possible bin */
- int bmin,bmax; /* range of local maxima search */
- float thresh; /* threshold for submultiple selection */
-
- float F0; /* fundamental frequency */
- int i,j,b;
+ static float mem_fir[NLP_NTAP];/* decimation FIR filter memory */
+ COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal */
+ float gmax;
+
+ float candidate_f0;
+ float f0,best_f0; /* fundamental frequency */
+ float e,e_min; /* MBE cost function */
+ int i,j;
+ float e_hz[F0_MAX];
+ int bin;
/* Square, notch filter at DC, and LP filter vector */
for(i=m-n+d; i<m+d; i++) { /* FIR filter vector */
- for(j=0; j<NTAP-1; j++)
+ for(j=0; j<NLP_NTAP-1; j++)
mem_fir[j] = mem_fir[j+1];
- mem_fir[NTAP-1] = sq[i];
+ mem_fir[NLP_NTAP-1] = sq[i];
sq[i] = 0.0;
- for(j=0; j<NTAP; j++)
+ for(j=0; j<NLP_NTAP; j++)
sq[i] += mem_fir[j]*nlp_fir[j];
}
for(i=0; i<PE_FFT_SIZE; i++)
Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
- /* find global peak within limits, this corresponds to F0 estimate */
+ dump_Fw(Fw);
+
+ /* find global peak */
gmax = 0.0;
for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
if (Fw[i].real > gmax) {
gmax = Fw[i].real;
- gmax_bin = i;
}
}
- /* Now post process estimate by searching submultiples */
-
- mult = 2;
- min_bin = PE_FFT_SIZE*DEC/pmax;
- thresh = CNLP*gmax;
- cmax_bin = gmax_bin;
-
- while(gmax_bin/mult >= min_bin) {
-
- b = gmax_bin/mult; /* determine search interval */
- bmin = 0.8*b;
- bmax = 1.2*b;
- if (bmin < min_bin)
- bmin = min_bin;
-
- lmax = 0;
- for (b=bmin; b<=bmax; b++) /* look for maximum in interval */
- if (Fw[b].real > lmax) {
- lmax = Fw[b].real;
- lmax_bin = b;
- }
-
- if (lmax > thresh)
- if (lmax > Fw[lmax_bin-1].real && lmax > Fw[lmax_bin+1].real) {
- cmax = lmax;
- cmax_bin = lmax_bin;
- }
-
- mult++;
- }
+ /* Now look for local maxima. Each local maxima is a candidate
+ that we test using the MBE pitch estimation algotithm */
- F0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
- *pitch = SAMPLE_RATE/F0;
+ for(i=0; i<F0_MAX; i++)
+ e_hz[i] = -1;
+ e_min = 1E32;
+ best_f0 = 50;
+ for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
+ if ((Fw[i].real > Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) {
+
+ /* local maxima found, lets test if it's big enough */
+
+ if (Fw[i].real > T*gmax) {
+
+ /* OK, sample MBE cost function over +/- 10Hz range in 2.5Hz steps */
+
+ candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
+ if (frames == 29)
+ printf("candidate F0: %f\n", candidate_f0);
+ for(f0=candidate_f0-20; f0<=candidate_f0+20; f0+= 2.5) {
+ e = test_candidate_mbe(Sw, f0);
+ bin = floor(f0); assert((bin > 0) && (bin < F0_MAX));
+ e_hz[bin] = e;
+ if (frames == 29)
+ printf("f0: %f e: %f e_min: %f best_f0: %f\n",
+ f0, e, e_min, best_f0);
+ if (e < e_min) {
+ e_min = e;
+ best_f0 = f0;
+ }
+ }
+
+ }
+ }
+ }
+ dump_e(e_hz);
/* Shift samples in buffer to make room for new samples */
for(i=0; i<m-n+d; i++)
sq[i] = sq[i+n];
- return(F0);
+ /* return pitch and F0 estimate */
+
+ *pitch = (float)SAMPLE_RATE/best_f0;
+ return(best_f0);
}
+/*---------------------------------------------------------------------------*\
+
+ test_candidate_mbe()
+
+ Returns the error of the MBE cost function for the input f0.
+
+ Note: I think a lot of the operations below can be simplified as
+ W[].imag = 0 and has been normalised such that den always equals 1.
+
+\*---------------------------------------------------------------------------*/
+
+float test_candidate_mbe(
+ COMP Sw[],
+ float f0
+)
+{
+ COMP Sw_[FFT_ENC]; /* DFT of all voiced synthesised signal */
+ int l,al,bl,m; /* loop variables */
+ COMP Am; /* amplitude sample for this band */
+ int offset; /* centers Hw[] about current harmonic */
+ float den; /* denominator of Am expression */
+ float error; /* accumulated error between originl and synthesised */
+ float Wo; /* current "test" fundamental freq. */
+ int L; /* number of bands */
+
+ Wo = f0*(2*PI/SAMPLE_RATE);
+ L = floor(PI/Wo);;
+
+ error = 0.0;
+
+ /* Just test across the harmonics in the first 1000 Hz (L/4) */
+
+ for(l=1; l<L/4; l++) {
+ Am.real = 0.0;
+ Am.imag = 0.0;
+ den = 0.0;
+ al = ceil((l - 0.5)*Wo*FFT_ENC/TWO_PI);
+ bl = ceil((l + 0.5)*Wo*FFT_ENC/TWO_PI);
+
+ /* Estimate amplitude of harmonic assuming harmonic is totally voiced */
+
+ for(m=al; m<bl; m++) {
+ offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
+ Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag;
+ Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag;
+ den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag;
+ }
+
+ Am.real = Am.real/den;
+ Am.imag = Am.imag/den;
+
+ /* Determine error between estimated harmonic and original */
+
+ for(m=al; m<bl; m++) {
+ offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
+ Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag;
+ Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real;
+ error += (Sw[m].real - Sw_[m].real)*(Sw[m].real - Sw_[m].real);
+ error += (Sw[m].imag - Sw_[m].imag)*(Sw[m].imag - Sw_[m].imag);
+ }
+ }
+
+ return error;
+}
+
--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: nlp.c
+ AUTHOR......: David Rowe
+ DATE CREATED: 23/3/93
+
+ Non Linear Pitch (NLP) estimation functions.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2009 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __NLP__
+#define __NLP__
+
+#include "sine.h"
+
+#define NLP_NTAP 48 /* Decimation LPF order */
+
+float nlp(float Sn[], int n, int m, int d, int pmin, int pmax, float *pitch,
+ COMP Sw[]);
+
+#endif
for(i=0; i<=order; i++)
Pw[i].real = aks[i];
+
four1(&Pw[-1].imag,FFT_DEC,-1);
/* Sample magnitude and phase at harmonics */
for(i=0; i<m; i++) {
scan_line(ftext, &cb[k*lines++], k);
}
- printf("%d lines\n",lines);
fclose(ftext);
}
float Sn[], /* Input frame of speech samples */
MODEL *model, /* sinusoidal model parameters */
int order, /* LPC model order */
- int lsp_quantisation /* optional LSP quantisation if non-zero */
+ int lsp_quantisation, /* optional LSP quantisation if non-zero */
+ float ak[] /* output aks */
)
{
float Wn[AW_ENC];
float R[MAX_ORDER+1];
- float ak[MAX_ORDER+1];
float E;
int i;
float snr;
float lsp[MAX_ORDER];
- float lsp_[MAX_ORDER]; /* quantised LSPs */
int roots; /* number of LSP roots found */
int index;
float se;
#include "sine.h"
void quantise_init();
-float lpc_model_amplitudes(float Sn[], MODEL *model, int order, int lsp);
+float lpc_model_amplitudes(float Sn[], MODEL *model, int order,int lsp,float ak[]);
void aks_to_M2(float ak[], int order, MODEL *model, float E, float *snr);
#endif
Sw[i].imag = 0.0;
}
- /* centre analysis window on time axis */
+ /* Centre analysis window on time axis, we need to arrange input
+ to FFT this way to make FFT phases correct */
- for(i=0; i<AW_ENC/2; i++)
- Sw[i].real = Sn[i+AW_ENC/2]*w[i+AW_ENC/2];
- for(i=FFT_ENC-AW_ENC/2; i<FFT_ENC; i++)
- Sw[i].real = Sn[i-FFT_ENC+AW_ENC/2]*w[i-FFT_ENC+AW_ENC/2];
+ /* move 2nd half to start of FFT input vector */
+
+ for(i=0; i<NW/2; i++)
+ Sw[i].real = Sn[i+M/2]*w[i+AW_ENC/2];
+
+ /* move 1st half to end of FFT input vector */
+
+ for(i=0; i<NW/2; i++)
+ Sw[FFT_ENC-NW/2+i].real = Sn[i+M/2-NW/2]*w[i+AW_ENC/2-NW/2];
four1(&Sw[-1].imag,FFT_ENC,-1);
}
int lpc_model, order;
int lsp, lsp_quantiser;
+ float ak[LPC_MAX_ORDER+1];
+
int dump;
int phase, phase_model;
/* phase_model 0: zero phase
phase_model 1: 1st order polynomial */
phase = switch_present("--phase",argc,argv);
- if (phase)
+ if (phase) {
phase_model = atoi(argv[phase+1]);
- assert((phase_model == 0) || (phase_model == 1));
+ assert((phase_model == 0) || (phase_model == 1));
+ }
/* Initialise ------------------------------------------------------------*/
dump_model(&model);
+ /* optional LPC model amplitudes */
+
+ if (lpc_model) {
+ snr = lpc_model_amplitudes(Sn, &model, order, lsp_quantiser, ak);
+ sum_snr += snr;
+ dump_quantised_model(&model);
+ }
+
/* optional phase modelling */
if (phase) {
float Wn[AW_ENC]; /* windowed speech samples */
float Rk[PHASE_LPC_ORD+1]; /* autocorrelation coeffs */
- float aks[PHASE_LPC_ORD+1];
COMP H[MAX_AMP]; /* LPC freq domain samples */
int i_min;
COMP min_Am;
dump_phase(&model.phi[0]);
- /* Determine LPC model using time domain LPC. A little
- further down the development track optionally LPCs from lpc
- modelling/LSP quant for phase modelling */
+ if (!lpc_model) {
+ /* Determine LPC model using time domain LPC if we don't have
+ any LPCs yet */
- for(i=0; i<AW_ENC; i++)
- Wn[i] = Sn[i]*w[i];
- autocorrelate(Wn,Rk,AW_ENC,PHASE_LPC_ORD);
- levinson_durbin(Rk,aks,PHASE_LPC_ORD);
+ for(i=0; i<AW_ENC; i++)
+ Wn[i] = Sn[i]*w[i];
+ autocorrelate(Wn,Rk,AW_ENC,PHASE_LPC_ORD);
+ levinson_durbin(Rk,ak,PHASE_LPC_ORD);
+ }
+ else
+ assert(order == PHASE_LPC_ORD);
- snr = phase_model_first_order(aks, H, &i_min, &min_Am);
+ snr = phase_model_first_order(ak, H, &i_min, &min_Am);
+ snr = 5;
if (phase_model == 0)
phase_synth_zero_order(snr, H, &prev_Wo, &ex_phase);
if (phase_model == 1)
dump_phase_(&model.phi[0]);
}
- /* optional LPC model amplitudes */
-
- if (lpc_model) {
- snr = lpc_model_amplitudes(Sn, &model, order, lsp_quantiser);
- sum_snr += snr;
- dump_quantised_model(&model);
- }
-
-
/* Synthesise speech */
if (fout != NULL) {
\*---------------------------------------------------------------------------*/
int switch_present(sw,argc,argv)
-register char sw[]; /* switch in string form */
-register int argc; /* number of command line arguments */
-register char *argv[]; /* array of command line arguments in string form */
+ char sw[]; /* switch in string form */
+ int argc; /* number of command line arguments */
+ char *argv[]; /* array of command line arguments in string form */
{
- register int i; /* loop variable */
+ int i; /* loop variable */
for(i=1; i<argc; i++)
if (!strcmp(sw,argv[i]))
fref = NULL;
init_encoder();
- Nw = 220;
- make_window(Nw);
+ make_window(NW);
/* Main loop ------------------------------------------------------------*/
/* Update input speech buffers */
- for(i=0; i<N+AW_ENC/2; i++)
+ for(i=0; i<M-N; i++)
Sn[i] = Sn[i+N];
for(i=0; i<N; i++)
- Sn[i+N+AW_ENC/2] = buf[i];
+ Sn[i+M-N] = buf[i];
/* Estimate pitch */
- if (frames > 2) {
- fscanf(fp,"%f\n",&pitch);
- if (pitch > P_MAX) pitch = P_MAX;
- if (pitch < P_MIN) pitch = P_MIN;
- }
- else
- pitch = P_MIN;
+ fscanf(fp,"%f\n",&pitch);
/* construct analysis window */