#include "ampexp.h"
#include "phaseexp.h"
-void synth_one_frame(kiss_fft_cfg fft_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[]);
+void synth_one_frame(kiss_fft_cfg fft_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem);
void print_help(const struct option *long_options, int num_opts, char* argv[]);
FILE *fin; /* input speech file */
short buf[N]; /* input/output buffer */
float Sn[M]; /* float input speech samples */
+ float Sn_pre[M]; /* pre-emphasised input speech samples */
COMP Sw[FFT_ENC]; /* DFT of Sn[] */
kiss_fft_cfg fft_fwd_cfg;
kiss_fft_cfg fft_inv_cfg;
int lspres = 0;
int lspdt = 0, lspdt_mode = LSPDT_ALL;
int dt = 0, lspjvm = 0, lspanssi = 0, lspjnd = 0, lspmel = 0;
+ int prede = 0;
+ float pre_mem = 0.0, de_mem = 0.0;
float ak[LPC_MAX];
COMP Sw_[FFT_ENC];
COMP Ew[FFT_ENC];
{ "hi", no_argument, &hi, 1 },
{ "simlpcpf", no_argument, &simlpcpf, 1 },
{ "lpcpf", no_argument, &lpcpf, 1 },
+ { "prede", no_argument, &prede, 1 },
{ "dump_pitch_e", required_argument, &dump_pitch_e, 1 },
{ "sq_pitch_e", no_argument, &scalar_quant_Wo_e, 1 },
{ "vq_pitch_e", no_argument, &vector_quant_Wo_e, 1 },
};
int num_opts=sizeof(long_options)/sizeof(struct option);
- for(i=0; i<M; i++)
+ for(i=0; i<M; i++) {
Sn[i] = 1.0;
+ Sn_pre[i] = 1.0;
+ }
for(i=0; i<2*N; i++)
Sn_[i] = 0;
/* Read input speech */
- for(i=0; i<M-N; i++)
+ for(i=0; i<M-N; i++) {
Sn[i] = Sn[i+N];
+ Sn_pre[i] = Sn_pre[i+N];
+ }
for(i=0; i<N; i++)
Sn[i+M-N] = buf[i];
-
+
+ pre_emp(&Sn_pre[M-N], &Sn[M-N], &pre_mem, N);
+
+
/*------------------------------------------------------------*\
Estimate Sinusoidal Model Parameters
\*------------------------------------------------------------*/
- nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_uq_Wo);
+ nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo);
model.Wo = TWO_PI/pitch;
- dft_speech(fft_fwd_cfg, Sw, Sn, w);
+ dft_speech(fft_fwd_cfg, Sw, Sn, w);
two_stage_pitch_refinement(&model, Sw);
estimate_amplitudes(&model, Sw, W);
uq_Wo = model.Wo;
/* find aks here, these are overwritten if LPC modelling is enabled */
- for(i=0; i<M; i++)
- Wn[i] = Sn[i]*w[i];
+ if (prede) {
+ for(i=0; i<M; i++)
+ Wn[i] = Sn_pre[i]*w[i];
+ }
+ else {
+
+ for(i=0; i<M; i++)
+ Wn[i] = Sn[i]*w[i];
+ }
autocorrelate(Wn,Rk,M,order);
levinson_durbin(Rk,ak,order);
\*------------------------------------------------------------*/
if (lpc_model) {
-
- e = speech_to_uq_lsps(lsps, ak, Sn, w, order);
+
+ if (prede)
+ e = speech_to_uq_lsps(lsps, ak, Sn_pre, w, order);
+ else
+ e = speech_to_uq_lsps(lsps, ak, Sn, w, order);
#ifdef DUMP
dump_ak(ak, LPC_ORD);
/* multi-stage VQ from Anssi Ramo OH3GDD */
lspanssi_quantise(lsps, lsps_, LPC_ORD, 5);
+ bw_expand_lsps(lsps_, LPC_ORD);
lsp_to_lpc(lsps_, ak, LPC_ORD);
}
order);
if (postfilt)
postfilter(&interp_model, &bg_est);
- synth_one_frame(fft_inv_cfg, buf, &interp_model, Sn_, Pn);
+ synth_one_frame(fft_inv_cfg, buf, &interp_model, Sn_, Pn, prede, &de_mem);
//printf(" buf[0] %d\n", buf[0]);
if (fout != NULL)
fwrite(buf,sizeof(short),N,fout);
phase_synth_zero_order(fft_fwd_cfg, &model, ak, ex_phase, order);
if (postfilt)
postfilter(&model, &bg_est);
- synth_one_frame(fft_inv_cfg, buf, &model, Sn_, Pn);
+ synth_one_frame(fft_inv_cfg, buf, &model, Sn_, Pn, prede, &de_mem);
//printf(" buf[0] %d\n", buf[0]);
if (fout != NULL)
fwrite(buf,sizeof(short),N,fout);
if (postfilt)
postfilter(&model, &bg_est);
- synth_one_frame(fft_inv_cfg, buf, &model, Sn_, Pn);
+ synth_one_frame(fft_inv_cfg, buf, &model, Sn_, Pn, prede, &de_mem);
if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
}
return 0;
}
-void synth_one_frame(kiss_fft_cfg fft_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[])
+void synth_one_frame(kiss_fft_cfg fft_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem)
{
int i;
synthesise(fft_inv_cfg, Sn_, model, Pn, 1);
+ if (prede)
+ de_emp(Sn_, Sn_, de_mem, N);
for(i=0; i<N; i++) {
if (Sn_[i] > 32767.0)
/* Estimate pitch */
- nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw, &c2->prev_Wo_enc);
+ nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
model->Wo = TWO_PI/pitch;
model->L = PI/model->Wo;
AUTHOR......: David Rowe
DATE CREATED: 23/3/93
- Non Linear Pitch (NLP) estimation functions.
-
+ Non Linear Pitch (NLP) estimation functions.
+
\*---------------------------------------------------------------------------*/
/*
kiss_fft_cfg fft_cfg; /* kiss FFT config */
} NLP;
-float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax);
+float test_candidate_mbe(COMP Sw[], COMP W[], float f0);
+float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo);
float post_process_sub_multiples(COMP Fw[],
int pmin, int pmax, float gmax, int gmax_bin,
float *prev_Wo);
int pmax, /* maximum pitch value */
float *pitch, /* estimated pitch period in samples */
COMP Sw[], /* Freq domain version of Sn[] */
+ COMP W[], /* Freq domain window */
float *prev_Wo
)
{
}
}
- best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin,
- prev_Wo);
+ //#define POST_PROCESS_MBE
+ #ifdef POST_PROCESS_MBE
+ best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_Wo);
+ #else
+ best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo);
+ #endif
/* Shift samples in buffer to make room for new samples */
post_process_sub_multiples()
- Given the global maximma of Fw[] we search interger submultiples for
+ Given the global maximma of Fw[] we search integer submultiples for
local maxima. If local maxima exist and they are above an
experimentally derived threshold (OK a magic number I pulled out of
the air) we choose the submultiple as the F0 estimate.
return best_f0;
}
+/*---------------------------------------------------------------------------*\
+
+ post_process_mbe()
+
+ Use the MBE pitch estimation algorithm to evaluate pitch candidates. This
+ works OK but the accuracy at low F0 is affected by NW, the analysis window
+ size used for the DFT of the input speech Sw[]. Also favours high F0 in
+ the presence of background noise which causes periodic artifacts in the
+ synthesised speech.
+
+\*---------------------------------------------------------------------------*/
+
+float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COMP W[], float *prev_Wo)
+{
+ float candidate_f0;
+ float f0,best_f0; /* fundamental frequency */
+ float e,e_min; /* MBE cost function */
+ int i;
+ float e_hz[F0_MAX];
+ int bin;
+ float f0_min, f0_max;
+ float f0_start, f0_end;
+
+ f0_min = (float)SAMPLE_RATE/pmax;
+ f0_max = (float)SAMPLE_RATE/pmin;
+
+ /* Now look for local maxima. Each local maxima is a candidate
+ that we test using the MBE pitch estimation algotithm */
+
+ for(i=0; i<F0_MAX; i++)
+ e_hz[i] = -1;
+ e_min = 1E32;
+ best_f0 = 50;
+ for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
+ if ((Fw[i].real > Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) {
+
+ /* local maxima found, lets test if it's big enough */
+
+ if (Fw[i].real > T*gmax) {
+
+ /* OK, sample MBE cost function over +/- 10Hz range in 2.5Hz steps */
+
+ candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
+ f0_start = candidate_f0-20;
+ f0_end = candidate_f0+20;
+ if (f0_start < f0_min) f0_start = f0_min;
+ if (f0_end > f0_max) f0_end = f0_max;
+
+ for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
+ e = test_candidate_mbe(Sw, W, f0);
+ bin = floor(f0); assert((bin > 0) && (bin < F0_MAX));
+ e_hz[bin] = e;
+ if (e < e_min) {
+ e_min = e;
+ best_f0 = f0;
+ }
+ }
+
+ }
+ }
+ }
+
+ /* finally sample MBE cost function around previous pitch estimate
+ (form of pitch tracking) */
+
+ candidate_f0 = *prev_Wo * SAMPLE_RATE/TWO_PI;
+ f0_start = candidate_f0-20;
+ f0_end = candidate_f0+20;
+ if (f0_start < f0_min) f0_start = f0_min;
+ if (f0_end > f0_max) f0_end = f0_max;
+
+ for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
+ e = test_candidate_mbe(Sw, W, f0);
+ bin = floor(f0); assert((bin > 0) && (bin < F0_MAX));
+ e_hz[bin] = e;
+ if (e < e_min) {
+ e_min = e;
+ best_f0 = f0;
+ }
+ }
+
+ #ifdef DUMP
+ dump_e(e_hz);
+ #endif
+
+ return best_f0;
+}
+
+/*---------------------------------------------------------------------------*\
+
+ test_candidate_mbe()
+
+ Returns the error of the MBE cost function for the input f0.
+
+ Note: I think a lot of the operations below can be simplified as
+ W[].imag = 0 and has been normalised such that den always equals 1.
+
+\*---------------------------------------------------------------------------*/
+
+float test_candidate_mbe(
+ COMP Sw[],
+ COMP W[],
+ float f0
+)
+{
+ COMP Sw_[FFT_ENC]; /* DFT of all voiced synthesised signal */
+ int l,al,bl,m; /* loop variables */
+ COMP Am; /* amplitude sample for this band */
+ int offset; /* centers Hw[] about current harmonic */
+ float den; /* denominator of Am expression */
+ float error; /* accumulated error between originl and synthesised */
+ float Wo; /* current "test" fundamental freq. */
+ int L;
+
+ L = floor((SAMPLE_RATE/2.0)/f0);
+ Wo = f0*(2*PI/SAMPLE_RATE);
+
+ error = 0.0;
+
+ /* Just test across the harmonics in the first 1000 Hz (L/4) */
+
+ for(l=1; l<L/4; l++) {
+ Am.real = 0.0;
+ Am.imag = 0.0;
+ den = 0.0;
+ al = ceil((l - 0.5)*Wo*FFT_ENC/TWO_PI);
+ bl = ceil((l + 0.5)*Wo*FFT_ENC/TWO_PI);
+
+ /* Estimate amplitude of harmonic assuming harmonic is totally voiced */
+
+ for(m=al; m<bl; m++) {
+ offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
+ Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag;
+ Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag;
+ den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag;
+ }
+
+ Am.real = Am.real/den;
+ Am.imag = Am.imag/den;
+
+ /* Determine error between estimated harmonic and original */
+
+ for(m=al; m<bl; m++) {
+ offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
+ Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag;
+ Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real;
+ error += (Sw[m].real - Sw_[m].real)*(Sw[m].real - Sw_[m].real);
+ error += (Sw[m].imag - Sw_[m].imag)*(Sw[m].imag - Sw_[m].imag);
+ }
+ }
+
+ return error;
+}
+
+
void *nlp_create();
void nlp_destroy(void *nlp_state);
float nlp(void *nlp_state, float Sn[], int n, int m, int pmin, int pmax,
- float *pitch, COMP Sw[], float *prev_Wo);
-float test_candidate_mbe(COMP Sw[], float f0, COMP Sw_[]);
+ float *pitch, COMP Sw[], COMP W[], float *prev_Wo);
#endif