From: drowe67
Date: Fri, 20 Aug 2010 06:41:32 +0000 (+0000)
Subject: refactored code, compiles and runs OK
X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=20bba13c91e08c0f3cec11a1702b27da1bc5c144;p=freetel-svn-tracking.git

refactored code, compiles and runs OK

git-svn-id: https://svn.code.sf.net/p/freetel/code@175 01035d8c-6547-0410-b346-abe4f91aad63
---

diff --git a/codec2/src/Makefile b/codec2/src/Makefile
index 5b888cef..f037794b 100644
--- a/codec2/src/Makefile
+++ b/codec2/src/Makefile
@@ -1,10 +1,9 @@
 CC=gcc
 CFLAGS=-g -Wall -I. -I../src -Wall -g -DFLOATING_POINT -DVAR_ARRAYS
 
-SINENC_OBJ = sinenc.o globals.o initenc.o four1.o refine.o spec.o dump.o
-SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \
-             quantise.o lpc.o dump.o refine.o lsp.o phase.o postfilter.o \
-             interp.o nlp.o
+SINENC_OBJ = sine.o four1.o sinenc.o dump.o
+SINEDEC_OBJ = sine.o four1.o dump.o quantise.o lpc.o lsp.o phase.o \
+             postfilter.o interp.o sinedec.o
 
 all: sinenc sinedec

diff --git a/codec2/src/code.sh b/codec2/src/code.sh
index 63cf5fbb..adeddc49 100755
--- a/codec2/src/code.sh
+++ b/codec2/src/code.sh
@@ -7,9 +7,9 @@
 ../unittest/tnlp ../raw/$1.raw ../unittest/$1_nlp.p
 ../src/sinenc ../raw/$1.raw $1.mdl 300 ../unittest/$1_nlp.p
 ../src/sinedec ../raw/$1.raw $1.mdl -o $1_uq.raw
-../src/sinedec ../raw/$1.raw $1.mdl --phase 0 -o $1_phase0.raw --postfilter
+../src/sinedec ../raw/$1.raw $1.mdl --phase0 -o $1_phase0.raw --postfilter
 ../src/sinedec ../raw/$1.raw $1.mdl --lpc 10 -o $1_lpc10.raw
 ../src/sinedec ../raw/$1.raw $1.mdl --lpc 10 --lsp -o $1_lsp.raw
-../src/sinedec ../raw/$1.raw $1.mdl --phase 0 --lpc 10 -o $1_phase0_lpc10.raw --postfilter
-../src/sinedec ../raw/$1.raw $1.mdl --phase 0 --lpc 10 --lsp -o $1_phase0_lsp.raw --postfilter
+../src/sinedec ../raw/$1.raw $1.mdl --phase0 --lpc 10 -o $1_phase0_lpc10.raw --postfilter
+../src/sinedec ../raw/$1.raw $1.mdl --phase0 --lpc 10 --lsp -o $1_phase0_lsp.raw --postfilter

diff --git a/codec2/src/defines.h b/codec2/src/defines.h
index eb8b8287..c963ba4a 100644
--- a/codec2/src/defines.h
+++ b/codec2/src/defines.h
@@ -26,8 +26,8 @@
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ -#ifndef __SINE__ -#define __SINE__ +#ifndef __DEFINES__ +#define __DEFINES__ /*---------------------------------------------------------------------------*\ @@ -37,29 +37,26 @@ /* General defines */ -#define N 80 /* number of samples per frame */ -#define MAX_AMP 80 /* maximum number of harmonics */ -#define PI 3.141592654 /* mathematical constant */ -#define TWO_PI 6.283185307 /* mathematical constant */ -#define FC 4000 /* cut-off frequency */ +#define N 80 /* number of samples per frame */ +#define MAX_AMP 80 /* maximum number of harmonics */ +#define PI 3.141592654 /* mathematical constant */ +#define TWO_PI 6.283185307 /* mathematical constant */ +#define FS 8000 /* sample rate in Hz */ +#define MAX_STR 256 /* maximum string size */ + +#define NW 279 /* analysis window size */ +#define FFT_ENC 512 /* size of FFT used for encoder */ +#define FFT_DEC 512 /* size of FFT used in decoder */ +#define TW 40 /* Trapezoidal synthesis window overlap */ +#define V_THRESH 4.0 /* voicing threshold in dB */ +#define LPC_MAX 20 /* maximum LPC order */ +#define PHASE_LPC 10 /* maximum LPC order */ /* Pitch estimation defines */ -#define M 320 /* pitch analysis frame size */ -#define P_MIN 20 /* minimum pitch */ -#define P_MAX 160 /* maximum pitch */ - -/* Encoder defines */ - -#define NW 279 /* analysis window size */ -#define FFT_ENC 512 /* size of FFT used for encoder analysis */ - -/* Decoder defines */ - -#define AW_DEC 160 /* number of samples in synthesis window */ -#define FFT_DEC 512 /* number of points in DFT */ -#define TW 40 /* Trapezoidal synthesis window overlap */ -#define MAX_STR 256 +#define M 320 /* pitch analysis frame size */ +#define P_MIN 20 /* minimum pitch */ +#define P_MAX 160 /* maximum pitch */ /*---------------------------------------------------------------------------*\ @@ -74,14 +71,14 @@ typedef struct { float imag; } COMP; -/* Structure to hold unquantised model parameters for one frame */ +/* Structure to hold model parameters for one frame */ typedef struct { - float Wo; /* fundamental frequency estimate in radians */ - int L; /* number of harmonics over the current frame */ - float v[MAX_AMP]; /* voicing measures */ - float A[MAX_AMP]; /* average magnitude/unit frequency samples */ - float phi[MAX_AMP]; /* phase of each harmonic */ + float Wo; /* fundamental frequency estimate in radians */ + int L; /* number of harmonics */ + float v[MAX_AMP]; /* voicing measures (unused at present) */ + float A[MAX_AMP]; /* amplitiude of each harmonic */ + float phi[MAX_AMP]; /* phase of each harmonic */ } MODEL; #endif diff --git a/codec2/src/dump.c b/codec2/src/dump.c index 4be59940..386443c1 100644 --- a/codec2/src/dump.c +++ b/codec2/src/dump.c @@ -24,11 +24,13 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include "defines.h" #include "dump.h" #include #include #include #include +#include static int dumpon = 0; @@ -197,7 +199,7 @@ void dump_quantised_model(MODEL *model) { fprintf(fqmodel,"\n"); } -void dump_phase(float phase[]) { +void dump_phase(float phase[], int L) { int l; char s[MAX_STR]; @@ -209,14 +211,14 @@ void dump_phase(float phase[]) { assert(fphase != NULL); } - for(l=1; l<=model.L; l++) + for(l=1; l<=L; l++) fprintf(fphase,"%f\t",phase[l]); - for(l=model.L+1; l #include +#include "defines.h" +#include "interp.h" + /*---------------------------------------------------------------------------*\ interp() @@ -88,8 +91,8 @@ void interp( phase to time-shift the model forward or backward N samples. 
*/ - memcpy(a, prev, sizeof(model)); - memcpy(b, next, sizeof(model)); + memcpy(a, prev, sizeof(MODEL)); + memcpy(b, next, sizeof(MODEL)); for(m=1; m<=a->L; m++) { a->A[m] /= 2.0; a->phi[m] += a->Wo*m*N; diff --git a/codec2/src/interp.h b/codec2/src/interp.h index 39a9175c..5cbfe909 100644 --- a/codec2/src/interp.h +++ b/codec2/src/interp.h @@ -29,8 +29,6 @@ #ifndef __INTERP__ #define __INTERP__ -#include "sine.h" - void interp(MODEL *prev, MODEL *next, MODEL *synth, MODEL *a, MODEL *b, int *transition); diff --git a/codec2/src/lpc.c b/codec2/src/lpc.c index b95a131d..1f006f3c 100644 --- a/codec2/src/lpc.c +++ b/codec2/src/lpc.c @@ -27,11 +27,12 @@ */ #define LPC_MAX_N 512 /* maximum no. of samples in frame */ -#define MAX_ORDER 20 /* maximum LPC order */ #define PI 3.141592654 /* mathematical constant */ -#include +#include #include +#include "defines.h" +#include "lpc.h" /*---------------------------------------------------------------------------*\ @@ -99,9 +100,9 @@ void levinson_durbin( int order /* order of the LPC analysis */ ) { - float E[MAX_ORDER+1]; - float k[MAX_ORDER+1]; - float a[MAX_ORDER+1][MAX_ORDER+1]; + float E[LPC_MAX+1]; + float k[LPC_MAX+1]; + float a[LPC_MAX+1][LPC_MAX+1]; float sum; int i,j; /* loop variables */ @@ -212,11 +213,13 @@ void find_aks( ) { float Wn[LPC_MAX_N]; /* windowed frame of Nsam speech samples */ - float R[MAX_ORDER+1]; /* order+1 autocorrelation values of Sn[] */ + float R[LPC_MAX+1]; /* order+1 autocorrelation values of Sn[] */ int i; - hanning_window(Sn,Wn,Nsam); + assert(order < LPC_MAX); + assert(Nsam < LPC_MAX_N); + hanning_window(Sn,Wn,Nsam); autocorrelate(Wn,R,Nsam,order); levinson_durbin(R,a,order); diff --git a/codec2/src/nlp.c b/codec2/src/nlp.c index c7a3ed21..11f7dfc5 100644 --- a/codec2/src/nlp.c +++ b/codec2/src/nlp.c @@ -214,11 +214,7 @@ float nlp( } } - #ifdef POST_PROCESS_MBE - best_f0 = post_process_mbe(Fw, pmin, pmax, gmax); - #else best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo); - #endif /* Shift samples in buffer to make room for new samples */ @@ -307,142 +303,3 @@ float post_process_sub_multiples(COMP Fw[], return best_f0; } -/*---------------------------------------------------------------------------*\ - - post_process_mbe() - - Use the MBE pitch estimation algorithm to evaluate pitch candidates. This - works OK but the accuracy at low F0 is affected by NW, the analysis window - size used for the DFT of the input speech Sw[]. Also favours high F0 in - the presence of background noise which causes periodic artifacts in the - synthesised speech. - -\*---------------------------------------------------------------------------*/ - -float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax) -{ - float candidate_f0; - float f0,best_f0; /* fundamental frequency */ - float e,e_min; /* MBE cost function */ - int i; - float e_hz[F0_MAX]; - int bin; - float f0_min, f0_max; - float f0_start, f0_end; - COMP Sw_[FFT_ENC]; - - f0_min = (float)SAMPLE_RATE/pmax; - f0_max = (float)SAMPLE_RATE/pmin; - - /* Now look for local maxima. 
Each local maxima is a candidate - that we test using the MBE pitch estimation algotithm */ - - for(i=0; i Fw[i-1].real) && (Fw[i].real > Fw[i+1].real)) { - - /* local maxima found, lets test if it's big enough */ - - if (Fw[i].real > T*gmax) { - - /* OK, sample MBE cost function over +/- 10Hz range in 2.5Hz steps */ - - candidate_f0 = (float)i*SAMPLE_RATE/(PE_FFT_SIZE*DEC); - f0_start = candidate_f0-20; - f0_end = candidate_f0+20; - if (f0_start < f0_min) f0_start = f0_min; - if (f0_end > f0_max) f0_end = f0_max; - - for(f0=f0_start; f0<=f0_end; f0+= 2.5) { - e = test_candidate_mbe(Sw, f0, Sw_); - bin = floor(f0); assert((bin > 0) && (bin < F0_MAX)); - e_hz[bin] = e; - if (e < e_min) { - e_min = e; - best_f0 = f0; - } - } - - } - } - } - dump_e(e_hz); - - return best_f0; -} - -/*---------------------------------------------------------------------------*\ - - test_candidate_mbe() - - Returns the error of the MBE cost function for the input f0. - - Note: I think a lot of the operations below can be simplified as - W[].imag = 0 and has been normalised such that den always equals 1. - -\*---------------------------------------------------------------------------*/ - -float test_candidate_mbe( - COMP Sw[], - float f0, - COMP Sw_[] /* DFT of all voiced synthesised signal for f0 */ - /* useful for debugging/dump file */ -) -{ - int i,l,al,bl,m; /* loop variables */ - COMP Am; /* amplitude sample for this band */ - int offset; /* centers Hw[] about current harmonic */ - float den; /* denominator of Am expression */ - float error; /* accumulated error between originl and synthesised */ - float Wo; /* current "test" fundamental freq. */ - int L; - - for(i=0; i +#include #include +#include #define VTHRESH 4.0 @@ -43,12 +46,13 @@ \*---------------------------------------------------------------------------*/ -void aks_to_H(model,aks,G,H, order) -MODEL *model; /* model parameters */ -float aks[]; /* LPC's */ -float G; /* energy term */ -COMP H[]; /* complex LPC spectral samples */ -int order; +void aks_to_H( + MODEL *model, /* model parameters */ + float aks[], /* LPC's */ + float G, /* energy term */ + COMP H[], /* complex LPC spectral samples */ + int order +) { COMP Pw[FFT_DEC]; /* power spectrum */ int i,m; /* loop variables */ @@ -91,139 +95,6 @@ int order; } } -/*---------------------------------------------------------------------------*\ - - phase_model_first_order() - - Models the current frames phase samples {phi[m]} as a LPC synthesis - filter excited by an impulse at time i_min with a complex gain min_Am. - The phase of the complex gain min_Am is a constant phase term, and the - impulse position i_min creates a phase term that varies linearly - with frequency. We are therefore modelling the excitation phase as - a 1st order polynomial function of mWo: - - Ex[m] = -m*Wo*i_min + arg(min_Am); - - and the harmonic phase is given by: - - phi[m] = -m*Wo*i_min + arg(min_Am) + arg[H[m]]; - - where H[m] is the LPC spectra sampled at the mWo harmonic frequencies. - - This actually works out to be a pretty good model for voiced - speech. The fit of the model (snr) is therefore used as a voicing - measure. If the snr is less than a threshold, the frame is - declared unvoiced all all the phases are randomised. - - Reference: - [1] http://www.itr.unisa.edu.au/~steven/thesis/dgr.pdf Chapter 6 - - NOTE: min_Am is a dumb name for the complex gain as {A} or A[m] is - commonly used for the spectral magnitudes. TODO: change name to match - thesis term G(mWo) (which of course clashes with LPC gain! 
AAAAAAHHH!). - -\*---------------------------------------------------------------------------*/ - -float phase_model_first_order( - float aks[], /* LPC coeffs for this frame */ - COMP H[], /* LPC filter freq doamin samples */ - float *n_min, /* pulse position for min error */ - COMP *minAm, /* complex gain for min error */ - int *voiced -) -{ - float G; /* LPC gain */ - int m; - - float E,Emin; /* current and minimum error so far */ - int P; /* current pitch period */ - COMP A[MAX_AMP]; /* harmonic samples */ - COMP Ex[MAX_AMP]; /* excitation samples */ - COMP A_[MAX_AMP]; /* synthesised harmonic samples */ - COMP Am; /* complex gain */ - COMP Em; /* error for m-th band */ - float den; /* energy of synthesised */ - float snr; /* snr of each excitation source */ - int Lmax; - float n; - - Lmax = model.L; - - /* Construct target vector */ - - sig = 0.0; - for(m=1; m<=Lmax; m++) { - A[m].real = model.A[m]*cos(model.phi[m]); - A[m].imag = model.A[m]*sin(model.phi[m]); - sig += model.A[m]*model.A[m]; - } - - /* Sample LPC model at harmonics */ - - //#define NO_LPC_PHASE - #ifdef NO_LPC_PHASE - /* useful for testing with Sn[] an impulse train */ - for(m=1; m<=PHASE_LPC_ORD; m++) - aks[m] = 0; - #endif - G = 1.0; - aks_to_H(&model,aks,G,H,PHASE_LPC_ORD); - - /* Now attempt to fit impulse, by trying positions 0..P-1 */ - - Emin = 1E32; - P = floor(TWO_PI/model.Wo + 0.5); - for(n=0; nreal = Am.real; - minAm->imag = Am.imag; - } - - } - - snr = 10.0*log10(sig/Emin); - if (snr > VTHRESH) - *voiced = 1; - else - *voiced = 0; - - return snr; -} /*---------------------------------------------------------------------------*\ @@ -317,32 +188,36 @@ float phase_model_first_order( \*---------------------------------------------------------------------------*/ void phase_synth_zero_order( - float snr, /* SNR from first order model */ - COMP H[], /* LPC spectra samples */ - float *ex_phase,/* excitation phase of fundamental */ - int voiced + MODEL *model, + float aks[], + int voiced, + float *ex_phase /* excitation phase of fundamental */ ) { - int m,i; + int m; float new_phi; COMP Ex[MAX_AMP]; /* excitation samples */ COMP A_[MAX_AMP]; /* synthesised harmonic samples */ - float maxA; + COMP H[MAX_AMP]; /* LPC freq domain samples */ + float G; float jitter; + G = 1.0; + aks_to_H(model,aks,G,H,PHASE_LPC); + /* Update excitation fundamental phase track, this sets the position of each pitch pulse during voiced speech. After much experiment I found that using just this frame Wo improved quality for UV sounds compared to interpolating two frames Wo like this: - ex_phase[0] += (*prev_Wo+model.Wo)*N/2; + ex_phase[0] += (*prev_Wo+mode->Wo)*N/2; */ - ex_phase[0] += (model.Wo)*N; + ex_phase[0] += (model->Wo)*N; ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5); - for(m=1; m<=model.L; m++) { + for(m=1; m<=model->L; m++) { /* generate excitation */ @@ -352,8 +227,8 @@ void phase_synth_zero_order( of each harmonic over at +/- 0.25 of a sample. 
*/ jitter = 0.25*(1.0 - 2.0*rand()/RAND_MAX); - Ex[m].real = cos(ex_phase[0]*m - jitter*model.Wo*m); - Ex[m].imag = sin(ex_phase[0]*m - jitter*model.Wo*m); + Ex[m].real = cos(ex_phase[0]*m - jitter*model->Wo*m); + Ex[m].imag = sin(ex_phase[0]*m - jitter*model->Wo*m); } else { @@ -374,66 +249,7 @@ void phase_synth_zero_order( /* modify sinusoidal phase */ new_phi = atan2(A_[m].imag, A_[m].real+1E-12); - model.phi[m] = new_phi; + model->phi[m] = new_phi; } } - -/*---------------------------------------------------------------------------*\ - - phase_synth_first_order() - - Synthesises phases based on SNR and the first oreder phase model - parameters. - -\*---------------------------------------------------------------------------*/ - -void phase_synth_first_order( - float snr, /* SNR from first order model */ - COMP H[], /* LPC spectra samples */ - float n_min, /* best pulse position */ - COMP minAm, /* best complex gain */ - int voiced -) -{ - int Lrand; - int m; - float new_phi; - COMP Ex[MAX_AMP]; /* excitation samples */ - COMP A_[MAX_AMP]; /* synthesised harmonic samples */ - COMP Tm; - - /* now modify sinusoidal model phase using phase model */ - - for(m=1; m<=model.L; m++) { - - /* generate excitation */ - - if (voiced) { - Ex[m].real = cos(model.Wo*m*n_min); - Ex[m].imag = sin(-model.Wo*m*n_min); - } - else { - float phi = TWO_PI*(float)rand()/RAND_MAX; - Ex[m].real = cos(phi); - Ex[m].imag = sin(phi); - } - - /* filter using LPC filter */ - - A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag; - A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag; - - /* scale by complex gain (could have done this earlier at Ex[] - stage) */ - - Tm.real = A_[m].real*minAm.real - A_[m].imag*minAm.imag; - Tm.imag = A_[m].imag*minAm.real + A_[m].real*minAm.imag; - - /* modify sinusoidal phase */ - - new_phi = atan2(Tm.imag, Tm.real+1E-12); - model.phi[m] = new_phi; - } -} - diff --git a/codec2/src/phase.h b/codec2/src/phase.h index e6b4e8c3..9ce2d057 100644 --- a/codec2/src/phase.h +++ b/codec2/src/phase.h @@ -29,13 +29,6 @@ #ifndef __PHASE__ #define __PHASE__ -#define PHASE_LPC_ORD 10 - -void aks_to_H(MODEL *model, float aks[], float G, COMP H[], int order); -float phase_model_first_order(float aks[], COMP H[], float *n_min, COMP *min_Am, - int *voiced); -void phase_synth_zero_order(float snr, COMP H[], float *ex_phase, int voiced); -void phase_synth_first_order(float snr, COMP H[], float n_min, COMP min_Am, - int voiced); +void phase_synth_zero_order(MODEL *model, float aks[], int voiced, float *ex_phase); #endif diff --git a/codec2/src/postfilter.c b/codec2/src/postfilter.c index 6ddfceb0..a2c36057 100644 --- a/codec2/src/postfilter.c +++ b/codec2/src/postfilter.c @@ -31,8 +31,10 @@ #include #include #include -#include "postfilter.h" + +#include "defines.h" #include "dump.h" +#include "postfilter.h" /*---------------------------------------------------------------------------*\ @@ -70,7 +72,7 @@ This idea is rather experimental. Some potential problems that may happen: - 1/ If someone says "aaaaaaaahhhhhhhhh" willl background estimator track + 1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track up to speech level? This would be a bad thing. 
2/ If background noise suddenly dissapears from the source speech does diff --git a/codec2/src/postfilter.h b/codec2/src/postfilter.h index 9f7555c9..1d5c4e81 100644 --- a/codec2/src/postfilter.h +++ b/codec2/src/postfilter.h @@ -29,8 +29,6 @@ #ifndef __POSTFILTER__ #define __POSTFILTER__ -#include "sine.h" - void postfilter(MODEL *model, int voiced, float *bg_est); #endif diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c index 15b157d9..a6fa4f74 100644 --- a/codec2/src/quantise.c +++ b/codec2/src/quantise.c @@ -26,13 +26,17 @@ #include #include -#include "sine.h" +#include +#include +#include + +#include "defines.h" +#include "dump.h" #include "quantise.h" #include "lpc.h" #include "lsp.h" -#include "dump.h" +#include "four1.h" -#define MAX_ORDER 20 #define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */ #define MAX_CB 20 /* max number of codebooks */ @@ -120,8 +124,8 @@ void lsp_quantise( ) { int i; - float dlsp[MAX_ORDER]; - float dlsp_[MAX_ORDER]; + float dlsp[LPC_MAX]; + float dlsp_[LPC_MAX]; dlsp[0] = lsp[0]; for(i=1; i +#include +#include + +#include "defines.h" +#include "sine.h" +#include "four1.h" + +/*---------------------------------------------------------------------------*\ + + HEADERS + +\*---------------------------------------------------------------------------*/ + +void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep); + +/*---------------------------------------------------------------------------*\ + + FUNCTIONS + +\*---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: make_analysis_window + AUTHOR......: David Rowe + DATE CREATED: 11/5/94 + + Init function that generates the time domain analysis window and it's DFT. + +\*---------------------------------------------------------------------------*/ + +void make_analysis_window(float w[],COMP W[]) +{ + float m; + COMP temp; + int i,j; + + /* + Generate Hamming window centered on M-sample pitch analysis window + + 0 M/2 M-1 + |-------------|-------------| + |-------|-------| + NW samples + + All our analysis/synthsis is centred on the M/2 sample. + */ + + m = 0.0; + for(i=0; iWo + 5; + pmin = TWO_PI/model->Wo - 5; + pstep = 1.0; + hs_pitch_refinement(model,Sw,pmin,pmax,pstep); + + /* Fine refinement */ + + pmax = TWO_PI/model->Wo + 1; + pmin = TWO_PI/model->Wo - 1; + pstep = 0.25; + hs_pitch_refinement(model,Sw,pmin,pmax,pstep); + + /* Limit range */ + + if (model->Wo < TWO_PI/P_MAX) + model->Wo = TWO_PI/P_MAX; + if (model->Wo > TWO_PI/P_MIN) + model->Wo = TWO_PI/P_MIN; + + model->L = floor(PI/model->Wo); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: hs_pitch_refinement + AUTHOR......: David Rowe + DATE CREATED: 27/5/94 + + Harmonic sum pitch refinement function. + + pmin pitch search range minimum + pmax pitch search range maximum + step pitch search step size + model current pitch estimate in model.Wo + + model refined pitch estimate in model.Wo + +\*---------------------------------------------------------------------------*/ + +void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep) +{ + int m; /* loop variable */ + int b; /* bin for current harmonic centre */ + float E; /* energy for current pitch*/ + float Wo; /* current "test" fundamental freq. 
*/ + float Wom; /* Wo that maximises E */ + float Em; /* mamimum energy */ + float r; /* number of rads/bin */ + float p; /* current pitch */ + + /* Initialisation */ + + model->L = PI/model->Wo; /* use initial pitch est. for L */ + Em = 0.0; + r = TWO_PI/FFT_ENC; + + /* Determine harmonic sum for a range of Wo values */ + + for(p=pmin; p<=pmax; p+=pstep) { + E = 0.0; + Wo = TWO_PI/p; + + /* Sum harmonic magnitudes */ + + for(m=1; m<=model->L; m++) { + b = floor(m*Wo/r + 0.5); + E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; + } + + /* Compare to see if this is a maximum */ + + if (E > Em) { + Em = E; + Wom = Wo; + } + } + + model->Wo = Wom; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: estimate_amplitudes + AUTHOR......: David Rowe + DATE CREATED: 27/5/94 + + Estimates the complex amplitudes of the harmonics. + +\*---------------------------------------------------------------------------*/ + +void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[]) +{ + int i,m; /* loop variables */ + int am,bm; /* bounds of current harmonic */ + int b; /* DFT bin of centre of current harmonic */ + float den; /* denominator of amplitude expression */ + float r; /* number of rads/bin */ + int offset; + COMP Am; + + r = TWO_PI/FFT_ENC; + + for(m=1; m<=model->L; m++) { + den = 0.0; + am = floor((m - 0.5)*model->Wo/r + 0.5); + bm = floor((m + 0.5)*model->Wo/r + 0.5); + b = floor(m*model->Wo/r + 0.5); + + /* Estimate ampltude of harmonic */ + + den = 0.0; + Am.real = Am.imag = 0.0; + for(i=am; iWo/r + 0.5); + Am.real += Sw[i].real*W[offset].real; + Am.imag += Sw[i].imag*W[offset].real; + } + + model->A[m] = sqrt(den); + + /* Estimate phase of harmonic */ + + model->phi[m] = atan2(Sw[b].imag,Sw[b].real); + } +} + +/*---------------------------------------------------------------------------*\ + + est_voicing_mbe() + + Returns the error of the MBE cost function for a fiven F0. + + Note: I think a lot of the operations below can be simplified as + W[].imag = 0 and has been normalised such that den always equals 1. + +\*---------------------------------------------------------------------------*/ + +float est_voicing_mbe( + MODEL *model, + COMP Sw[], + COMP W[], + float f0, + COMP Sw_[], /* DFT of all voiced synthesised signal for f0 */ + /* useful for debugging/dump file */ + int *voiced +) +{ + int i,l,al,bl,m; /* loop variables */ + COMP Am; /* amplitude sample for this band */ + int offset; /* centers Hw[] about current harmonic */ + float den; /* denominator of Am expression */ + float error; /* accumulated error between originl and synthesised */ + float Wo; /* current "test" fundamental freq. */ + int L; + float sig, snr; + + sig = 0.0; + for(l=1; l<=model->L/4; l++) { + sig += model->A[l]*model->A[l]; + } + + for(i=0; i V_THRESH) + *voiced = 1; + else + *voiced = 0; + + return snr; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: make_synthesis_window + AUTHOR......: David Rowe + DATE CREATED: 11/5/94 + + Init function that generates the trapezoidal (Parzen) sythesis window. 
+ +\*---------------------------------------------------------------------------*/ + +void make_synthesis_window(float Pn[]) +{ + int i; + float win; + + /* Generate Parzen window in time domain */ + + win = 0.0; + for(i=0; iL; l++) { + b = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5); + Sw_[b].real = model->A[l]*cos(model->phi[l]); + Sw_[b].imag = model->A[l]*sin(model->phi[l]); + Sw_[FFT_DEC-b].real = Sw_[b].real; + Sw_[FFT_DEC-b].imag = -Sw_[b].imag; + } + + /* Perform inverse DFT */ + + four1(&Sw_[-1].imag,FFT_DEC,1); + + /* Overlap add to previous samples */ + + for(i=0; i -#include -#include - -#include "defines.h" /* defines for sinusoidal coder */ -#include "globals.h" /* external globals */ - -/*---------------------------------------------------------------------------*\ - - FUNCTIONS - -\*---------------------------------------------------------------------------*/ - -/* functions in refine.c */ - -void dft_speech(); -void two_stage_pitch_refinement(); -void hs_pitch_refinement(float pmin, float pmax, float pstep); - -/* functions in spec.c */ - -void estimate_amplitudes(); -void estimate_voicing(); -void estimate_voicing_av(); -float voicing(int lower, int upper); - -/* functions in four1.c */ - -void four1(); - -/* functions in synth.c */ - -void synthesise_mixed(); - -/* functions in initenc.c and initdec.c */ - -void init_encoder(void); -float make_window(); -void init_decoder(); - -/* functions in gasdev.c */ +#ifndef __SINE__ +#define __SINE__ -float gasdev(); +void make_analysis_window(float w[], COMP W[]); +void dft_speech(COMP Sw[], float Sn[], float w[]); +void two_stage_pitch_refinement(MODEL *model, COMP Sw[]); +void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[]); +float est_voicing_mbe(MODEL *model, COMP Sw[], COMP W[], float f0, COMP Sw_[], int *voiced); +void make_synthesis_window(float Pn[]); +void synthesise(float Sn_[], MODEL *model, float Pn[], int shift); +#endif diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c index e8988cb5..ceb49e5b 100644 --- a/codec2/src/sinedec.c +++ b/codec2/src/sinedec.c @@ -27,16 +27,19 @@ */ #include +#include +#include #include +#include + +#include "defines.h" #include "sine.h" -#include "quantise.h" #include "dump.h" -#include "phase.h" #include "lpc.h" -#include "synth.h" +#include "quantise.h" +#include "phase.h" #include "postfilter.h" #include "interp.h" -#include "nlp.h" /*---------------------------------------------------------------------------*\ @@ -71,11 +74,19 @@ register char *argv[]; /* array of command line arguments in string form */ int main(int argc, char *argv[]) { FILE *fmodel; /* file of model parameters from encoder */ - FILE *fout; /* output speech file */ - FILE *fin; /* input speech file */ - short buf[N]; /* input/output buffer */ - int i; /* loop variable */ - int length; /* number of frames so far */ + FILE *fout; /* output speech file */ + FILE *fin; /* input speech file */ + short buf[N]; /* input/output buffer */ + float Sn[M]; /* float input speech samples */ + COMP Sw[FFT_ENC]; /* DFT of w[] */ + float w[M]; /* time domain hamming window */ + COMP W[FFT_ENC]; /* DFT of w[] */ + MODEL model; + float Pn[2*N]; /* trapezoidal synthesis window */ + float Sn_[2*N]; /* synthesised speech */ + int i; /* loop variable */ + int frames; + int length; /* number of frames so far */ char out_file[MAX_STR]; int arg; @@ -84,11 +95,11 @@ int main(int argc, char *argv[]) int lpc_model, order; int lsp, lsp_quantiser; - float ak[LPC_MAX_ORDER+1]; + float ak[LPC_MAX+1]; int dump; - int phase, phase_model; + 
int phase0; float ex_phase[MAX_AMP+1]; int voiced, voiced_1, voiced_2; @@ -101,6 +112,9 @@ int main(int argc, char *argv[]) MODEL model_1, model_2, model_3, model_synth, model_a, model_b; int transition, decimate; + for(i=0; i<2*N; i++) + Sn_[i] = 0; + voiced_1 = voiced_2 = 0; model_1.Wo = TWO_PI/P_MIN; model_1.L = floor(PI/model_1.Wo); @@ -176,12 +190,8 @@ int main(int argc, char *argv[]) lsp = switch_present("--lsp",argc,argv); lsp_quantiser = 0; - /* phase_model 0: zero phase - phase_model 1: 1st order polynomial */ - phase = switch_present("--phase",argc,argv); - if (phase) { - phase_model = atoi(argv[phase+1]); - assert((phase_model == 0) || (phase_model == 1)); + phase0 = switch_present("--phase0",argc,argv); + if (phase0) { ex_phase[0] = 0; } @@ -199,10 +209,8 @@ int main(int argc, char *argv[]) /* Initialise ------------------------------------------------------------*/ - init_decoder(); - init_encoder(); - Nw = 220; - make_window(Nw); + make_analysis_window(w,W); + make_synthesis_window(Pn); quantise_init(); /* Main loop ------------------------------------------------------------*/ @@ -220,23 +228,19 @@ int main(int argc, char *argv[]) for(i=0; i f0_max) f0_end = f0_max; - e_min = 1E32; - - for(f0=f0_start; f0<=f0_end; f0+= 1) { - e = test_candidate_mbe(Sw, f0, Sw_); - if (e < e_min) { - e_min = e; - best_f0 = f0; - } - } - noise = test_candidate_mbe(Sw,best_f0, Sw_); - snr = sig/noise; - dump_Sw_(Sw_); - } - - phase_model_first_order(ak_phase, H, &n_min, &min_Am, &voiced); - if (snr > 4.0) - voiced = 1; - else - voiced = 0; + assert(order == PHASE_LPC); + + dump_ak(ak_phase, PHASE_LPC); + /* determine voicing */ + snr = est_voicing_mbe(&model, Sw, W, (FS/TWO_PI)*model.Wo, Sw_, &voiced); + dump_Sw_(Sw_); dump_snr(snr); - if (phase_model == 0) { - /* just to make sure we are not cheating - kill all phases */ - for(i=0; i 2.0; - } - phase_synth_zero_order(voiced, H, ex_phase, voiced); - } - if (phase_model == 1) { - phase_synth_first_order(voiced, H, n_min, min_Am, voiced); - } + /* just to make sure we are not cheating - kill all phases */ - if (postfilt) + for(i=0; i 2.0; + } + phase_synth_zero_order(&model, ak_phase, voiced, ex_phase); + + if (postfilt) postfilter(&model, voiced, &bg_est); - - //dump_phase_(&model.phi[0]); } /* optional LPC model amplitudes */ if (lpc_model) { - snr = lpc_model_amplitudes(Sn, &model, order, lsp, ak); + snr = lpc_model_amplitudes(Sn, w, &model, order, lsp, ak); sum_snr += snr; dump_quantised_model(&model); } @@ -370,11 +329,11 @@ int main(int argc, char *argv[]) if (fout != NULL) { if (transition) { - synthesise_mixed(Pn,&model_a,Sn_,1); - synthesise_mixed(Pn,&model_b,Sn_,0); + synthesise(Sn_,&model_a,Pn,1); + synthesise(Sn_,&model_b,Pn,0); } else { - synthesise_mixed(Pn,&model,Sn_,1); + synthesise(Sn_,&model,Pn,1); } /* Save output speech to disk */ diff --git a/codec2/src/sinenc.c b/codec2/src/sinenc.c index 70f098da..5f75be3c 100644 --- a/codec2/src/sinenc.c +++ b/codec2/src/sinenc.c @@ -26,9 +26,12 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include "sine.h" -#include "dump.h" +#include +#include #include +#include "defines.h" +#include "dump.h" +#include "sine.h" /*---------------------------------------------------------------------------*\ @@ -62,17 +65,22 @@ int switch_present(sw,argc,argv) int main(int argc, char *argv[]) { - FILE *fin; /* input speech sample file */ - FILE *fmodel; /* output file of model parameters */ - FILE *fp; /* input text file containing pitch estimates */ - short buf[N]; /* input speech sample buffer */ - int length; /* number of frames to process */ - float pitch; /* current pitch estimate from external pitch file */ - int i; /* loop variable */ - FILE *fref; /* optional output file with refined pitch estimate */ - int arg; - int dump; - int frames; + FILE *fin; /* input speech sample file */ + FILE *fmodel; /* output file of model parameters */ + FILE *fp; /* input text file containing pitch estimates */ + short buf[N]; /* input speech sample buffer */ + float Sn[M]; /* float input speech samples */ + COMP Sw[FFT_ENC]; /* DFT of Sn[] */ + float w[M]; /* time domain hamming window */ + COMP W[FFT_ENC]; /* DFT of w[] */ + MODEL model; + int length; /* number of frames to process */ + float pitch; /* current pitch estimate from external pitch file */ + int i; /* loop variable */ + FILE *fref; /* optional output file with refined pitch estimate */ + int arg; + int dump; + int frames; if (argc < 5) { printf("usage: sinenc InputFile ModelFile Frames PitchFile\n"); @@ -102,7 +110,7 @@ int main(int argc, char *argv[]) if (dump) dump_on(argv[dump+1]); - if ((arg == switch_present("--ref",argc,argv))) { + if ((arg = switch_present("--ref",argc,argv))) { if ((fref = fopen(argv[arg+1],"wt")) == NULL) { printf("Error opening output pitch refinement file: %s\n",argv[5]); exit(1); @@ -111,9 +119,14 @@ int main(int argc, char *argv[]) else fref = NULL; - init_encoder(); - make_window(NW); + /* Initialise sample buffer memories to stop divide by zero errors + and zero energy frames at the start of simulation */ + + for(i=0; iL; l++) { - b = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5); - Sw_[b].real = model->A[l]*cos(model->phi[l]); - Sw_[b].imag = model->A[l]*sin(model->phi[l]); - Sw_[FFT_DEC-b].real = Sw_[b].real; - Sw_[FFT_DEC-b].imag = -Sw_[b].imag; - } + for(l=1; l<=model->L; l++) { + b = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5); + Sw_[b].real = model->A[l]*cos(model->phi[l]); + Sw_[b].imag = model->A[l]*sin(model->phi[l]); + Sw_[FFT_DEC-b].real = Sw_[b].real; + Sw_[FFT_DEC-b].imag = -Sw_[b].imag; + } - /* Perform inverse DFT */ + /* Perform inverse DFT */ - four1(&Sw_[-1].imag,FFT_DEC,1); + four1(&Sw_[-1].imag,FFT_DEC,1); - /* Overlap add to previous samples */ + /* Overlap add to previous samples */ - for(i=0; iWo) > 0.2*model->Wo)) { - //printf("disc voiced = %d\n", voiced); - //printf("%f %f\n", fabs(*Wo_prev - model->Wo), 0.2*model->Wo); - /* discontinous phase tracks: no phase adjustment of frequency - as we want discontinuous phase tracks */ - - for(l=1; l<=model->L; l++) - b[l] = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5); - } - else { - /* continous phase tracks: determine frequency of each harmonic - to ensure smooth phase track at the centre of next synthesis - frame */ - - for(l=1; l<=model->L; l++) { - //printf("Wo_prev = %f Wo = %f\n", *Wo_prev, model->Wo); - delta_w = (model->phi[l] - l*N*(*Wo_prev + model->Wo)/2.0 - phi_prev[l]); - delta_w -= TWO_PI*floor(delta_w/TWO_PI + 0.5); - delta_w /= N; - b[l] = floor((l*model->Wo+delta_w)*FFT_DEC/TWO_PI + 0.5); - //printf("delta_w = %f b[%d] = %d\n", 
delta_w,l,b[l]); - } - } - - /* update memories for phase tracking */ - - *Wo_prev = model->Wo; - for(l=1; l<=model->L; l++) - phi_prev[l] = model->phi[l]; - - /* Now set up frequency domain synthesised speech */ - - for(l=1; l<=model->L; l++) { - Sw_[b[l]].real = model->A[l]*cos(model->phi[l]); - Sw_[b[l]].imag = model->A[l]*sin(model->phi[l]); - Sw_[FFT_DEC-b[l]].real = Sw_[b[l]].real; - Sw_[FFT_DEC-b[l]].imag = -Sw_[b[l]].imag; - } - - /* Perform inverse DFT */ - - four1(&Sw_[-1].imag,FFT_DEC,1); - - /* Overlap add to previous samples */ - - for(i=0; i +#include "defines.h" +#include "window.h" + +float make_window( + float w[], /* time domain analysis window */ + COMP W[] /* w[] in frequency domain */ +) +{ + float m; + COMP temp; + int i,j; + + /* + Generate Hamming window centered on M-sample pitch analysis window + + 0 M/2 M-1 + |-------------|-------------| + |-------|-------| + NW samples + + All our analysis/synthsis is centred on the M/2 sample. + */ + + m = 0.0; + for(i=0; i
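
For readers following the refactor, the sketch below shows how the consolidated sine.c API declared in sine.h above might be chained for a single frame. It is loosely modelled on the sinenc.c analysis loop and the sinedec.c --phase0 synthesis path in this commit, but it is an illustrative sketch only, not code from the repository: the helper name analyse_and_resynth_frame is made up, the window buffers are assumed to have been prepared with make_analysis_window() and make_synthesis_window(), and model-file I/O, LPC/LSP quantisation and the postfilter are omitted.

/*
 * Illustrative sketch (not part of this commit): one frame through the
 * refactored sine.c API.  w[], W[] and Pn[] are assumed to have been
 * initialised with make_analysis_window() and make_synthesis_window();
 * Sn[] holds the last M input samples with the newest N at the end.
 */

#include "defines.h"
#include "sine.h"

static float analyse_and_resynth_frame(
    float Sn[],   /* M float input speech samples                 */
    float w[],    /* time domain analysis window                  */
    COMP  W[],    /* DFT of w[]                                   */
    float Pn[],   /* trapezoidal synthesis window                 */
    float Sn_[],  /* 2*N overlap-add synthesis buffer             */
    float pitch   /* coarse pitch estimate in samples (e.g. NLP)  */
)
{
    COMP  Sw[FFT_ENC];    /* DFT of windowed speech          */
    COMP  Sw_[FFT_ENC];   /* all-voiced synthesised spectrum */
    MODEL model;
    int   voiced;
    float snr;

    /* analysis: spectrum, two stage pitch refinement, harmonic amplitudes */
    model.Wo = TWO_PI/pitch;
    dft_speech(Sw, Sn, w);
    two_stage_pitch_refinement(&model, Sw);
    estimate_amplitudes(&model, Sw, W);

    /* MBE-style voicing decision, as used on the --phase0 decoder path */
    snr = est_voicing_mbe(&model, Sw, W, (FS/TWO_PI)*model.Wo, Sw_, &voiced);

    /* synthesis: overlap-add this frame's harmonics into Sn_[] */
    synthesise(Sn_, &model, Pn, 1);

    return snr;  /* voicing SNR in dB; voiced flag discarded in this sketch */
}

In the commit itself this sequence is split across the two programs: sinenc.c performs the analysis and writes the model parameters to the model file, while sinedec.c reads them back and performs the synthesis, making the est_voicing_mbe() decision only when --phase0 is selected.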