From 6b4201f17a97ec76cf0a4b022959390c8c08a489 Mon Sep 17 00:00:00 2001 From: drowe67 Date: Tue, 21 May 2013 11:05:04 +0000 Subject: [PATCH] profiling code and mods for faster execution as part of stm32f4 port git-svn-id: https://svn.code.sf.net/p/freetel/code@1255 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2-dev/src/c2demo.c | 16 ++- codec2-dev/src/c2sim.c | 16 ++- codec2-dev/src/codec2.c | 55 +++++++-- codec2-dev/src/dump.c | 50 +++++--- codec2-dev/src/dump.h | 4 + codec2-dev/src/machdep.h | 51 ++++++++ codec2-dev/src/nlp.c | 62 ++++++++-- codec2-dev/src/nlp.h | 4 +- codec2-dev/src/phase.c | 62 ++++------ codec2-dev/src/postfilter.c | 12 +- codec2-dev/src/quantise.c | 238 ++++++++++++------------------------ codec2-dev/src/sine.c | 80 ++++++------ codec2-dev/src/sine.h | 5 +- 13 files changed, 364 insertions(+), 291 deletions(-) create mode 100644 codec2-dev/src/machdep.h diff --git a/codec2-dev/src/c2demo.c b/codec2-dev/src/c2demo.c index 3b674140..042ec62a 100644 --- a/codec2-dev/src/c2demo.c +++ b/codec2-dev/src/c2demo.c @@ -32,14 +32,13 @@ */ #include "codec2.h" +#include "sine.h" #include #include #include #include -#define BITS_SIZE ((CODEC2_BITS_PER_FRAME + 7) / 8) - int main(int argc, char *argv[]) { struct CODEC2 *codec2; @@ -47,7 +46,12 @@ int main(int argc, char *argv[]) FILE *fout; short *buf; unsigned char *bits; - int nsam, nbit; + int nsam, nbit, i, r; + + for(i=0; i<10; i++) { + r = codec2_rand(); + printf("[%d] r = %d\n", i, r); + } if (argc != 3) { printf("usage: %s InputRawSpeechFile OutputRawSpeechFile\n", argv[0]); @@ -66,10 +70,14 @@ int main(int argc, char *argv[]) exit(1); } + #ifdef DUMP + dump_on("c2demo"); + #endif + /* Note only one set of Codec 2 states is required for an encoder and decoder pair. */ - codec2 = codec2_create(CODEC2_MODE_1400); + codec2 = codec2_create(CODEC2_MODE_1300); nsam = codec2_samples_per_frame(codec2); buf = (short*)malloc(nsam*sizeof(short)); nbit = codec2_bits_per_frame(codec2); diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c index d47f0bbd..92f13878 100644 --- a/codec2-dev/src/c2sim.c +++ b/codec2-dev/src/c2sim.c @@ -140,10 +140,14 @@ int main(int argc, char *argv[]) { "lspd", no_argument, &lspd, 1 }, { "lspvq", no_argument, &lspvq, 1 }, { "lspres", no_argument, &lspres, 1 }, + #ifdef __EXPERIMENTAL__ { "lspdt", no_argument, &lspdt, 1 }, { "lspdt_mode", required_argument, NULL, 0 }, + #endif { "lspjvm", no_argument, &lspjvm, 1 }, + #ifdef __EXPERIMENTAL__ { "lspanssi", no_argument, &lspanssi, 1 }, + #endif { "phase0", no_argument, &phase0, 1 }, { "phaseexp", required_argument, &phaseexp, 1 }, { "ampexp", required_argument, &exp, 1 }, @@ -194,7 +198,7 @@ int main(int argc, char *argv[]) e = prev_e = 1; hpf_states[0] = hpf_states[1] = 0.0; - nlp_states = nlp_create(); + nlp_states = nlp_create(M); if (argc < 2) { print_help(long_options, num_opts, argv); @@ -380,12 +384,12 @@ int main(int argc, char *argv[]) \*------------------------------------------------------------*/ - nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo); + nlp(nlp_states,Sn,N,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo); model.Wo = TWO_PI/pitch; dft_speech(fft_fwd_cfg, Sw, Sn, w); two_stage_pitch_refinement(&model, Sw); - estimate_amplitudes(&model, Sw, W); + estimate_amplitudes(&model, Sw, W, 1); uq_Wo = model.Wo; #ifdef DUMP @@ -525,11 +529,13 @@ int main(int argc, char *argv[]) lsp_to_lpc(lsps_, ak, LPC_ORD); } +#ifdef __EXPERIMENTAL__ if (lspvq) { lspvq_quantise(lsps, lsps_, LPC_ORD); bw_expand_lsps(lsps_, LPC_ORD); lsp_to_lpc(lsps_, ak, LPC_ORD); } +#endif if (lspjvm) { /* Jean-Marc's multi-stage, split VQ */ @@ -542,6 +548,7 @@ int main(int argc, char *argv[]) } } +#ifdef __EXPERIMENTAL__ if (lspanssi) { /* multi-stage VQ from Anssi Ramo OH3GDD */ @@ -549,6 +556,7 @@ int main(int argc, char *argv[]) bw_expand_lsps(lsps_, LPC_ORD); lsp_to_lpc(lsps_, ak, LPC_ORD); } +#endif /* experimenting with non-linear LSP spacing to see if it's just noticable */ @@ -607,6 +615,7 @@ int main(int argc, char *argv[]) /* Odd frames are generated by quantising the difference between the previous frames LSPs and this frames */ +#ifdef __EXPERIMENTAL__ if (lspdt && !decimate) { if (frames%2) { lspdt_quantise(lsps, lsps_, lsps__prev, lspdt_mode); @@ -616,6 +625,7 @@ int main(int argc, char *argv[]) for(i=0; iprev_e_dec = 1; - c2->nlp = nlp_create(); + c2->nlp = nlp_create(M); if (c2->nlp == NULL) { free (c2); return NULL; @@ -335,7 +336,6 @@ void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) for(i=0; iSn, c2->w, LPC_ORD); e_index = encode_energy(e); pack(bits, &nbit, e_index, E_BITS); @@ -1003,6 +1009,9 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) for(i=0; iprev_lsps_dec, &lsps[3][0], weight); interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight); @@ -1082,6 +1093,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma); apply_lpc_correction(&model[i]); } + TIMER_SAMPLE_AND_LOG2(recover_start, " recover"); /* synthesise ------------------------------------------------*/ @@ -1288,10 +1300,26 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, float ak[]) { int i; + TIMER_VAR(phase_start, pf_start, synth_start); + + #ifdef DUMP + dump_quantised_model(model); + #endif + + TIMER_SAMPLE(phase_start); phase_synth_zero_order(c2->fft_fwd_cfg, model, ak, &c2->ex_phase, LPC_ORD); + + TIMER_SAMPLE_AND_LOG(pf_start,phase_start, " phase_synth"); + postfilter(model, &c2->bg_est); + + TIMER_SAMPLE_AND_LOG(synth_start, pf_start, " postfilter"); + synthesise(c2->fft_inv_cfg, c2->Sn_, model, c2->Pn, 1); + + TIMER_SAMPLE_AND_LOG2(synth_start, " synth"); + ear_protection(c2->Sn_, N); for(i=0; iSn[i+M-N] = speech[i]; + TIMER_SAMPLE(dft_start); dft_speech(c2->fft_fwd_cfg, Sw, c2->Sn, c2->w); + TIMER_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech"); /* Estimate pitch */ - nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc); + nlp(c2->nlp,c2->Sn,N,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc); + TIMER_SAMPLE_AND_LOG(model_start, nlp_start, " nlp"); + model->Wo = TWO_PI/pitch; model->L = PI/model->Wo; /* estimate model parameters */ two_stage_pitch_refinement(model, Sw); - estimate_amplitudes(model, Sw, c2->W); - snr = est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo_enc); - //fprintf(stderr,"snr %3.2f v: %d Wo: %f prev_Wo: %f\n", - // snr, model->voiced, model->Wo, c2->prev_Wo_enc); + TIMER_SAMPLE_AND_LOG(two_stage, model_start, " two_stage"); + estimate_amplitudes(model, Sw, c2->W, 0); + TIMER_SAMPLE_AND_LOG(estamps, two_stage, " est_amps"); + est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo_enc); c2->prev_Wo_enc = model->Wo; + TIMER_SAMPLE_AND_LOG2(estamps, " est_voicing"); + #ifdef DUMP + dump_model(model); + #endif } /*---------------------------------------------------------------------------*\ diff --git a/codec2-dev/src/dump.c b/codec2-dev/src/dump.c index b414c794..db4b1cc9 100644 --- a/codec2-dev/src/dump.c +++ b/codec2-dev/src/dump.c @@ -32,6 +32,13 @@ #include #include +#ifdef __EMBEDDED__ +#include "gdb_stdio.h" +#define fprintf gdb_stdio_fprintf +#define fopen gdb_stdio_fopen +#define fclose gdb_stdio_fclose +#endif + #ifdef DUMP static int dumpon = 0; @@ -204,6 +211,7 @@ void dump_Ew(COMP Ew[]) { void dump_model(MODEL *model) { int l; char s[MAX_STR]; + char line[2048]; if (!dumpon) return; @@ -213,18 +221,25 @@ void dump_model(MODEL *model) { assert(fmodel != NULL); } - fprintf(fmodel,"%f\t%d\t", model->Wo, model->L); - for(l=1; l<=model->L; l++) - fprintf(fmodel,"%f\t",model->A[l]); - for(l=model->L+1; lvoiced); - fprintf(fmodel,"\n"); + sprintf(line,"%12f %12d ", model->Wo, model->L); + for(l=1; l<=model->L; l++) { + sprintf(s,"%12f ",model->A[l]); + strcat(line, s); + } + for(l=model->L+1; l<=MAX_AMP; l++) { + sprintf(s,"%12f ", 0.0); + strcat(line,s); + } + + sprintf(s,"%d\n",model->voiced); + strcat(line,s); + fprintf(fmodel,"%s",line); } void dump_quantised_model(MODEL *model) { int l; char s[MAX_STR]; + char line[2048]; if (!dumpon) return; @@ -234,12 +249,19 @@ void dump_quantised_model(MODEL *model) { assert(fqmodel != NULL); } - fprintf(fqmodel,"%f\t%d\t", model->Wo, model->L); - for(l=1; l<=model->L; l++) - fprintf(fqmodel,"%f\t",model->A[l]); - for(l=model->L+1; lWo, model->L); + for(l=1; l<=model->L; l++) { + sprintf(s,"%12f ",model->A[l]); + strcat(line, s); + } + for(l=model->L+1; l<=MAX_AMP; l++) { + sprintf(s,"%12f ", 0.0); + strcat(line, s); + } + + sprintf(s,"%d\n",model->voiced); + strcat(line, s); + fprintf(fqmodel, "%s", line); } void dump_phase(float phase[], int L) { @@ -256,7 +278,7 @@ void dump_phase(float phase[], int L) { for(l=1; l<=L; l++) fprintf(fphase,"%f\t",phase[l]); - for(l=L+1; l. +*/ + +#ifndef __MACHDEP__ +#define __MACHDEP__ + +#ifdef TIMER +#define TIMER_VAR(...) unsigned int __VA_ARGS__ +#define TIMER_SAMPLE(timestamp) timestamp = machdep_timer_sample() +#define TIMER_SAMPLE_AND_LOG(timestamp, prev_timestamp, label) \ + timestamp = machdep_timer_sample_and_log(prev_timestamp, label) +#define TIMER_SAMPLE_AND_LOG2(prev_timestamp, label) \ + machdep_timer_sample_and_log(prev_timestamp, label) +#else +#define TIMER_VAR(...) +#define TIMER_SAMPLE(timestamp) +#define TIMER_SAMPLE_AND_LOG(timestamp, prev_timestamp, label) +#define TIMER_SAMPLE_AND_LOG2(prev_timestamp, label) +#endif + +void machdep_timer_init(void); +void machdep_timer_reset(void); +unsigned int machdep_timer_sample(void); +unsigned int machdep_timer_sample_and_log(unsigned int start, char s[]); +void machdep_timer_print_logged_samples(void); + +#endif diff --git a/codec2-dev/src/nlp.c b/codec2-dev/src/nlp.c index 3214578e..14c5600a 100644 --- a/codec2-dev/src/nlp.c +++ b/codec2-dev/src/nlp.c @@ -29,6 +29,8 @@ #include "nlp.h" #include "dump.h" #include "kiss_fft.h" +#undef TIMER +#include "machdep.h" #include #include @@ -51,6 +53,8 @@ #define CNLP 0.3 /* post processor constant */ #define NLP_NTAP 48 /* Decimation LPF order */ +#undef DUMP + /*---------------------------------------------------------------------------*\ GLOBALS @@ -111,6 +115,8 @@ const float nlp_fir[] = { }; typedef struct { + int m; + float w[PMAX_M/DEC]; /* DFT window */ float sq[PMAX_M]; /* squared speech samples */ float mem_x,mem_y; /* memory for notch filter */ float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */ @@ -131,15 +137,24 @@ float post_process_sub_multiples(COMP Fw[], \*---------------------------------------------------------------------------*/ -void *nlp_create() +void *nlp_create( +int m /* analysis window size */ +) { NLP *nlp; int i; + assert(m <= PMAX_M); + nlp = (NLP*)malloc(sizeof(NLP)); if (nlp == NULL) return NULL; + nlp->m = m; + for(i=0; iw[i] = 0.5 - 0.5*cos(2*PI*i/(m/DEC-1)); + } + for(i=0; isq[i] = 0.0; nlp->mem_x = 0.0; @@ -205,7 +220,6 @@ float nlp( void *nlp_state, float Sn[], /* input speech vector */ int n, /* frames shift (no. new samples in Sn[]) */ - int m, /* analysis window size */ int pmin, /* minimum pitch value */ int pmax, /* maximum pitch value */ float *pitch, /* estimated pitch period in samples */ @@ -220,12 +234,15 @@ float nlp( COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal (output) */ float gmax; int gmax_bin; - int i,j; - float best_f0; - + int m, i,j; + float best_f0; + TIMER_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem); + assert(nlp_state != NULL); - assert(m <= PMAX_M); nlp = (NLP*)nlp_state; + m = nlp->m; + + TIMER_SAMPLE(start); /* Square, notch filter at DC, and LP filter vector */ @@ -247,6 +264,8 @@ float nlp( exactly sure why. */ } + TIMER_SAMPLE_AND_LOG(tnotch, start, " square and notch"); + for(i=m-n; isq[i] += nlp->mem_fir[j]*nlp_fir[j]; } + TIMER_SAMPLE_AND_LOG(filter, tnotch, " filter"); + /* Decimate and DFT */ for(i=0; isq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1))); + fw[i].real = nlp->sq[i*DEC]*nlp->w[i]; } + TIMER_SAMPLE_AND_LOG(window, filter, " window"); #ifdef DUMP dump_dec(Fw); #endif kiss_fft(nlp->fft_cfg, (kiss_fft_cpx *)fw, (kiss_fft_cpx *)Fw); + TIMER_SAMPLE_AND_LOG(fft, window, " fft"); + for(i=0; isq); dump_Fw(Fw); @@ -291,6 +316,8 @@ float nlp( } } + TIMER_SAMPLE_AND_LOG(peakpick, magsq, " peak pick"); + //#define POST_PROCESS_MBE #ifdef POST_PROCESS_MBE best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_Wo); @@ -298,6 +325,8 @@ float nlp( best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo); #endif + TIMER_SAMPLE_AND_LOG(shiftmem, peakpick, " post process"); + /* Shift samples in buffer to make room for new samples */ for(i=0; i thresh) if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) { - cmax = lmax; cmax_bin = lmax_bin; } @@ -404,7 +437,9 @@ float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COM float f0,best_f0; /* fundamental frequency */ float e,e_min; /* MBE cost function */ int i; + #ifdef DUMP float e_hz[F0_MAX]; + #endif int bin; float f0_min, f0_max; float f0_start, f0_end; @@ -415,8 +450,10 @@ float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COM /* Now look for local maxima. Each local maxima is a candidate that we test using the MBE pitch estimation algotithm */ + #ifdef DUMP for(i=0; i 0) && (bin < F0_MAX)); - e_hz[bin] = e; + #ifdef DUMP + e_hz[bin] = e; + #endif if (e < e_min) { e_min = e; best_f0 = f0; @@ -460,7 +499,9 @@ float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COM for(f0=f0_start; f0<=f0_end; f0+= 2.5) { e = test_candidate_mbe(Sw, W, f0); bin = floor(f0); assert((bin > 0) && (bin < F0_MAX)); + #ifdef DUMP e_hz[bin] = e; + #endif if (e < e_min) { e_min = e; best_f0 = f0; @@ -540,4 +581,3 @@ float test_candidate_mbe( return error; } - diff --git a/codec2-dev/src/nlp.h b/codec2-dev/src/nlp.h index 38265959..2d2dada1 100644 --- a/codec2-dev/src/nlp.h +++ b/codec2-dev/src/nlp.h @@ -30,9 +30,9 @@ #include "comp.h" -void *nlp_create(); +void *nlp_create(int m); void nlp_destroy(void *nlp_state); -float nlp(void *nlp_state, float Sn[], int n, int m, int pmin, int pmax, +float nlp(void *nlp_state, float Sn[], int n, int pmin, int pmax, float *pitch, COMP Sw[], COMP W[], float *prev_Wo); #endif diff --git a/codec2-dev/src/phase.c b/codec2-dev/src/phase.c index d41303b7..5db0ea2f 100644 --- a/codec2-dev/src/phase.c +++ b/codec2-dev/src/phase.c @@ -29,7 +29,7 @@ #include "phase.h" #include "kiss_fft.h" #include "comp.h" -#include "glottal.c" +#include "sine.h" #include #include @@ -37,8 +37,6 @@ #include #include -#define GLOTTAL_FFT_SIZE 512 - /*---------------------------------------------------------------------------*\ aks_to_H() @@ -84,18 +82,18 @@ void aks_to_H( /* Sample magnitude and phase at harmonics */ for(m=1; m<=model->L; m++) { - am = floor((m - 0.5)*model->Wo/r + 0.5); - bm = floor((m + 0.5)*model->Wo/r + 0.5); - b = floor(m*model->Wo/r + 0.5); - - Em = 0.0; - for(i=am; iWo/r + 0.5); + bm = (int)((m + 0.5)*model->Wo/r + 0.5); + b = (int)(m*model->Wo/r + 0.5); + + Em = 0.0; + for(i=am; iWo)*N; ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5); - r = TWO_PI/GLOTTAL_FFT_SIZE; for(m=1; m<=model->L; m++) { /* generate excitation */ - if (model->voiced) { - //float rnd; - - b = floor(m*model->Wo/r + 0.5); - if (b > ((GLOTTAL_FFT_SIZE/2)-1)) { - b = (GLOTTAL_FFT_SIZE/2)-1; - } - - /* I think adding a little jitter helps improve low pitch - males like hts1a. This moves the onset of each harmonic - over +/- 0.25 of a sample. - */ - //jitter = 0.25*(1.0 - 2.0*rand()/RAND_MAX); - jitter = 0; - - //rnd = (PI/8)*(1.0 - 2.0*rand()/RAND_MAX); - Ex[m].real = cos(ex_phase[0]*m/* - jitter*model->Wo*m + glottal[b]*/); - Ex[m].imag = sin(ex_phase[0]*m/* - jitter*model->Wo*m + glottal[b]*/); + if (model->voiced) { + + Ex[m].real = cosf(ex_phase[0]*m); + Ex[m].imag = sinf(ex_phase[0]*m); } else { @@ -253,9 +233,9 @@ void phase_synth_zero_order( phase is not needed in the unvoiced case, but no harm in keeping it. */ - float phi = TWO_PI*(float)rand()/RAND_MAX; - Ex[m].real = cos(phi); - Ex[m].imag = sin(phi); + float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX; + Ex[m].real = cosf(phi); + Ex[m].imag = sinf(phi); } /* filter using LPC filter */ @@ -265,7 +245,7 @@ void phase_synth_zero_order( /* modify sinusoidal phase */ - new_phi = atan2(A_[m].imag, A_[m].real+1E-12); + new_phi = atan2f(A_[m].imag, A_[m].real+1E-12); model->phi[m] = new_phi; } diff --git a/codec2-dev/src/postfilter.c b/codec2-dev/src/postfilter.c index c78f495b..f347658c 100644 --- a/codec2-dev/src/postfilter.c +++ b/codec2-dev/src/postfilter.c @@ -35,6 +35,7 @@ #include "defines.h" #include "comp.h" #include "dump.h" +#include "sine.h" #include "postfilter.h" /*---------------------------------------------------------------------------*\ @@ -103,16 +104,16 @@ void postfilter( ) { int m, uv; - float e; + float e, thresh; /* determine average energy across spectrum */ - e = 0.0; + e = 1E-12; for(m=1; m<=model->L; m++) e += model->A[m]*model->A[m]; assert(e > 0.0); - e = 10.0*log10(e/model->L); + e = 10.0*log10f(e/model->L); /* If beneath threhold, update bg estimate. The idea of the threshold is to prevent updating during high level @@ -126,10 +127,11 @@ void postfilter( */ uv = 0; + thresh = powf(10.0, (*bg_est + BG_MARGIN)/20.0); if (model->voiced) for(m=1; m<=model->L; m++) - if (20.0*log10(model->A[m]) < (*bg_est + BG_MARGIN)) { - model->phi[m] = TWO_PI*(float)rand()/RAND_MAX; + if (model->A[m] < thresh) { + model->phi[m] = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX; uv++; } diff --git a/codec2-dev/src/quantise.c b/codec2-dev/src/quantise.c index 62728b84..2e493814 100644 --- a/codec2-dev/src/quantise.c +++ b/codec2-dev/src/quantise.c @@ -37,6 +37,8 @@ #include "lpc.h" #include "lsp.h" #include "kiss_fft.h" +#undef TIMER +#include "machdep.h" #define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */ @@ -63,9 +65,11 @@ int lspd_bits(int i) { return lsp_cbd[i].log2m; } +#ifdef __EXPERIMENTAL__ int lspdt_bits(int i) { return lsp_cbdt[i].log2m; } +#endif int lsp_pred_vq_bits(int i) { return lsp_cbjvm[i].log2m; @@ -223,7 +227,7 @@ void decode_lspds_scalar( } - +#ifdef __EXPERIMENTAL__ /*---------------------------------------------------------------------------*\ lspvq_quantise @@ -393,6 +397,7 @@ void lspdt_quantise(float lsps[], float lsps_[], float lsps__prev[], int mode) #endif } +#endif #define MIN(a,b) ((a)<(b)?(a):(b)) #define MAX_ENTRIES 16384 @@ -524,6 +529,8 @@ void lspjvm_quantise(float *x, float *xq, int ndim) } } +#ifdef __EXPERIMENTAL__ + #define MBEST_STAGES 4 struct MBEST_LIST { @@ -718,6 +725,7 @@ void lspanssi_quantise(float *x, float *xq, int ndim, int mbest_entries) mbest_destroy(mbest_stage3); mbest_destroy(mbest_stage4); } +#endif int check_lsp_order(float lsp[], int lpc_order) { @@ -748,130 +756,6 @@ void force_min_lsp_dist(float lsp[], int lpc_order) } } -#ifdef NOT_USED -/*---------------------------------------------------------------------------*\ - - lpc_model_amplitudes - - Derive a LPC model for amplitude samples then estimate amplitude samples - from this model with optional LSP quantisation. - - Returns the spectral distortion for this frame. - -\*---------------------------------------------------------------------------*/ - -float lpc_model_amplitudes( - float Sn[], /* Input frame of speech samples */ - float w[], - MODEL *model, /* sinusoidal model parameters */ - int order, /* LPC model order */ - int lsp_quant, /* optional LSP quantisation if non-zero */ - float ak[] /* output aks */ -) -{ - float Wn[M]; - float R[LPC_MAX+1]; - float E; - int i,j; - float snr; - float lsp[LPC_MAX]; - float lsp_hz[LPC_MAX]; - float lsp_[LPC_MAX]; - int roots; /* number of LSP roots found */ - float wt[LPC_MAX]; - - for(i=0; i {Am} LPC decode */ - - return snr; -} -#endif /*---------------------------------------------------------------------------*\ @@ -913,8 +797,10 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak float e_before, e_after, gain; float Pfw[FFT_ENC]; /* Post filter mag spectrum */ float max_Rw, min_Rw; - float range, thresh, r, w; - int m, bin; + float coeff; + TIMER_VAR(tstart, tfft1, taw, tfft2, tww, tr); + + TIMER_SAMPLE(tstart); /* Determine LPC inverse filter spectrum 1/A(exp(jw)) -----------*/ @@ -932,10 +818,14 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak x[i].real = ak[i]; kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Aw); + TIMER_SAMPLE_AND_LOG(tfft1, tstart, " fft1"); + for(i=0; i max_Rw) max_Rw = Rw[i]; if (Rw[i] < min_Rw) min_Rw = Rw[i]; } + + TIMER_SAMPLE_AND_LOG(tr, tww, " R"); + #ifdef DUMP if (dump) dump_Rw(Rw); @@ -984,7 +885,7 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak e_after = 1E-4; for(i=0; iL; m++) { - am = floor((m - 0.5)*model->Wo/r + 0.5); - bm = floor((m + 0.5)*model->Wo/r + 0.5); - Em = 0.0; - - for(i=am; iA[m],2.0); - noise += pow(model->A[m] - Am,2.0); - - /* This code significantly improves perf of LPC model, in - particular when combined with phase0. The LPC spectrum tends - to track just under the peaks of the spectral envelope, and - just above nulls. This algorithm does the reverse to - compensate - raising the amplitudes of spectral peaks, while - attenuating the null. This enhances the formants, and - supresses the energy between formants. */ - - if (sim_pf) { - if (Am > model->A[m]) - Am *= 0.7; - if (Am < model->A[m]) - Am *= 1.4; - } - - model->A[m] = Am; + am = (int)((m - 0.5)*model->Wo/r + 0.5); + bm = (int)((m + 0.5)*model->Wo/r + 0.5); + Em = 0.0; + + for(i=am; iA[m]*model->A[m]; + noise += (model->A[m] - Am)*(model->A[m] - Am); + + /* This code significantly improves perf of LPC model, in + particular when combined with phase0. The LPC spectrum tends + to track just under the peaks of the spectral envelope, and + just above nulls. This algorithm does the reverse to + compensate - raising the amplitudes of spectral peaks, while + attenuating the null. This enhances the formants, and + supresses the energy between formants. */ + + if (sim_pf) { + if (Am > model->A[m]) + Am *= 0.7; + if (Am < model->A[m]) + Am *= 1.4; + } + + model->A[m] = Am; } *snr = 10.0*log10(signal/noise); + + TIMER_SAMPLE_AND_LOG2(tpf, " rec"); } /*---------------------------------------------------------------------------*\ @@ -1360,6 +1274,8 @@ void decode_lsps_scalar(float lsp[], int indexes[], int order) } +#ifdef __EXPERIMENTAL__ + /*---------------------------------------------------------------------------*\ FUNCTION....: encode_lsps_diff_freq_vq() @@ -1540,7 +1456,7 @@ void decode_lsps_diff_time( } } - +#endif /*---------------------------------------------------------------------------*\ diff --git a/codec2-dev/src/sine.c b/codec2-dev/src/sine.c index 67cbeaeb..9e01c95a 100644 --- a/codec2-dev/src/sine.c +++ b/codec2-dev/src/sine.c @@ -289,7 +289,7 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float float Wo; /* current "test" fundamental freq. */ float Wom; /* Wo that maximises E */ float Em; /* mamimum energy */ - float r; /* number of rads/bin */ + float r, one_on_r; /* number of rads/bin */ float p; /* current pitch */ /* Initialisation */ @@ -298,7 +298,8 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float Wom = model->Wo; Em = 0.0; r = TWO_PI/FFT_ENC; - + one_on_r = 1.0/r; + /* Determine harmonic sum for a range of Wo values */ for(p=pmin; p<=pmax; p+=pstep) { @@ -306,12 +307,10 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float Wo = TWO_PI/p; /* Sum harmonic magnitudes */ - for(m=1; m<=model->L; m++) { - b = floor(m*Wo/r + 0.5); - E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; + b = (int)(m*Wo*one_on_r + 0.5); + E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag; } - /* Compare to see if this is a maximum */ if (E > Em) { @@ -333,40 +332,45 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float \*---------------------------------------------------------------------------*/ -void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[]) +void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase) { int i,m; /* loop variables */ int am,bm; /* bounds of current harmonic */ int b; /* DFT bin of centre of current harmonic */ float den; /* denominator of amplitude expression */ - float r; /* number of rads/bin */ + float r, one_on_r; /* number of rads/bin */ int offset; COMP Am; r = TWO_PI/FFT_ENC; + one_on_r = 1.0/r; for(m=1; m<=model->L; m++) { den = 0.0; - am = floor((m - 0.5)*model->Wo/r + 0.5); - bm = floor((m + 0.5)*model->Wo/r + 0.5); - b = floor(m*model->Wo/r + 0.5); + am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5); + bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5); + b = (int)(m*model->Wo/r + 0.5); /* Estimate ampltude of harmonic */ den = 0.0; Am.real = Am.imag = 0.0; + offset = FFT_ENC/2 - (int)(m*model->Wo*one_on_r + 0.5); for(i=am; iWo/r + 0.5); - Am.real += Sw[i].real*W[offset].real; - Am.imag += Sw[i].imag*W[offset].real; + Am.real += Sw[i].real*W[i + offset].real; + Am.imag += Sw[i].imag*W[i + offset].real; } - model->A[m] = sqrt(den); + model->A[m] = sqrtf(den); - /* Estimate phase of harmonic */ + if (est_phase) { - model->phi[m] = atan2(Sw[b].imag,Sw[b].real); + /* Estimate phase of harmonic, this is expensive in CPU for + embedded devicesso we make it an option */ + + model->phi[m] = atan2(Sw[b].imag,Sw[b].real); + } } } @@ -398,7 +402,7 @@ float est_voicing_mbe( float Wo; float sig, snr; float elow, ehigh, eratio; - float dF0, sixty; + float sixty; sig = 1E-4; for(l=1; l<=model->L/4; l++) { @@ -425,11 +429,11 @@ float est_voicing_mbe( /* Estimate amplitude of harmonic assuming harmonic is totally voiced */ + offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5; for(m=al; mA[l]*model->A[l]; } eratio = 10.0*log10(elow/ehigh); - dF0 = 0.0; /* Look for Type 1 errors, strongly V speech that has been accidentally declared UV */ @@ -487,16 +490,6 @@ float est_voicing_mbe( if (eratio < -10.0) model->voiced = 0; - /* If pitch is jumping about it's likely this is UV */ - - /* 13 Feb 2012 - this seems to add some V errors so comment out for now. Maybe - double check on bg noise files - - dF0 = (model->Wo - prev_Wo)*FS/TWO_PI; - if (fabs(dF0) > 15.0) - model->voiced = 0; - */ - /* A common source of Type 2 errors is the pitch estimator gives a low (50Hz) estimate for UV speech, which gives a good match with noise due to the close harmoonic spacing. @@ -570,7 +563,6 @@ void synthesise( if (shift) { /* Update memories */ - for(i=0; iL; l++) { //for(l=model->L/2; l<=model->L; l++) { //for(l=1; l<=model->L/4; l++) { - b = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5); + b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5); if (b > ((FFT_DEC/2)-1)) { b = (FFT_DEC/2)-1; } - Sw_[b].real = model->A[l]*cos(model->phi[l]); - Sw_[b].imag = model->A[l]*sin(model->phi[l]); + Sw_[b].real = model->A[l]*cosf(model->phi[l]); + Sw_[b].imag = model->A[l]*sinf(model->phi[l]); Sw_[FFT_DEC-b].real = Sw_[b].real; Sw_[FFT_DEC-b].imag = -Sw_[b].imag; } @@ -646,3 +638,11 @@ void synthesise( Sn_[i] += sw_[j].real*Pn[i]; } + +static unsigned long next = 1; + +int codec2_rand(void) { + next = next * 1103515245 + 12345; + return((unsigned)(next/65536) % 32768); +} + diff --git a/codec2-dev/src/sine.h b/codec2-dev/src/sine.h index 202adfd2..da7dc28e 100644 --- a/codec2-dev/src/sine.h +++ b/codec2-dev/src/sine.h @@ -36,10 +36,13 @@ void make_analysis_window(kiss_fft_cfg fft_fwd_cfg, float w[], COMP W[]); float hpf(float x, float states[]); void dft_speech(kiss_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]); void two_stage_pitch_refinement(MODEL *model, COMP Sw[]); -void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[]); +void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase); float est_voicing_mbe(MODEL *model, COMP Sw[], COMP W[], COMP Sw_[],COMP Ew[], float prev_Wo); void make_synthesis_window(float Pn[]); void synthesise(kiss_fft_cfg fft_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift); +#define CODEC2_RAND_MAX 32767 +int codec2_rand(void); + #endif -- 2.25.1