From: drowe67 Date: Mon, 29 Aug 2016 04:29:57 +0000 (+0000) Subject: some nice optimisations for the STM32F4 - thanks Danilo and mCHF team X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=669c827b165a75d6949c330a8370ad6318c197ab;p=freetel-svn-tracking.git some nice optimisations for the STM32F4 - thanks Danilo and mCHF team git-svn-id: https://svn.code.sf.net/p/freetel/code@2848 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2-dev/src/fdmdv.c b/codec2-dev/src/fdmdv.c index ce21293d..e23bf878 100644 --- a/codec2-dev/src/fdmdv.c +++ b/codec2-dev/src/fdmdv.c @@ -676,6 +676,12 @@ void generate_pilot_lut(COMP pilot_lut[], COMP *pilot_freq) if (f >= 4) memcpy(&pilot_lut[M*(f-4)], pilot, M*sizeof(COMP)); } + + // create complex conjugate since we need this and only this later on + + for (f=0;f<4*M;f++) { + pilot_lut[f] = cconj(pilot_lut[f]); + } } @@ -806,10 +812,18 @@ float rx_est_freq_offset(struct FDMDV *f, COMP rx_fdm[], int nin, int do_fft) f->pilot_baseband2[i] = f->pilot_baseband2[i+nin]; } +#ifndef ARM_MATH_CM4 for(i=0,j=NPILOTBASEBAND-nin; i<nin; i++,j++) { - f->pilot_baseband1[j] = cmult(rx_fdm[i], cconj(pilot[i])); - f->pilot_baseband2[j] = cmult(rx_fdm[i], cconj(prev_pilot[i])); + f->pilot_baseband1[j] = cmult(rx_fdm[i], pilot[i]); + f->pilot_baseband2[j] = cmult(rx_fdm[i], prev_pilot[i]); } +#else + // TODO: Maybe a handwritten mult taking advantage of rx_fdm[0] being + // used twice would be faster but this is for sure faster than + // the implementation above in any case. 
+ arm_cmplx_mult_cmplx_f32(&rx_fdm[0].real,&pilot[0].real,&f->pilot_baseband1[NPILOTBASEBAND-nin].real,nin); + arm_cmplx_mult_cmplx_f32(&rx_fdm[0].real,&prev_pilot[0].real,&f->pilot_baseband2[NPILOTBASEBAND-nin].real,nin); +#endif lpf_peak_pick(&foff1, &max1, f->pilot_baseband1, f->pilot_lpf1, f->fft_pilot_cfg, f->S1, nin, do_fft); lpf_peak_pick(&foff2, &max2, f->pilot_baseband2, f->pilot_lpf2, f->fft_pilot_cfg, f->S2, nin, do_fft); @@ -970,29 +984,45 @@ void rxdec_filter(COMP rx_fdm_filter[], COMP rx_fdm[], COMP rxdec_lpf_mem[], int } } - /*---------------------------------------------------------------------------*\ - FUNCTION....: fir_filter() - AUTHOR......: David Rowe - DATE CREATED: July 2014 + FUNCTION....: fir_filter2() + AUTHOR......: Danilo Beuche + DATE CREATED: August 2016 - Helper fir filter function. + This version submitted by Danilo for the STM32F4 platform. The idea + is to avoid reading the same value from the STM32F4 "slow" flash + twice. 2-4ms of savings per frame were measured by Danilo and the mcHF + team. \*---------------------------------------------------------------------------*/ -static float fir_filter(float mem[], float coeff[], int dec_rate) { - float acc = 0.0; - int m; +static void fir_filter2(float acc[2], float mem[], float coeff[], const unsigned int dec_rate) { + acc[0] = 0.0; + acc[1] = 0.0; + + float c,m1,m2,a1,a2; + float* inpCmplx = &mem[0]; + float* coeffPtr = &coeff[0]; + + int m; for(m=0; m