From: drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Date: Mon, 19 Sep 2016 06:31:22 +0000 (+0000)
Subject: Danilo's wonderful FFT real patch, just one lsb difference in output coded speech... 
X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=2c2b7f1dc455873998f9a434e11f172340fda434;p=freetel-svn-tracking.git

Danilo's wonderful FFT real patch, just one lsb difference in output coded speech and 10% saving of CPU on stm32f4 target - thanks so much, Danilo!

git-svn-id: https://svn.code.sf.net/p/freetel/code@2878 01035d8c-6547-0410-b346-abe4f91aad63
---

diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c
index 523f2ffb..8262634b 100644
--- a/codec2-dev/src/c2sim.c
+++ b/codec2-dev/src/c2sim.c
@@ -72,6 +72,7 @@ int main(int argc, char *argv[])
     float Sn_pre[N];	/* pre-emphasised input speech samples   */
     COMP  Sw[FFT_ENC];	/* DFT of Sn[]                           */
     kiss_fft_cfg  fft_fwd_cfg;
+    kiss_fftr_cfg  fftr_fwd_cfg;
     kiss_fft_cfg  fft_inv_cfg;
     float w[M];	        /* time domain hamming window            */
     COMP  W[FFT_ENC];	/* DFT of w[]                            */
@@ -398,6 +399,7 @@ int main(int argc, char *argv[])
     /* Initialise ------------------------------------------------------------*/
 
     fft_fwd_cfg = kiss_fft_alloc(FFT_ENC, 0, NULL, NULL); /* fwd FFT,used in several places   */
+    fftr_fwd_cfg = kiss_fftr_alloc(FFT_ENC, 0, NULL, NULL); /* fwd FFT,used in several places   */
     fft_inv_cfg = kiss_fft_alloc(FFT_DEC, 1, NULL, NULL); /* inverse FFT, used just for synth */
     make_analysis_window(fft_fwd_cfg, w, W);
     make_synthesis_window(Pn);
@@ -809,7 +811,7 @@ int main(int argc, char *argv[])
             for(i=0; i<decimate; i++) {
                 if (lpc_model) {
                     lsp_to_lpc(&lsps_dec[i][0], &ak_dec[i][0], order);
-                    aks_to_M2(fft_fwd_cfg, &ak_dec[i][0], order, &model_dec[i], e_dec[i],
+                    aks_to_M2(fftr_fwd_cfg, &ak_dec[i][0], order, &model_dec[i], e_dec[i],
                               &snr, 0, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
                     apply_lpc_correction(&model_dec[i]);
                     sum_snr += snr;
diff --git a/codec2-dev/src/codec2.c b/codec2-dev/src/codec2.c
index 485d31b1..4ac59f4d 100644
--- a/codec2-dev/src/codec2.c
+++ b/codec2-dev/src/codec2.c
@@ -123,6 +123,7 @@ struct CODEC2 * codec2_create(int mode)
     for(i=0; i<2*N; i++)
 	c2->Sn_[i] = 0;
     c2->fft_fwd_cfg = kiss_fft_alloc(FFT_ENC, 0, NULL, NULL);
+    c2->fftr_fwd_cfg = kiss_fftr_alloc(FFT_ENC, 0, NULL, NULL);
     make_analysis_window(c2->fft_fwd_cfg, c2->w,c2->W);
     make_synthesis_window(c2->Pn);
     c2->fft_inv_cfg = kiss_fft_alloc(FFT_DEC, 1, NULL, NULL);
@@ -458,7 +459,7 @@ void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char *
 
     for(i=0; i<2; i++) {
 	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
-	aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
 	apply_lpc_correction(&model[i]);
 	synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
@@ -601,7 +602,7 @@ void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char *
     interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD);
     for(i=0; i<2; i++) {
 	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
-	aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
 	apply_lpc_correction(&model[i]);
 	synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
@@ -785,7 +786,7 @@ void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char *
     }
     for(i=0; i<4; i++) {
 	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
-	aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
 	apply_lpc_correction(&model[i]);
 	synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
@@ -955,7 +956,7 @@ void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char *
     }
     for(i=0; i<4; i++) {
 	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
-	aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
 	apply_lpc_correction(&model[i]);
 	synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
@@ -1138,7 +1139,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char *
 
     for(i=0; i<4; i++) {
 	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
-	aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
 	apply_lpc_correction(&model[i]);
 	synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
@@ -1325,7 +1326,7 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char *
     }
     for(i=0; i<4; i++) {
 	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
-	aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
 	apply_lpc_correction(&model[i]);
 	synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
@@ -1523,7 +1524,7 @@ void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char *
     }
     for(i=0; i<4; i++) {
 	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
-	aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
+	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
 	apply_lpc_correction(&model[i]);
 	synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
@@ -1727,7 +1728,7 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char *
     }
     for(i=0; i<4; i++) {
 	lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
-	aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
+	aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
 	apply_lpc_correction(&model[i]);
 	synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
diff --git a/codec2-dev/src/codec2_internal.h b/codec2-dev/src/codec2_internal.h
index 26a5e2b5..5fb94324 100644
--- a/codec2-dev/src/codec2_internal.h
+++ b/codec2-dev/src/codec2_internal.h
@@ -32,6 +32,7 @@
 struct CODEC2 {
     int           mode;
     kiss_fft_cfg  fft_fwd_cfg;             /* forward FFT config                        */
+    kiss_fftr_cfg fftr_fwd_cfg;            /* forward real FFT config                   */
     float         w[M];	                   /* time domain hamming window                */
     COMP          W[FFT_ENC];	           /* DFT of w[]                                */
     float         Pn[2*N];	           /* trapezoidal synthesis window              */
diff --git a/codec2-dev/src/dump.h b/codec2-dev/src/dump.h
index 878efb96..58c93102 100644
--- a/codec2-dev/src/dump.h
+++ b/codec2-dev/src/dump.h
@@ -29,6 +29,7 @@
 #include "defines.h"
 #include "comp.h"
 #include "kiss_fft.h"
+#include "kiss_fftr.h"
 #include "codec2_internal.h"
 
 void dump_on(char filename_prefix[]);
diff --git a/codec2-dev/src/kiss_fftr.c b/codec2-dev/src/kiss_fftr.c
index b8e238b1..ca5d4181 100644
--- a/codec2-dev/src/kiss_fftr.c
+++ b/codec2-dev/src/kiss_fftr.c
@@ -14,6 +14,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 
 #include "kiss_fftr.h"
 #include "_kiss_fft_guts.h"
+#include "assert.h"
 
 struct kiss_fftr_state{
     kiss_fft_cfg substate;
@@ -70,10 +71,7 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr
     int k,ncfft;
     kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
 
-    if ( st->substate->inverse) {
-        fprintf(stderr,"kiss fft usage error: improper alloc\n");
-        exit(1);
-    }
+    assert(st->substate->inverse==0);
 
     ncfft = st->substate->nfft;
 
@@ -125,10 +123,7 @@ void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *t
     /* input buffer timedata is stored row-wise */
     int k, ncfft;
 
-    if (st->substate->inverse == 0) {
-        fprintf (stderr, "kiss fft usage error: improper alloc\n");
-        exit (1);
-    }
+    assert(st->substate->inverse == 1);
 
     ncfft = st->substate->nfft;
 
diff --git a/codec2-dev/src/quantise.c b/codec2-dev/src/quantise.c
index ea0d5603..23a8868f 100644
--- a/codec2-dev/src/quantise.c
+++ b/codec2-dev/src/quantise.c
@@ -846,13 +846,13 @@ void force_min_lsp_dist(float lsp[], int order)
 
 \*---------------------------------------------------------------------------*/
 
-void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, float Pw[], float ak[],
+void lpc_post_filter(kiss_fftr_cfg fftr_fwd_cfg, float Pw[], float ak[],
                      int order, int dump, float beta, float gamma, int bass_boost, float E)
 {
     int   i;
-    COMP  x[FFT_ENC];   /* input to FFTs                */
-    COMP  Ww[FFT_ENC];  /* weighting spectrum           */
-    float Rw[FFT_ENC];  /* R = WA                       */
+    float x[FFT_ENC];   /* input to FFTs                */
+    COMP  Ww[FFT_ENC/2+1];  /* weighting spectrum           */
+    float Rw[FFT_ENC/2+1];  /* R = WA                       */
     float e_before, e_after, gain;
     float Pfw;
     float max_Rw, min_Rw;
@@ -864,17 +864,16 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, float Pw[], float ak[],
     /* Determine weighting filter spectrum W(exp(jw)) ---------------*/
 
     for(i=0; i<FFT_ENC; i++) {
-	x[i].real = 0.0;
-	x[i].imag = 0.0;
+	x[i] = 0.0;
     }
 
-    x[0].real = ak[0];
+    x[0]  = ak[0];
     coeff = gamma;
     for(i=1; i<=order; i++) {
-	x[i].real = ak[i] * coeff;
+	x[i] = ak[i] * coeff;
         coeff *= gamma;
     }
-    kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Ww);
+    kiss_fftr(fftr_fwd_cfg, (kiss_fft_scalar *)x, (kiss_fft_cpx *)Ww);
 
     PROFILE_SAMPLE_AND_LOG(tfft2, taw, "        fft2");
 
@@ -957,7 +956,7 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, float Pw[], float ak[],
 \*---------------------------------------------------------------------------*/
 
 void aks_to_M2(
-  kiss_fft_cfg  fft_fwd_cfg,
+  kiss_fftr_cfg  fftr_fwd_cfg,
   float         ak[],	     /* LPC's */
   int           order,
   MODEL        *model,	     /* sinusoidal model parameters for this frame */
@@ -986,23 +985,23 @@ void aks_to_M2(
 
   /* Determine DFT of A(exp(jw)) --------------------------------------------*/
   {
-      COMP a[FFT_ENC];  /* input to FFT for power spectrum */
+      float a[FFT_ENC];  /* input to FFT for power spectrum */
 
       for(i=0; i<FFT_ENC; i++) {
-          a[i].real = 0.0;
-          a[i].imag = 0.0;
+          a[i] = 0.0;
       }
 
       for(i=0; i<=order; i++)
-          a[i].real = ak[i];
-      kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)a, (kiss_fft_cpx *)Aw);
+          a[i] = ak[i];
+      kiss_fftr(fftr_fwd_cfg, (kiss_fft_scalar *)a, (kiss_fft_cpx *)Aw);
   }
   PROFILE_SAMPLE_AND_LOG(tfft, tstart, "      fft");
 
   /* Determine power spectrum P(w) = E/(A(exp(jw))^2 ------------------------*/
 
+  float Pw[FFT_ENC/2];
+
 #ifndef ARM_MATH_CM4
-  float Pw[FFT_ENC];
   for(i=0; i<FFT_ENC/2; i++) {
     Pw[i] = 1.0/(Aw[i].real*Aw[i].real + Aw[i].imag*Aw[i].imag + 1E-6);
   }
@@ -1012,11 +1011,6 @@ void aks_to_M2(
   // so please leave it as is or improve further
   // since this code is called 4 times it results in almost 4ms gain (21ms -> 17ms per audio frame decode @ 1300 )
 
-  float Pw[FFT_ENC];
-  for(i=FFT_ENC/2; i<FFT_ENC; i++) {
-    Pw[i] = 0.0;
-  }
-
   for(i=0; i<FFT_ENC/2; i++)
   {
       Pw[i] = Aw[i].real * Aw[i].real + Aw[i].imag * Aw[i].imag  + 1E-6;
@@ -1029,9 +1023,9 @@ void aks_to_M2(
   PROFILE_SAMPLE_AND_LOG(tpw, tfft, "      Pw");
 
   if (pf)
-      lpc_post_filter(fft_fwd_cfg, Pw, ak, order, dump, beta, gamma, bass_boost, E);
+      lpc_post_filter(fftr_fwd_cfg, Pw, ak, order, dump, beta, gamma, bass_boost, E);
   else {
-      for(i=0; i<FFT_ENC; i++) {
+      for(i=0; i<FFT_ENC/2; i++) {
           Pw[i] *= E;
       }
   }
diff --git a/codec2-dev/src/quantise.h b/codec2-dev/src/quantise.h
index 0374e9f0..2036c704 100644
--- a/codec2-dev/src/quantise.h
+++ b/codec2-dev/src/quantise.h
@@ -27,6 +27,7 @@
 #define __QUANTISE__
 
 #include "kiss_fft.h"
+#include "kiss_fftr.h"
 #include "comp.h"
 
 #define WO_BITS     7
@@ -56,7 +57,7 @@
 void quantise_init();
 float lpc_model_amplitudes(float Sn[], float w[], MODEL *model, int order,
 			   int lsp,float ak[]);
-void aks_to_M2(kiss_fft_cfg fft_fwd_cfg, float ak[], int order, MODEL *model,
+void aks_to_M2(kiss_fftr_cfg fftr_fwd_cfg, float ak[], int order, MODEL *model,
 	       float E, float *snr, int dump, int sim_pf,
                int pf, int bass_boost, float beta, float gamma, COMP Aw[]);