From 7e95b9930dc6dc9eadd4e83f283b6ccd271c9e3b Mon Sep 17 00:00:00 2001
From: drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Date: Fri, 24 May 2013 10:42:26 +0000
Subject: [PATCH] fixed most float to double promotions to speed up STM a
 little

git-svn-id: https://svn.code.sf.net/p/freetel/code@1269 01035d8c-6547-0410-b346-abe4f91aad63
---
 codec2-dev/src/interp.c              | 16 +++++------
 codec2-dev/src/lpc.c                 |  6 ++---
 codec2-dev/src/nlp.c                 |  2 +-
 codec2-dev/src/phase.c               |  2 +-
 codec2-dev/src/quantise.c            | 40 ++++++++++++++--------------
 codec2-dev/src/sine.c                |  8 +++---
 codec2-dev/stm32/Makefile            | 10 +++----
 codec2-dev/stm32/src/stm32f4_timer.c |  1 +
 8 files changed, 43 insertions(+), 42 deletions(-)

diff --git a/codec2-dev/src/interp.c b/codec2-dev/src/interp.c
index d7e56abb..e712a46e 100644
--- a/codec2-dev/src/interp.c
+++ b/codec2-dev/src/interp.c
@@ -111,19 +111,19 @@ float sample_log_amp(MODEL *model, float w)
 
     assert(w > 0.0); assert (w <= PI);
 
-    m = floor(w/model->Wo + 0.5);
+    m = floorf(w/model->Wo + 0.5);
     f = (w - m*model->Wo)/w;
     assert(f <= 1.0);
 
     if (m < 1) {
-	log_amp = f*log10(model->A[1] + 1E-6);
+	log_amp = f*log10f(model->A[1] + 1E-6);
     }
     else if ((m+1) > model->L) {
-	log_amp = (1.0-f)*log10(model->A[model->L] + 1E-6);
+	log_amp = (1.0-f)*log10f(model->A[model->L] + 1E-6);
     }
     else {
-	log_amp = (1.0-f)*log10(model->A[m] + 1E-6) + 
-                  f*log10(model->A[m+1] + 1E-6);
+	log_amp = (1.0-f)*log10f(model->A[m] + 1E-6) + 
+                  f*log10f(model->A[m+1] + 1E-6);
     }
 
     return log_amp;
@@ -195,7 +195,7 @@ void interpolate_lsp(
 
     /* Interpolate LPC energy in log domain */
 
-    e = pow(10.0, (log10(prev_e) + log10(next_e))/2.0);
+    e = powf(10.0, (log10f(prev_e) + log10f(next_e))/2.0);
     //printf("  interp: e: %f\n", e);
 
     /* convert back to amplitudes */
@@ -280,7 +280,7 @@ void interp_Wo2(
 
 float interp_energy(float prev_e, float next_e)
 {
-    return pow(10.0, (log10(prev_e) + log10(next_e))/2.0);
+    return powf(10.0, (log10f(prev_e) + log10f(next_e))/2.0);
  
 }
 
@@ -298,7 +298,7 @@ float interp_energy(float prev_e, float next_e)
 
 float interp_energy2(float prev_e, float next_e, float weight)
 {
-    return pow(10.0, (1.0 - weight)*log10(prev_e) + weight*log10(next_e));
+    return powf(10.0, (1.0 - weight)*log10f(prev_e) + weight*log10f(next_e));
  
 }
 
diff --git a/codec2-dev/src/lpc.c b/codec2-dev/src/lpc.c
index a253289a..d03cb178 100644
--- a/codec2-dev/src/lpc.c
+++ b/codec2-dev/src/lpc.c
@@ -107,7 +107,7 @@ void hanning_window(
   int i;	/* loop variable */
 
   for(i=0; i<Nsam; i++)
-    Wn[i] = Sn[i]*(0.5 - 0.5*cos(2*PI*(float)i/(Nsam-1)));
+    Wn[i] = Sn[i]*(0.5 - 0.5*cosf(2*PI*(float)i/(Nsam-1)));
 }
 
 /*---------------------------------------------------------------------------*\
@@ -169,7 +169,7 @@ void levinson_durbin(
     for(j=1; j<=i-1; j++)
       sum += a[i-1][j]*R[i-j];
     k[i] = -1.0*(R[i] + sum)/E[i-1];	/* Equation 38b, Makhoul */
-    if (fabs(k[i]) > 1.0)
+    if (fabsf(k[i]) > 1.0)
       k[i] = 0.0;
 
     a[i][i] = k[i];
@@ -304,6 +304,6 @@ void weight(
   int i;
   
   for(i=1; i<=order; i++)
-    akw[i] = ak[i]*pow(gamma,(float)i);
+    akw[i] = ak[i]*powf(gamma,(float)i);
 }
     
diff --git a/codec2-dev/src/nlp.c b/codec2-dev/src/nlp.c
index 14c5600a..5fb83587 100644
--- a/codec2-dev/src/nlp.c
+++ b/codec2-dev/src/nlp.c
@@ -152,7 +152,7 @@ int    m			/* analysis window size */
 
     nlp->m = m;
     for(i=0; i<m/DEC; i++) {
-	nlp->w[i] = 0.5 - 0.5*cos(2*PI*i/(m/DEC-1));
+	nlp->w[i] = 0.5 - 0.5*cosf(2*PI*i/(m/DEC-1));
     }
 
     for(i=0; i<PMAX_M; i++)
diff --git a/codec2-dev/src/phase.c b/codec2-dev/src/phase.c
index 5db0ea2f..e1476862 100644
--- a/codec2-dev/src/phase.c
+++ b/codec2-dev/src/phase.c
@@ -216,7 +216,7 @@ void phase_synth_zero_order(
   */
   
   ex_phase[0] += (model->Wo)*N;
-  ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
+  ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5);
 
   for(m=1; m<=model->L; m++) {
       
diff --git a/codec2-dev/src/quantise.c b/codec2-dev/src/quantise.c
index 2e493814..088070cf 100644
--- a/codec2-dev/src/quantise.c
+++ b/codec2-dev/src/quantise.c
@@ -119,7 +119,7 @@ long quantise(const float * cb, float vec[], float w[], int k, int m, float *se)
 	e = 0.0;
 	for(i=0; i<k; i++) {
 	    diff = cb[j*k+i]-vec[i];
-	    e += pow(diff*w[i],2.0);
+	    e += powf(diff*w[i],2.0);
 	}
 	if (e < beste) {
 	    beste = e;
@@ -1014,7 +1014,7 @@ void aks_to_M2(
 
       model->A[m] = Am;
   }
-  *snr = 10.0*log10(signal/noise);
+  *snr = 10.0*log10f(signal/noise);
 
   TIMER_SAMPLE_AND_LOG2(tpf, "      rec"); 
 }
@@ -1037,7 +1037,7 @@ int encode_Wo(float Wo)
     float norm;
 
     norm = (Wo - Wo_min)/(Wo_max - Wo_min);
-    index = floor(WO_LEVELS * norm + 0.5);
+    index = floorf(WO_LEVELS * norm + 0.5);
     if (index < 0 ) index = 0;
     if (index > (WO_LEVELS-1)) index = WO_LEVELS-1;
 
@@ -1196,7 +1196,7 @@ float speech_to_uq_lsps(float lsp[],
     */
 
     for(i=0; i<=order; i++)
-	ak[i] *= pow(0.994,(float)i);
+	ak[i] *= powf(0.994,(float)i);
 
     roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
     if (roots != order) {
@@ -1634,7 +1634,7 @@ void locate_lsps_jnd_steps(float lsps[], int order)
     step = 25;
     for(i=0; i<2; i++) {
 	lsp_hz = lsps[i]*4000.0/PI;
-	lsp_hz = floor(lsp_hz/step + 0.5)*step;
+	lsp_hz = floorf(lsp_hz/step + 0.5)*step;
 	lsps[i] = lsp_hz*PI/4000.0;
 	if (i) {
 	    if (lsps[i] == lsps[i-1])
@@ -1648,7 +1648,7 @@ void locate_lsps_jnd_steps(float lsps[], int order)
     step = 50;
     for(i=2; i<4; i++) {
 	lsp_hz = lsps[i]*4000.0/PI;
-	lsp_hz = floor(lsp_hz/step + 0.5)*step;
+	lsp_hz = floorf(lsp_hz/step + 0.5)*step;
 	lsps[i] = lsp_hz*PI/4000.0;
 	if (i) {
 	    if (lsps[i] == lsps[i-1])
@@ -1662,7 +1662,7 @@ void locate_lsps_jnd_steps(float lsps[], int order)
     step = 100;
     for(i=4; i<10; i++) {
 	lsp_hz = lsps[i]*4000.0/PI;
-	lsp_hz = floor(lsp_hz/step + 0.5)*step;
+	lsp_hz = floorf(lsp_hz/step + 0.5)*step;
 	lsps[i] = lsp_hz*PI/4000.0;
 	if (i) {
 	    if (lsps[i] == lsps[i-1])
@@ -1708,9 +1708,9 @@ int encode_energy(float e)
     float e_max = E_MAX_DB;
     float norm;
 
-    e = 10.0*log10(e);
+    e = 10.0*log10f(e);
     norm = (e - e_min)/(e_max - e_min);
-    index = floor(E_LEVELS * norm + 0.5);
+    index = floorf(E_LEVELS * norm + 0.5);
     if (index < 0 ) index = 0;
     if (index > (E_LEVELS-1)) index = E_LEVELS-1;
 
@@ -1736,7 +1736,7 @@ float decode_energy(int index)
 
     step = (e_max - e_min)/E_LEVELS;
     e    = e_min + step*(index);
-    e    = pow(10.0,e/10.0);
+    e    = powf(10.0,e/10.0);
 
     return e;
 }
@@ -1792,11 +1792,11 @@ void compute_weights2(const float *x, const float *xp, float *w, int ndim)
      w[1] *= .3;
   }
   /* Higher weight if pitch is stable */
-  if (fabs(x[0]-xp[0])<.2)
+  if (fabsf(x[0]-xp[0])<.2)
   {
      w[0] *= 2;
      w[1] *= 1.5;
-  } else if (fabs(x[0]-xp[0])>.5) /* Lower if not stable */
+  } else if (fabsf(x[0]-xp[0])>.5) /* Lower if not stable */
   {
      w[0] *= .5;
   }
@@ -1854,8 +1854,8 @@ void quantise_WoE(MODEL *model, float *e, float xq[])
   float Wo_min = TWO_PI/P_MAX;
   float Wo_max = TWO_PI/P_MIN;
 
-  x[0] = log10((model->Wo/PI)*4000.0/50.0)/log10(2);
-  x[1] = 10.0*log10(1e-4 + *e);
+  x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
+  x[1] = 10.0*log10f(1e-4 + *e);
 
   compute_weights2(x, xq, w, ndim);
   for (i=0;i<ndim;i++)
@@ -1874,7 +1874,7 @@ void quantise_WoE(MODEL *model, float *e, float xq[])
     Wo = (2^x)*(PI*50)/4000;
   */
   
-  model->Wo = pow(2.0, xq[0])*(PI*50.0)/4000.0;
+  model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
 
   /* bit errors can make us go out of range leading to all sorts of
      probs like seg faults */
@@ -1884,7 +1884,7 @@ void quantise_WoE(MODEL *model, float *e, float xq[])
 
   model->L  = PI/model->Wo; /* if we quantise Wo re-compute L */
 
-  *e = pow(10.0, xq[1]/10.0);
+  *e = powf(10.0, xq[1]/10.0);
 }
 
 /*---------------------------------------------------------------------------*\
@@ -1912,8 +1912,8 @@ int encode_WoE(MODEL *model, float e, float xq[])
 
   if (e < 0.0) e = 0;  /* occasional small negative energies due LPC round off I guess */
 
-  x[0] = log10((model->Wo/PI)*4000.0/50.0)/log10(2);
-  x[1] = 10.0*log10(1e-4 + e);
+  x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
+  x[1] = 10.0*log10f(1e-4 + e);
 
   compute_weights2(x, xq, w, ndim);
   for (i=0;i<ndim;i++)
@@ -1959,7 +1959,7 @@ void decode_WoE(MODEL *model, float *e, float xq[], int n1)
   }
 
   //printf("dec: %f %f\n", xq[0], xq[1]);
-  model->Wo = pow(2.0, xq[0])*(PI*50.0)/4000.0;
+  model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
 
   /* bit errors can make us go out of range leading to all sorts of
      probs like seg faults */
@@ -1969,6 +1969,6 @@ void decode_WoE(MODEL *model, float *e, float xq[], int n1)
 
   model->L  = PI/model->Wo; /* if we quantise Wo re-compute L */
 
-  *e = pow(10.0, xq[1]/10.0);
+  *e = powf(10.0, xq[1]/10.0);
 }
 
diff --git a/codec2-dev/src/sine.c b/codec2-dev/src/sine.c
index 9e01c95a..254a61ec 100644
--- a/codec2-dev/src/sine.c
+++ b/codec2-dev/src/sine.c
@@ -88,7 +88,7 @@ void make_analysis_window(kiss_fft_cfg fft_fwd_cfg, float w[], COMP W[])
   for(i=0; i<M/2-NW/2; i++)
     w[i] = 0.0;
   for(i=M/2-NW/2,j=0; i<M/2+NW/2; i++,j++) {
-    w[i] = 0.5 - 0.5*cos(TWO_PI*j/(NW-1));
+    w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(NW-1));
     m += w[i]*w[i];
   }
   for(i=M/2+NW/2; i<M; i++)
@@ -97,7 +97,7 @@ void make_analysis_window(kiss_fft_cfg fft_fwd_cfg, float w[], COMP W[])
   /* Normalise - makes freq domain amplitude estimation straight
      forward */
 
-  m = 1.0/sqrt(m*FFT_ENC);
+  m = 1.0/sqrtf(m*FFT_ENC);
   for(i=0; i<M; i++) {
     w[i] *= m;
   }
@@ -452,7 +452,7 @@ float est_voicing_mbe(
 	}
     }
     
-    snr = 10.0*log10(sig/error);
+    snr = 10.0*log10f(sig/error);
     if (snr > V_THRESH)
 	model->voiced = 1;
     else
@@ -474,7 +474,7 @@ float est_voicing_mbe(
     for(l=model->L/2; l<=model->L; l++) {
 	ehigh += model->A[l]*model->A[l];
     }
-    eratio = 10.0*log10(elow/ehigh);
+    eratio = 10.0*log10f(elow/ehigh);
 
     /* Look for Type 1 errors, strongly V speech that has been
        accidentally declared UV */
diff --git a/codec2-dev/stm32/Makefile b/codec2-dev/stm32/Makefile
index 110749e1..62dd061e 100644
--- a/codec2-dev/stm32/Makefile
+++ b/codec2-dev/stm32/Makefile
@@ -18,8 +18,8 @@ CFLAGS  = -std=gnu99 -g -O2 -Wall -Tstm32_flash.ld -DSTM32F4XX -DCORTEX_M4
 CFLAGS += -mlittle-endian -mthumb -mthumb-interwork -nostartfiles -mcpu=cortex-m4
 
 ifeq ($(FLOAT_TYPE), hard)
-#CFLAGS += -fsingle-precision-constant -Wdouble-promotion
-CFLAGS += -fsingle-precision-constant
+CFLAGS += -fsingle-precision-constant -Wdouble-promotion
+#CFLAGS += -fsingle-precision-constant
 CFLAGS += -mfpu=fpv4-sp-d16 -mfloat-abi=hard -D__FPU_PRESENT=1 -D__FPU_USED=1
 else
 CFLAGS += -msoft-float
@@ -110,7 +110,7 @@ OBJS = $(SRCS:.c=.o)
 
 ###################################################
 
-all: libstm32f4.a $(PROJ_NAME).elf fft_test.elf dactest.elf
+all: libstm32f4.a $(PROJ_NAME).elf fft_test.elf ut_dac.elf
 
 dl/$(PERIPHLIBZIP):
 	mkdir -p dl
@@ -134,14 +134,14 @@ fft_test.elf: $(FFT_TEST_SRCS)
 
 DAC_TEST=$(PERIPHLIBDIR)/Project/STM32F4xx_StdPeriph_Examples/DAC/DAC_SignalsGeneration
 DAC_TEST_SRCS=\
-$(DAC_TEST)/main.c \
+src/ut_dac.c \
 $(DAC_TEST)/stm32f4xx_it.c \
 $(DAC_TEST)/system_stm32f4xx.c \
 $(PERIPHLIBDIR)/Utilities/STM32_EVAL/STM3240_41_G_EVAL/stm324xg_eval.c \
 src/startup_stm32f4xx.s \
 src/init.c
 
-dactest.elf: $(DAC_TEST_SRCS)
+ut_dac.elf: $(DAC_TEST_SRCS)
 	$(CC) $(CFLAGS) -DUSE_STM324xG_EVAL -I$(PERIPHLIBDIR)/Utilities/STM32_EVAL/STM3240_41_G_EVAL -I$(PERIPHLIBDIR)/Utilities/STM32_EVAL/Common $^ -o $@ $(LIBPATHS) $(LIBS)
 
 clean:
diff --git a/codec2-dev/stm32/src/stm32f4_timer.c b/codec2-dev/stm32/src/stm32f4_timer.c
index 4a0614c1..609f9fbc 100644
--- a/codec2-dev/stm32/src/stm32f4_timer.c
+++ b/codec2-dev/stm32/src/stm32f4_timer.c
@@ -1,3 +1,4 @@
+
 /*---------------------------------------------------------------------------*\
 
   FILE........: stm32f4_timer.c
-- 
2.25.1