Fixed most float-to-double promotions to speed up STM a little

author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Fri, 24 May 2013 10:42:26 +0000 (10:42 +0000)

committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Fri, 24 May 2013 10:42:26 +0000 (10:42 +0000)
author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Fri, 24 May 2013 10:42:26 +0000 (10:42 +0000)
committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Fri, 24 May 2013 10:42:26 +0000 (10:42 +0000)
diff --git a/codec2-dev/src/interp.c b/codec2-dev/src/interp.c

index d7e56abba5aef86872421634ee90090d5ac8ead5..e712a46ee58ab98ef3b4dcb79f6207c3a453bb0b 100644 (file)
--- a/codec2-dev/src/interp.c
+++ b/codec2-dev/src/interp.c
@@ -111,19 +111,19 @@ float sample_log_amp(MODEL *model, float w)
  
      assert(w > 0.0); assert (w <= PI);
  
-    m = floor(w/model->Wo + 0.5);
+    m = floorf(w/model->Wo + 0.5);
      f = (w - m*model->Wo)/w;
      assert(f <= 1.0);
  
      if (m < 1) {
-       log_amp = f*log10(model->A[1] + 1E-6);
+       log_amp = f*log10f(model->A[1] + 1E-6);
      }
      else if ((m+1) > model->L) {
-       log_amp = (1.0-f)*log10(model->A[model->L] + 1E-6);
+       log_amp = (1.0-f)*log10f(model->A[model->L] + 1E-6);
      }
      else {
-       log_amp = (1.0-f)*log10(model->A[m] + 1E-6) + 
-                  f*log10(model->A[m+1] + 1E-6);
+       log_amp = (1.0-f)*log10f(model->A[m] + 1E-6) + 
+                  f*log10f(model->A[m+1] + 1E-6);
      }
  
      return log_amp;
@@ -195,7 +195,7 @@ void interpolate_lsp(
  
      /* Interpolate LPC energy in log domain */
  
-    e = pow(10.0, (log10(prev_e) + log10(next_e))/2.0);
+    e = powf(10.0, (log10f(prev_e) + log10f(next_e))/2.0);
      //printf("  interp: e: %f\n", e);
  
      /* convert back to amplitudes */
@@ -280,7 +280,7 @@ void interp_Wo2(
  
  float interp_energy(float prev_e, float next_e)
  {
-    return pow(10.0, (log10(prev_e) + log10(next_e))/2.0);
+    return powf(10.0, (log10f(prev_e) + log10f(next_e))/2.0);
   
  }
  
@@ -298,7 +298,7 @@ float interp_energy(float prev_e, float next_e)
  
  float interp_energy2(float prev_e, float next_e, float weight)
  {
-    return pow(10.0, (1.0 - weight)*log10(prev_e) + weight*log10(next_e));
+    return powf(10.0, (1.0 - weight)*log10f(prev_e) + weight*log10f(next_e));
   
  }
  
diff --git a/codec2-dev/src/lpc.c b/codec2-dev/src/lpc.c

index a253289a4631024844da7eb3719dbbb0a27981c7..d03cb1782f9451b29539cc442c3a34425addf2da 100644 (file)
--- a/codec2-dev/src/lpc.c
+++ b/codec2-dev/src/lpc.c
@@ -107,7 +107,7 @@ void hanning_window(
    int i;       /* loop variable */
  
    for(i=0; i<Nsam; i++)
-    Wn[i] = Sn[i]*(0.5 - 0.5*cos(2*PI*(float)i/(Nsam-1)));
+    Wn[i] = Sn[i]*(0.5 - 0.5*cosf(2*PI*(float)i/(Nsam-1)));
  }
  
  /*---------------------------------------------------------------------------*\
@@ -169,7 +169,7 @@ void levinson_durbin(
      for(j=1; j<=i-1; j++)
        sum += a[i-1][j]*R[i-j];
      k[i] = -1.0*(R[i] + sum)/E[i-1];   /* Equation 38b, Makhoul */
-    if (fabs(k[i]) > 1.0)
+    if (fabsf(k[i]) > 1.0)
        k[i] = 0.0;
  
      a[i][i] = k[i];
@@ -304,6 +304,6 @@ void weight(
    int i;
    
    for(i=1; i<=order; i++)
-    akw[i] = ak[i]*pow(gamma,(float)i);
+    akw[i] = ak[i]*powf(gamma,(float)i);
  }
      
diff --git a/codec2-dev/src/nlp.c b/codec2-dev/src/nlp.c

index 14c5600a3bd64b562bf1708e76077b34d0477500..5fb83587780938c4d8e388ba8f85568217f5bad6 100644 (file)
--- a/codec2-dev/src/nlp.c
+++ b/codec2-dev/src/nlp.c
@@ -152,7 +152,7 @@ int    m                    /* analysis window size */
  
      nlp->m = m;
      for(i=0; i<m/DEC; i++) {
-       nlp->w[i] = 0.5 - 0.5*cos(2*PI*i/(m/DEC-1));
+       nlp->w[i] = 0.5 - 0.5*cosf(2*PI*i/(m/DEC-1));
      }
  
      for(i=0; i<PMAX_M; i++)
diff --git a/codec2-dev/src/phase.c b/codec2-dev/src/phase.c

index 5db0ea2f0ef1986f1c71ae0504c5ec19d03ac8d9..e1476862f9072f68d2295ec05bb7a43576b84f62 100644 (file)
--- a/codec2-dev/src/phase.c
+++ b/codec2-dev/src/phase.c
@@ -216,7 +216,7 @@ void phase_synth_zero_order(
    */
    
    ex_phase[0] += (model->Wo)*N;
-  ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
+  ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5);
  
    for(m=1; m<=model->L; m++) {
        
diff --git a/codec2-dev/src/quantise.c b/codec2-dev/src/quantise.c

index 2e49381418733003f38a3f9acf937efb0a7d10c4..088070cfecb5636e3e54627dce581acafcfc9be7 100644 (file)
--- a/codec2-dev/src/quantise.c
+++ b/codec2-dev/src/quantise.c
@@ -119,7 +119,7 @@ long quantise(const float * cb, float vec[], float w[], int k, int m, float *se)
         e = 0.0;
         for(i=0; i<k; i++) {
             diff = cb[j*k+i]-vec[i];
-           e += pow(diff*w[i],2.0);
+           e += powf(diff*w[i],2.0);
         }
         if (e < beste) {
             beste = e;
@@ -1014,7 +1014,7 @@ void aks_to_M2(
  
        model->A[m] = Am;
    }
-  *snr = 10.0*log10(signal/noise);
+  *snr = 10.0*log10f(signal/noise);
  
    TIMER_SAMPLE_AND_LOG2(tpf, "      rec"); 
  }
@@ -1037,7 +1037,7 @@ int encode_Wo(float Wo)
      float norm;
  
      norm = (Wo - Wo_min)/(Wo_max - Wo_min);
-    index = floor(WO_LEVELS * norm + 0.5);
+    index = floorf(WO_LEVELS * norm + 0.5);
      if (index < 0 ) index = 0;
      if (index > (WO_LEVELS-1)) index = WO_LEVELS-1;
  
@@ -1196,7 +1196,7 @@ float speech_to_uq_lsps(float lsp[],
      */
  
      for(i=0; i<=order; i++)
-       ak[i] *= pow(0.994,(float)i);
+       ak[i] *= powf(0.994,(float)i);
  
      roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
      if (roots != order) {
@@ -1634,7 +1634,7 @@ void locate_lsps_jnd_steps(float lsps[], int order)
      step = 25;
      for(i=0; i<2; i++) {
         lsp_hz = lsps[i]*4000.0/PI;
-       lsp_hz = floor(lsp_hz/step + 0.5)*step;
+       lsp_hz = floorf(lsp_hz/step + 0.5)*step;
         lsps[i] = lsp_hz*PI/4000.0;
         if (i) {
             if (lsps[i] == lsps[i-1])
@@ -1648,7 +1648,7 @@ void locate_lsps_jnd_steps(float lsps[], int order)
      step = 50;
      for(i=2; i<4; i++) {
         lsp_hz = lsps[i]*4000.0/PI;
-       lsp_hz = floor(lsp_hz/step + 0.5)*step;
+       lsp_hz = floorf(lsp_hz/step + 0.5)*step;
         lsps[i] = lsp_hz*PI/4000.0;
         if (i) {
             if (lsps[i] == lsps[i-1])
@@ -1662,7 +1662,7 @@ void locate_lsps_jnd_steps(float lsps[], int order)
      step = 100;
      for(i=4; i<10; i++) {
         lsp_hz = lsps[i]*4000.0/PI;
-       lsp_hz = floor(lsp_hz/step + 0.5)*step;
+       lsp_hz = floorf(lsp_hz/step + 0.5)*step;
         lsps[i] = lsp_hz*PI/4000.0;
         if (i) {
             if (lsps[i] == lsps[i-1])
@@ -1708,9 +1708,9 @@ int encode_energy(float e)
      float e_max = E_MAX_DB;
      float norm;
  
-    e = 10.0*log10(e);
+    e = 10.0*log10f(e);
      norm = (e - e_min)/(e_max - e_min);
-    index = floor(E_LEVELS * norm + 0.5);
+    index = floorf(E_LEVELS * norm + 0.5);
      if (index < 0 ) index = 0;
      if (index > (E_LEVELS-1)) index = E_LEVELS-1;
  
@@ -1736,7 +1736,7 @@ float decode_energy(int index)
  
      step = (e_max - e_min)/E_LEVELS;
      e    = e_min + step*(index);
-    e    = pow(10.0,e/10.0);
+    e    = powf(10.0,e/10.0);
  
      return e;
  }
@@ -1792,11 +1792,11 @@ void compute_weights2(const float *x, const float *xp, float *w, int ndim)
       w[1] *= .3;
    }
    /* Higher weight if pitch is stable */
-  if (fabs(x[0]-xp[0])<.2)
+  if (fabsf(x[0]-xp[0])<.2)
    {
       w[0] *= 2;
       w[1] *= 1.5;
-  } else if (fabs(x[0]-xp[0])>.5) /* Lower if not stable */
+  } else if (fabsf(x[0]-xp[0])>.5) /* Lower if not stable */
    {
       w[0] *= .5;
    }
@@ -1854,8 +1854,8 @@ void quantise_WoE(MODEL *model, float *e, float xq[])
    float Wo_min = TWO_PI/P_MAX;
    float Wo_max = TWO_PI/P_MIN;
  
-  x[0] = log10((model->Wo/PI)*4000.0/50.0)/log10(2);
-  x[1] = 10.0*log10(1e-4 + *e);
+  x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
+  x[1] = 10.0*log10f(1e-4 + *e);
  
    compute_weights2(x, xq, w, ndim);
    for (i=0;i<ndim;i++)
@@ -1874,7 +1874,7 @@ void quantise_WoE(MODEL *model, float *e, float xq[])
      Wo = (2^x)*(PI*50)/4000;
    */
    
-  model->Wo = pow(2.0, xq[0])*(PI*50.0)/4000.0;
+  model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
  
    /* bit errors can make us go out of range leading to all sorts of
       probs like seg faults */
@@ -1884,7 +1884,7 @@ void quantise_WoE(MODEL *model, float *e, float xq[])
  
    model->L  = PI/model->Wo; /* if we quantise Wo re-compute L */
  
-  *e = pow(10.0, xq[1]/10.0);
+  *e = powf(10.0, xq[1]/10.0);
  }
  
  /*---------------------------------------------------------------------------*\
@@ -1912,8 +1912,8 @@ int encode_WoE(MODEL *model, float e, float xq[])
  
    if (e < 0.0) e = 0;  /* occasional small negative energies due LPC round off I guess */
  
-  x[0] = log10((model->Wo/PI)*4000.0/50.0)/log10(2);
-  x[1] = 10.0*log10(1e-4 + e);
+  x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
+  x[1] = 10.0*log10f(1e-4 + e);
  
    compute_weights2(x, xq, w, ndim);
    for (i=0;i<ndim;i++)
@@ -1959,7 +1959,7 @@ void decode_WoE(MODEL *model, float *e, float xq[], int n1)
    }
  
    //printf("dec: %f %f\n", xq[0], xq[1]);
-  model->Wo = pow(2.0, xq[0])*(PI*50.0)/4000.0;
+  model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
  
    /* bit errors can make us go out of range leading to all sorts of
       probs like seg faults */
@@ -1969,6 +1969,6 @@ void decode_WoE(MODEL *model, float *e, float xq[], int n1)
  
    model->L  = PI/model->Wo; /* if we quantise Wo re-compute L */
  
-  *e = pow(10.0, xq[1]/10.0);
+  *e = powf(10.0, xq[1]/10.0);
  }
  
diff --git a/codec2-dev/src/sine.c b/codec2-dev/src/sine.c

index 9e01c95a51fdcfadbd4d840e4104136f0cc434ff..254a61ec7fc8e7e4fb8a02b0e34b708491690bb3 100644 (file)
--- a/codec2-dev/src/sine.c
+++ b/codec2-dev/src/sine.c
@@ -88,7 +88,7 @@ void make_analysis_window(kiss_fft_cfg fft_fwd_cfg, float w[], COMP W[])
    for(i=0; i<M/2-NW/2; i++)
      w[i] = 0.0;
    for(i=M/2-NW/2,j=0; i<M/2+NW/2; i++,j++) {
-    w[i] = 0.5 - 0.5*cos(TWO_PI*j/(NW-1));
+    w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(NW-1));
      m += w[i]*w[i];
    }
    for(i=M/2+NW/2; i<M; i++)
@@ -97,7 +97,7 @@ void make_analysis_window(kiss_fft_cfg fft_fwd_cfg, float w[], COMP W[])
    /* Normalise - makes freq domain amplitude estimation straight
       forward */
  
-  m = 1.0/sqrt(m*FFT_ENC);
+  m = 1.0/sqrtf(m*FFT_ENC);
    for(i=0; i<M; i++) {
      w[i] *= m;
    }
@@ -452,7 +452,7 @@ float est_voicing_mbe(
         }
      }
      
-    snr = 10.0*log10(sig/error);
+    snr = 10.0*log10f(sig/error);
      if (snr > V_THRESH)
         model->voiced = 1;
      else
@@ -474,7 +474,7 @@ float est_voicing_mbe(
      for(l=model->L/2; l<=model->L; l++) {
         ehigh += model->A[l]*model->A[l];
      }
-    eratio = 10.0*log10(elow/ehigh);
+    eratio = 10.0*log10f(elow/ehigh);
  
      /* Look for Type 1 errors, strongly V speech that has been
         accidentally declared UV */
diff --git a/codec2-dev/stm32/Makefile b/codec2-dev/stm32/Makefile

index 110749e1e3f5ef43d7826708daacae97ffdc7ca1..62dd061e98e68fec75b8a1b8817a07304d77458f 100644 (file)
--- a/codec2-dev/stm32/Makefile
+++ b/codec2-dev/stm32/Makefile
@@ -18,8 +18,8 @@ CFLAGS  = -std=gnu99 -g -O2 -Wall -Tstm32_flash.ld -DSTM32F4XX -DCORTEX_M4
  CFLAGS += -mlittle-endian -mthumb -mthumb-interwork -nostartfiles -mcpu=cortex-m4
  
  ifeq ($(FLOAT_TYPE), hard)
-#CFLAGS += -fsingle-precision-constant -Wdouble-promotion
-CFLAGS += -fsingle-precision-constant
+CFLAGS += -fsingle-precision-constant -Wdouble-promotion
+#CFLAGS += -fsingle-precision-constant
  CFLAGS += -mfpu=fpv4-sp-d16 -mfloat-abi=hard -D__FPU_PRESENT=1 -D__FPU_USED=1
  else
  CFLAGS += -msoft-float
@@ -110,7 +110,7 @@ OBJS = $(SRCS:.c=.o)
  
  ###################################################
  
-all: libstm32f4.a $(PROJ_NAME).elf fft_test.elf dactest.elf
+all: libstm32f4.a $(PROJ_NAME).elf fft_test.elf ut_dac.elf
  
  dl/$(PERIPHLIBZIP):
         mkdir -p dl
@@ -134,14 +134,14 @@ fft_test.elf: $(FFT_TEST_SRCS)
  
  DAC_TEST=$(PERIPHLIBDIR)/Project/STM32F4xx_StdPeriph_Examples/DAC/DAC_SignalsGeneration
  DAC_TEST_SRCS=\
-$(DAC_TEST)/main.c \
+src/ut_dac.c \
  $(DAC_TEST)/stm32f4xx_it.c \
  $(DAC_TEST)/system_stm32f4xx.c \
  $(PERIPHLIBDIR)/Utilities/STM32_EVAL/STM3240_41_G_EVAL/stm324xg_eval.c \
  src/startup_stm32f4xx.s \
  src/init.c
  
-dactest.elf: $(DAC_TEST_SRCS)
+ut_dac.elf: $(DAC_TEST_SRCS)
         $(CC) $(CFLAGS) -DUSE_STM324xG_EVAL -I$(PERIPHLIBDIR)/Utilities/STM32_EVAL/STM3240_41_G_EVAL -I$(PERIPHLIBDIR)/Utilities/STM32_EVAL/Common $^ -o $@ $(LIBPATHS) $(LIBS)
  
  clean:
diff --git a/codec2-dev/stm32/src/stm32f4_timer.c b/codec2-dev/stm32/src/stm32f4_timer.c

index 4a0614c1d480072bca88f9b0e6660b00a7244232..609f9fbc207211cc98ea2dc97fa44643ea75adc0 100644 (file)
--- a/codec2-dev/stm32/src/stm32f4_timer.c
+++ b/codec2-dev/stm32/src/stm32f4_timer.c
@@ -1,3 +1,4 @@
+
  /*---------------------------------------------------------------------------*\
  
    FILE........: stm32f4_timer.c
author	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Fri, 24 May 2013 10:42:26 +0000 (10:42 +0000)
committer	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Fri, 24 May 2013 10:42:26 +0000 (10:42 +0000)
codec2-dev/src/interp.c		patch \| blob \| history
codec2-dev/src/lpc.c		patch \| blob \| history
codec2-dev/src/nlp.c		patch \| blob \| history
codec2-dev/src/phase.c		patch \| blob \| history
codec2-dev/src/quantise.c		patch \| blob \| history
codec2-dev/src/sine.c		patch \| blob \| history
codec2-dev/stm32/Makefile		patch \| blob \| history
codec2-dev/stm32/src/stm32f4_timer.c		patch \| blob \| history