From 6b4201f17a97ec76cf0a4b022959390c8c08a489 Mon Sep 17 00:00:00 2001
From: drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Date: Tue, 21 May 2013 11:05:04 +0000
Subject: [PATCH] profiling code and mods for faster execution as part of
 stm32f4 port

git-svn-id: https://svn.code.sf.net/p/freetel/code@1255 01035d8c-6547-0410-b346-abe4f91aad63
---
 codec2-dev/src/c2demo.c     |  16 ++-
 codec2-dev/src/c2sim.c      |  16 ++-
 codec2-dev/src/codec2.c     |  55 +++++++--
 codec2-dev/src/dump.c       |  50 +++++---
 codec2-dev/src/dump.h       |   4 +
 codec2-dev/src/machdep.h    |  51 ++++++++
 codec2-dev/src/nlp.c        |  62 ++++++++--
 codec2-dev/src/nlp.h        |   4 +-
 codec2-dev/src/phase.c      |  62 ++++------
 codec2-dev/src/postfilter.c |  12 +-
 codec2-dev/src/quantise.c   | 238 ++++++++++++------------------------
 codec2-dev/src/sine.c       |  80 ++++++------
 codec2-dev/src/sine.h       |   5 +-
 13 files changed, 364 insertions(+), 291 deletions(-)
 create mode 100644 codec2-dev/src/machdep.h

diff --git a/codec2-dev/src/c2demo.c b/codec2-dev/src/c2demo.c
index 3b674140..042ec62a 100644
--- a/codec2-dev/src/c2demo.c
+++ b/codec2-dev/src/c2demo.c
@@ -32,14 +32,13 @@
 */
 
 #include "codec2.h"
+#include "sine.h"
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 
-#define BITS_SIZE	((CODEC2_BITS_PER_FRAME + 7) / 8)
-
 int main(int argc, char *argv[])
 {
     struct CODEC2 *codec2;
@@ -47,7 +46,12 @@ int main(int argc, char *argv[])
     FILE          *fout;
     short         *buf;
     unsigned char *bits;
-    int            nsam, nbit;
+    int            nsam, nbit, i, r;
+
+    for(i=0; i<10; i++) {
+        r = codec2_rand();
+        printf("[%d] r = %d\n", i, r);
+    }
 
     if (argc != 3) {
 	printf("usage: %s InputRawSpeechFile OutputRawSpeechFile\n", argv[0]);
@@ -66,10 +70,14 @@ int main(int argc, char *argv[])
 	exit(1);
     }
 
+    #ifdef DUMP
+    dump_on("c2demo");
+    #endif
+
     /* Note only one set of Codec 2 states is required for an encoder
        and decoder pair. */
 
-    codec2 = codec2_create(CODEC2_MODE_1400);
+    codec2 = codec2_create(CODEC2_MODE_1300);
     nsam = codec2_samples_per_frame(codec2);
     buf = (short*)malloc(nsam*sizeof(short));
     nbit = codec2_bits_per_frame(codec2);
diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c
index d47f0bbd..92f13878 100644
--- a/codec2-dev/src/c2sim.c
+++ b/codec2-dev/src/c2sim.c
@@ -140,10 +140,14 @@ int main(int argc, char *argv[])
         { "lspd", no_argument, &lspd, 1 },
         { "lspvq", no_argument, &lspvq, 1 },
         { "lspres", no_argument, &lspres, 1 },
+        #ifdef __EXPERIMENTAL__
         { "lspdt", no_argument, &lspdt, 1 },
         { "lspdt_mode", required_argument, NULL, 0 },
+        #endif
         { "lspjvm", no_argument, &lspjvm, 1 },
+        #ifdef __EXPERIMENTAL__
         { "lspanssi", no_argument, &lspanssi, 1 },
+        #endif
         { "phase0", no_argument, &phase0, 1 },
         { "phaseexp", required_argument, &phaseexp, 1 },
         { "ampexp", required_argument, &ampexp, 1 },
@@ -194,7 +198,7 @@ int main(int argc, char *argv[])
     e = prev_e = 1;
     hpf_states[0] = hpf_states[1] = 0.0;
 
-    nlp_states = nlp_create();
+    nlp_states = nlp_create(M);
 
     if (argc < 2) {
         print_help(long_options, num_opts, argv);
@@ -380,12 +384,12 @@ int main(int argc, char *argv[])
 
 	\*------------------------------------------------------------*/
 
-	nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo);
+	nlp(nlp_states,Sn,N,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo);
 	model.Wo = TWO_PI/pitch;
 
 	dft_speech(fft_fwd_cfg, Sw, Sn, w);
 	two_stage_pitch_refinement(&model, Sw);
-	estimate_amplitudes(&model, Sw, W);
+	estimate_amplitudes(&model, Sw, W, 1);
 	uq_Wo = model.Wo;
  
         #ifdef DUMP 
@@ -525,11 +529,13 @@ int main(int argc, char *argv[])
 		lsp_to_lpc(lsps_, ak, LPC_ORD);
 	    }
 
+#ifdef __EXPERIMENTAL__
 	    if (lspvq) {
 		lspvq_quantise(lsps, lsps_, LPC_ORD);
 		bw_expand_lsps(lsps_, LPC_ORD);
 		lsp_to_lpc(lsps_, ak, LPC_ORD);
 	    }
+#endif
 
 	    if (lspjvm) {
 		/* Jean-Marc's multi-stage, split VQ */
@@ -542,6 +548,7 @@ int main(int argc, char *argv[])
 		}
 	    }
 
+#ifdef __EXPERIMENTAL__
 	    if (lspanssi) {
 		/*  multi-stage VQ from Anssi Ramo OH3GDD */
 
@@ -549,6 +556,7 @@ int main(int argc, char *argv[])
 		bw_expand_lsps(lsps_, LPC_ORD);			    
 		lsp_to_lpc(lsps_, ak, LPC_ORD);
 	    }
+#endif
 
 	    /* experimenting with non-linear LSP spacing to see if
 	       it's just noticable */
@@ -607,6 +615,7 @@ int main(int argc, char *argv[])
 	    /* Odd frames are generated by quantising the difference
 	       between the previous frames LSPs and this frames */
 	
+#ifdef __EXPERIMENTAL__
 	    if (lspdt && !decimate) {
 		if (frames%2) {
 		    lspdt_quantise(lsps, lsps_, lsps__prev, lspdt_mode);
@@ -616,6 +625,7 @@ int main(int argc, char *argv[])
 		for(i=0; i<LPC_ORD; i++)
 		    lsps__prev[i] = lsps_[i];		
 	    }
+#endif
 
 	    /* 
 	       When decimation is enabled we only send LSPs to the
diff --git a/codec2-dev/src/codec2.c b/codec2-dev/src/codec2.c
index db9377db..3d54ad9b 100644
--- a/codec2-dev/src/codec2.c
+++ b/codec2-dev/src/codec2.c
@@ -44,6 +44,7 @@
 #include "codec2.h"
 #include "lsp.h"
 #include "codec2_internal.h"
+#include "machdep.h"
 
 /*---------------------------------------------------------------------------*\
                                                        
@@ -131,7 +132,7 @@ struct CODEC2 * CODEC2_WIN32SUPPORT codec2_create(int mode)
     }
     c2->prev_e_dec = 1;
 
-    c2->nlp = nlp_create();
+    c2->nlp = nlp_create(M);
     if (c2->nlp == NULL) {
 	free (c2);
 	return NULL;
@@ -335,7 +336,6 @@ void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[])
     for(i=0; i<LSPD_SCALAR_INDEXES; i++) {
 	pack(bits, &nbit, lspd_indexes[i], lspd_bits(i));
     }
-
     assert(nbit == (unsigned)codec2_bits_per_frame(c2));
 }
 
@@ -967,7 +967,10 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[])
     int     Wo_index, e_index;
     int     i;
     unsigned int nbit = 0;
-    
+    #ifdef TIMER
+    unsigned int quant_start;
+    #endif
+
     assert(c2 != NULL);
 
     memset(bits, '\0',  ((codec2_bits_per_frame(c2) + 7) / 8));
@@ -995,6 +998,9 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[])
     Wo_index = encode_Wo(model.Wo);
     pack(bits, &nbit, Wo_index, WO_BITS);
 
+    #ifdef TIMER
+    quant_start = machdep_timer_sample();
+    #endif
     e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
     e_index = encode_energy(e);
     pack(bits, &nbit, e_index, E_BITS);
@@ -1003,6 +1009,9 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[])
     for(i=0; i<LSP_SCALAR_INDEXES; i++) {
 	pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
     }
+    #ifdef TIMER
+    machdep_timer_sample_and_log(quant_start, "    quant/packing"); 
+    #endif
 
     assert(nbit == (unsigned)codec2_bits_per_frame(c2));
 }
@@ -1030,6 +1039,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char *
     int     i,j;
     unsigned int nbit = 0;
     float   weight;
+    TIMER_VAR(recover_start);
     
     assert(c2 != NULL);
 
@@ -1068,6 +1078,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char *
     /* Wo, energy, and LSPs are sampled every 40ms so we interpolate
        the 3 frames in between */
 
+    TIMER_SAMPLE(recover_start);
     for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
 	interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
         interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
@@ -1082,6 +1093,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char *
                   c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma); 
 	apply_lpc_correction(&model[i]);
     }
+    TIMER_SAMPLE_AND_LOG2(recover_start, "    recover"); 
 
     /* synthesise ------------------------------------------------*/
 
@@ -1288,10 +1300,26 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char *
 void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, float ak[])
 {
     int     i;
+    TIMER_VAR(phase_start, pf_start, synth_start);
+
+    #ifdef DUMP
+    dump_quantised_model(model);
+    #endif
+
+    TIMER_SAMPLE(phase_start);
 
     phase_synth_zero_order(c2->fft_fwd_cfg, model, ak, &c2->ex_phase, LPC_ORD);
+
+    TIMER_SAMPLE_AND_LOG(pf_start,phase_start, "    phase_synth"); 
+
     postfilter(model, &c2->bg_est);
+
+    TIMER_SAMPLE_AND_LOG(synth_start, pf_start, "    postfilter"); 
+
     synthesise(c2->fft_inv_cfg, c2->Sn_, model, c2->Pn, 1);
+
+    TIMER_SAMPLE_AND_LOG2(synth_start, "    synth"); 
+
     ear_protection(c2->Sn_, N);
 
     for(i=0; i<N; i++) {
@@ -1321,8 +1349,9 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
     COMP    Sw[FFT_ENC];
     COMP    Sw_[FFT_ENC];
     COMP    Ew[FFT_ENC];
-    float   pitch, snr;
+    float   pitch;
     int     i;
+    TIMER_VAR(dft_start, nlp_start, model_start, two_stage, estamps);
 
     /* Read input speech */
 
@@ -1331,22 +1360,30 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
     for(i=0; i<N; i++)
       c2->Sn[i+M-N] = speech[i];
 
+    TIMER_SAMPLE(dft_start);
     dft_speech(c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);
+    TIMER_SAMPLE_AND_LOG(nlp_start, dft_start, "    dft_speech");
 
     /* Estimate pitch */
 
-    nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
+    nlp(c2->nlp,c2->Sn,N,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
+    TIMER_SAMPLE_AND_LOG(model_start, nlp_start, "    nlp"); 
+
     model->Wo = TWO_PI/pitch;
     model->L = PI/model->Wo;
 
     /* estimate model parameters */
 
     two_stage_pitch_refinement(model, Sw);
-    estimate_amplitudes(model, Sw, c2->W);
-    snr = est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo_enc);
-    //fprintf(stderr,"snr %3.2f  v: %d  Wo: %f prev_Wo: %f\n", 
-    //	   snr, model->voiced, model->Wo, c2->prev_Wo_enc);
+    TIMER_SAMPLE_AND_LOG(two_stage, model_start, "    two_stage"); 
+    estimate_amplitudes(model, Sw, c2->W, 0);
+    TIMER_SAMPLE_AND_LOG(estamps, two_stage, "    est_amps"); 
+    est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo_enc);
     c2->prev_Wo_enc = model->Wo;
+    TIMER_SAMPLE_AND_LOG2(estamps, "    est_voicing"); 
+    #ifdef DUMP
+    dump_model(model);
+    #endif
 }
 
 /*---------------------------------------------------------------------------*\
diff --git a/codec2-dev/src/dump.c b/codec2-dev/src/dump.c
index b414c794..db4b1cc9 100644
--- a/codec2-dev/src/dump.c
+++ b/codec2-dev/src/dump.c
@@ -32,6 +32,13 @@
 #include <string.h>
 #include <math.h>
 
+#ifdef __EMBEDDED__
+#include "gdb_stdio.h"
+#define fprintf gdb_stdio_fprintf
+#define fopen gdb_stdio_fopen
+#define fclose gdb_stdio_fclose
+#endif
+
 #ifdef DUMP
 static int dumpon = 0;
 
@@ -204,6 +211,7 @@ void dump_Ew(COMP Ew[]) {
 void dump_model(MODEL *model) {
     int l;
     char s[MAX_STR];
+    char line[2048];
 
     if (!dumpon) return;
 
@@ -213,18 +221,25 @@ void dump_model(MODEL *model) {
 	assert(fmodel != NULL);
     }
 
-    fprintf(fmodel,"%f\t%d\t", model->Wo, model->L);    
-    for(l=1; l<=model->L; l++)
-	fprintf(fmodel,"%f\t",model->A[l]);
-    for(l=model->L+1; l<MAX_AMP; l++)
-	fprintf(fmodel,"0.0\t");
-    fprintf(fmodel,"%d\t",model->voiced);
-    fprintf(fmodel,"\n");    
+    sprintf(line,"%12f %12d ", model->Wo, model->L);    
+    for(l=1; l<=model->L; l++) {
+	sprintf(s,"%12f ",model->A[l]);
+        strcat(line, s);
+    }
+    for(l=model->L+1; l<=MAX_AMP; l++) {
+	sprintf(s,"%12f ", 0.0);
+        strcat(line,s);
+    }
+        
+    sprintf(s,"%d\n",model->voiced);
+    strcat(line,s);
+    fprintf(fmodel,"%s",line);    
 }
 
 void dump_quantised_model(MODEL *model) {
     int l;
     char s[MAX_STR];
+    char line[2048];
 
     if (!dumpon) return;
 
@@ -234,12 +249,19 @@ void dump_quantised_model(MODEL *model) {
 	assert(fqmodel != NULL);
     }
 
-    fprintf(fqmodel,"%f\t%d\t", model->Wo, model->L);    
-    for(l=1; l<=model->L; l++)
-	fprintf(fqmodel,"%f\t",model->A[l]);
-    for(l=model->L+1; l<MAX_AMP; l++)
-	fprintf(fqmodel,"0.0\t");
-    fprintf(fqmodel,"\n");    
+    sprintf(line,"%12f %12d ", model->Wo, model->L);    
+    for(l=1; l<=model->L; l++) {
+	sprintf(s,"%12f ",model->A[l]);
+        strcat(line, s);
+    }
+    for(l=model->L+1; l<=MAX_AMP; l++) {
+	sprintf(s,"%12f ", 0.0);
+        strcat(line, s);
+    }
+        
+    sprintf(s,"%d\n",model->voiced);
+    strcat(line, s);
+    fprintf(fqmodel, "%s", line);    
 }
 
 void dump_phase(float phase[], int L) {
@@ -256,7 +278,7 @@ void dump_phase(float phase[], int L) {
 
     for(l=1; l<=L; l++)
 	fprintf(fphase,"%f\t",phase[l]);
-    for(l=L+1; l<MAX_AMP; l++)
+    for(l=L+1; l<=MAX_AMP; l++)
 	fprintf(fphase,"%f\t",0.0);
     fprintf(fphase,"\n");    
 }
diff --git a/codec2-dev/src/dump.h b/codec2-dev/src/dump.h
index a8ccbc4c..3f26b266 100644
--- a/codec2-dev/src/dump.h
+++ b/codec2-dev/src/dump.h
@@ -26,7 +26,10 @@
 #ifndef __DUMP__
 #define __DUMP__
 
+#include "defines.h"
 #include "comp.h"
+#include "kiss_fft.h"
+#include "codec2_internal.h"
 
 void dump_on(char filename_prefix[]);
 void dump_off();
@@ -69,5 +72,6 @@ void dump_Rk(float Rk[]);
 /* post filter */
 
 void dump_bg(float e, float bg_est, float percent_uv);
+void dump_Pwb(COMP Pwb[]);
 
 #endif
diff --git a/codec2-dev/src/machdep.h b/codec2-dev/src/machdep.h
new file mode 100644
index 00000000..ef2e6494
--- /dev/null
+++ b/codec2-dev/src/machdep.h
@@ -0,0 +1,51 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: machdep.h
+  AUTHOR......: David Rowe
+  DATE CREATED: May 2 2013
+
+  Machine dependant functions.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2013 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __MACHDEP__
+#define __MACHDEP__
+
+#ifdef TIMER
+#define TIMER_VAR(...) unsigned int __VA_ARGS__
+#define TIMER_SAMPLE(timestamp) timestamp = machdep_timer_sample()
+#define TIMER_SAMPLE_AND_LOG(timestamp, prev_timestamp, label) \
+    timestamp = machdep_timer_sample_and_log(prev_timestamp, label)
+#define TIMER_SAMPLE_AND_LOG2(prev_timestamp, label) \
+    machdep_timer_sample_and_log(prev_timestamp, label)
+#else
+#define TIMER_VAR(...)
+#define TIMER_SAMPLE(timestamp)
+#define TIMER_SAMPLE_AND_LOG(timestamp, prev_timestamp, label)
+#define TIMER_SAMPLE_AND_LOG2(prev_timestamp, label)
+#endif
+
+void         machdep_timer_init(void);
+void         machdep_timer_reset(void);
+unsigned int machdep_timer_sample(void);
+unsigned int machdep_timer_sample_and_log(unsigned int start, char s[]);
+void         machdep_timer_print_logged_samples(void);
+
+#endif
diff --git a/codec2-dev/src/nlp.c b/codec2-dev/src/nlp.c
index 3214578e..14c5600a 100644
--- a/codec2-dev/src/nlp.c
+++ b/codec2-dev/src/nlp.c
@@ -29,6 +29,8 @@
 #include "nlp.h"
 #include "dump.h"
 #include "kiss_fft.h"
+#undef TIMER
+#include "machdep.h"
 
 #include <assert.h>
 #include <math.h>
@@ -51,6 +53,8 @@
 #define CNLP        0.3	        /* post processor constant              */
 #define NLP_NTAP 48	        /* Decimation LPF order */
 
+#undef DUMP
+
 /*---------------------------------------------------------------------------*\
                                                                             
  				GLOBALS                                       
@@ -111,6 +115,8 @@ const float nlp_fir[] = {
 };
 
 typedef struct {
+    int           m;
+    float         w[PMAX_M/DEC];     /* DFT window                   */ 
     float         sq[PMAX_M];	     /* squared speech samples       */
     float         mem_x,mem_y;       /* memory for notch filter      */
     float         mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
@@ -131,15 +137,24 @@ float post_process_sub_multiples(COMP Fw[],
 
 \*---------------------------------------------------------------------------*/
 
-void *nlp_create()
+void *nlp_create(
+int    m			/* analysis window size */
+)
 {
     NLP *nlp;
     int  i;
 
+    assert(m <= PMAX_M);
+
     nlp = (NLP*)malloc(sizeof(NLP));
     if (nlp == NULL)
 	return NULL;
 
+    nlp->m = m;
+    for(i=0; i<m/DEC; i++) {
+	nlp->w[i] = 0.5 - 0.5*cos(2*PI*i/(m/DEC-1));
+    }
+
     for(i=0; i<PMAX_M; i++)
 	nlp->sq[i] = 0.0;
     nlp->mem_x = 0.0;
@@ -205,7 +220,6 @@ float nlp(
   void *nlp_state, 
   float  Sn[],			/* input speech vector */
   int    n,			/* frames shift (no. new samples in Sn[]) */
-  int    m,			/* analysis window size */
   int    pmin,                  /* minimum pitch value */
   int    pmax,			/* maximum pitch value */
   float *pitch,			/* estimated pitch period in samples */
@@ -220,12 +234,15 @@ float nlp(
     COMP   Fw[PE_FFT_SIZE];	    /* DFT of squared signal (output) */
     float  gmax;
     int    gmax_bin;
-    int   i,j;
-    float best_f0;
-
+    int    m, i,j;
+    float  best_f0;
+    TIMER_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem);
+    
     assert(nlp_state != NULL);
-    assert(m <= PMAX_M);
     nlp = (NLP*)nlp_state;
+    m = nlp->m;
+
+    TIMER_SAMPLE(start);
 
     /* Square, notch filter at DC, and LP filter vector */
 
@@ -247,6 +264,8 @@ float nlp(
 				      exactly sure why. */
     }
 
+    TIMER_SAMPLE_AND_LOG(tnotch, start, "      square and notch");
+
     for(i=m-n; i<m; i++) {	/* FIR filter vector */
 
 	for(j=0; j<NLP_NTAP-1; j++)
@@ -258,6 +277,8 @@ float nlp(
 	    nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
     }
 
+    TIMER_SAMPLE_AND_LOG(filter, tnotch, "      filter");
+ 
     /* Decimate and DFT */
 
     for(i=0; i<PE_FFT_SIZE; i++) {
@@ -265,16 +286,20 @@ float nlp(
 	fw[i].imag = 0.0;
     }
     for(i=0; i<m/DEC; i++) {
-	fw[i].real = nlp->sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
+	fw[i].real = nlp->sq[i*DEC]*nlp->w[i];
     }
+    TIMER_SAMPLE_AND_LOG(window, filter, "      window");
     #ifdef DUMP
     dump_dec(Fw);
     #endif
 
     kiss_fft(nlp->fft_cfg, (kiss_fft_cpx *)fw, (kiss_fft_cpx *)Fw);
+    TIMER_SAMPLE_AND_LOG(fft, window, "      fft");
+
     for(i=0; i<PE_FFT_SIZE; i++)
 	Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
 
+    TIMER_SAMPLE_AND_LOG(magsq, fft, "      mag sq");
     #ifdef DUMP
     dump_sq(nlp->sq);
     dump_Fw(Fw);
@@ -291,6 +316,8 @@ float nlp(
 	}
     }
     
+    TIMER_SAMPLE_AND_LOG(peakpick, magsq, "      peak pick");
+
     //#define POST_PROCESS_MBE
     #ifdef POST_PROCESS_MBE
     best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_Wo);
@@ -298,6 +325,8 @@ float nlp(
     best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo);
     #endif
 
+    TIMER_SAMPLE_AND_LOG(shiftmem, peakpick,  "      post process");
+
     /* Shift samples in buffer to make room for new samples */
 
     for(i=0; i<m-n; i++)
@@ -306,6 +335,11 @@ float nlp(
     /* return pitch and F0 estimate */
 
     *pitch = (float)SAMPLE_RATE/best_f0;
+
+    TIMER_SAMPLE_AND_LOG2(shiftmem,  "      shift mem");
+
+    TIMER_SAMPLE_AND_LOG2(start,  "      nlp int");
+
     return(best_f0);  
 }
 
@@ -338,7 +372,7 @@ float post_process_sub_multiples(COMP Fw[],
     int   mult;
     float thresh, best_f0;
     int   b, bmin, bmax, lmax_bin;
-    float lmax, cmax;
+    float lmax;
     int   prev_f0_bin;
 
     /* post process estimate by searching submultiples */
@@ -374,7 +408,6 @@ float post_process_sub_multiples(COMP Fw[],
 
 	if (lmax > thresh)
 	    if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) {
-		cmax = lmax;
 		cmax_bin = lmax_bin;
 	    }
 
@@ -404,7 +437,9 @@ float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COM
   float f0,best_f0;		/* fundamental frequency */
   float e,e_min;                /* MBE cost function */
   int   i;
+  #ifdef DUMP
   float e_hz[F0_MAX];
+  #endif
   int   bin;
   float f0_min, f0_max;
   float f0_start, f0_end;
@@ -415,8 +450,10 @@ float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COM
   /* Now look for local maxima.  Each local maxima is a candidate
      that we test using the MBE pitch estimation algotithm */
 
+  #ifdef DUMP
   for(i=0; i<F0_MAX; i++)
       e_hz[i] = -1;
+  #endif
   e_min = 1E32;
   best_f0 = 50;
   for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
@@ -437,7 +474,9 @@ float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COM
 	    for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
 		e = test_candidate_mbe(Sw, W, f0);
 		bin = floor(f0); assert((bin > 0) && (bin < F0_MAX));
-		e_hz[bin] = e;
+		#ifdef DUMP
+                e_hz[bin] = e;
+                #endif
 		if (e < e_min) {
 		    e_min = e;
 		    best_f0 = f0;
@@ -460,7 +499,9 @@ float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax, COMP Sw[], COM
   for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
       e = test_candidate_mbe(Sw, W, f0);
       bin = floor(f0); assert((bin > 0) && (bin < F0_MAX));
+      #ifdef DUMP
       e_hz[bin] = e;
+      #endif
       if (e < e_min) {
 	  e_min = e;
 	  best_f0 = f0;
@@ -540,4 +581,3 @@ float test_candidate_mbe(
     return error;
 }
 
-
diff --git a/codec2-dev/src/nlp.h b/codec2-dev/src/nlp.h
index 38265959..2d2dada1 100644
--- a/codec2-dev/src/nlp.h
+++ b/codec2-dev/src/nlp.h
@@ -30,9 +30,9 @@
 
 #include "comp.h"
 
-void *nlp_create();
+void *nlp_create(int m);
 void nlp_destroy(void *nlp_state);
-float nlp(void *nlp_state, float Sn[], int n, int m, int pmin, int pmax, 
+float nlp(void *nlp_state, float Sn[], int n, int pmin, int pmax, 
 	  float *pitch, COMP Sw[], COMP W[], float *prev_Wo);
 
 #endif
diff --git a/codec2-dev/src/phase.c b/codec2-dev/src/phase.c
index d41303b7..5db0ea2f 100644
--- a/codec2-dev/src/phase.c
+++ b/codec2-dev/src/phase.c
@@ -29,7 +29,7 @@
 #include "phase.h"
 #include "kiss_fft.h"
 #include "comp.h"
-#include "glottal.c"
+#include "sine.h"
 
 #include <assert.h>
 #include <ctype.h>
@@ -37,8 +37,6 @@
 #include <string.h>
 #include <stdlib.h>
 
-#define GLOTTAL_FFT_SIZE 512
-
 /*---------------------------------------------------------------------------*\
 
   aks_to_H()
@@ -84,18 +82,18 @@ void aks_to_H(
   /* Sample magnitude and phase at harmonics */
 
   for(m=1; m<=model->L; m++) {
-    am = floor((m - 0.5)*model->Wo/r + 0.5);
-    bm = floor((m + 0.5)*model->Wo/r + 0.5);
-    b = floor(m*model->Wo/r + 0.5);
-
-    Em = 0.0;
-    for(i=am; i<bm; i++)
-      Em += G/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
-    Am = sqrt(fabs(Em/(bm-am)));
-
-    phi_ = -atan2(Pw[b].imag,Pw[b].real);
-    H[m].real = Am*cos(phi_);
-    H[m].imag = Am*sin(phi_);
+      am = (int)((m - 0.5)*model->Wo/r + 0.5);
+      bm = (int)((m + 0.5)*model->Wo/r + 0.5);
+      b = (int)(m*model->Wo/r + 0.5);
+      
+      Em = 0.0;
+      for(i=am; i<bm; i++)
+          Em += G/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
+      Am = sqrtf(fabsf(Em/(bm-am)));
+
+      phi_ = -atan2f(Pw[b].imag,Pw[b].real);
+      H[m].real = Am*cosf(phi_);
+      H[m].imag = Am*sinf(phi_);
   }
 }
 
@@ -204,9 +202,6 @@ void phase_synth_zero_order(
   COMP  A_[MAX_AMP+1];		/* synthesised harmonic samples */
   COMP  H[MAX_AMP+1];           /* LPC freq domain samples */
   float G;
-  float jitter = 0.0;
-  float r;
-  int   b;
 
   G = 1.0;
   aks_to_H(fft_fwd_cfg, model, aks, G, H, order);
@@ -222,30 +217,15 @@ void phase_synth_zero_order(
   
   ex_phase[0] += (model->Wo)*N;
   ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
-  r = TWO_PI/GLOTTAL_FFT_SIZE;
 
   for(m=1; m<=model->L; m++) {
       
     /* generate excitation */
 	    
-    if (model->voiced) {
-	//float rnd;
-
-        b = floor(m*model->Wo/r + 0.5);
-	if (b > ((GLOTTAL_FFT_SIZE/2)-1)) {
-		b = (GLOTTAL_FFT_SIZE/2)-1;
-	}
-
-	/* I think adding a little jitter helps improve low pitch
-	   males like hts1a. This moves the onset of each harmonic
-	   over +/- 0.25 of a sample.
-	*/
-	//jitter = 0.25*(1.0 - 2.0*rand()/RAND_MAX);
-	jitter = 0;
-
-	//rnd = (PI/8)*(1.0 - 2.0*rand()/RAND_MAX);
-	Ex[m].real = cos(ex_phase[0]*m/* - jitter*model->Wo*m + glottal[b]*/);
-	Ex[m].imag = sin(ex_phase[0]*m/* - jitter*model->Wo*m + glottal[b]*/);
+      if (model->voiced) {
+
+	Ex[m].real = cosf(ex_phase[0]*m);
+	Ex[m].imag = sinf(ex_phase[0]*m);
     }
     else {
 
@@ -253,9 +233,9 @@ void phase_synth_zero_order(
 	   phase is not needed in the unvoiced case, but no harm in
 	   keeping it.
         */
-	float phi = TWO_PI*(float)rand()/RAND_MAX;
-        Ex[m].real = cos(phi);
-	Ex[m].imag = sin(phi);
+	float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
+        Ex[m].real = cosf(phi);
+	Ex[m].imag = sinf(phi);
     }
 
     /* filter using LPC filter */
@@ -265,7 +245,7 @@ void phase_synth_zero_order(
 
     /* modify sinusoidal phase */
    
-    new_phi = atan2(A_[m].imag, A_[m].real+1E-12);
+    new_phi = atan2f(A_[m].imag, A_[m].real+1E-12);
     model->phi[m] = new_phi;
   }
 
diff --git a/codec2-dev/src/postfilter.c b/codec2-dev/src/postfilter.c
index c78f495b..f347658c 100644
--- a/codec2-dev/src/postfilter.c
+++ b/codec2-dev/src/postfilter.c
@@ -35,6 +35,7 @@
 #include "defines.h"
 #include "comp.h"
 #include "dump.h"
+#include "sine.h"
 #include "postfilter.h"
 
 /*---------------------------------------------------------------------------*\
@@ -103,16 +104,16 @@ void postfilter(
 )	
 {
   int   m, uv;
-  float e;
+  float e, thresh;
 
   /* determine average energy across spectrum */
 
-  e = 0.0;
+  e = 1E-12;
   for(m=1; m<=model->L; m++)
       e += model->A[m]*model->A[m];
 
   assert(e > 0.0);
-  e = 10.0*log10(e/model->L);
+  e = 10.0*log10f(e/model->L);
 
   /* If beneath threhold, update bg estimate.  The idea
      of the threshold is to prevent updating during high level
@@ -126,10 +127,11 @@ void postfilter(
   */
 
   uv = 0;
+  thresh = powf(10.0, (*bg_est + BG_MARGIN)/20.0);
   if (model->voiced)
       for(m=1; m<=model->L; m++)
-	  if (20.0*log10(model->A[m]) < (*bg_est + BG_MARGIN)) {
-	      model->phi[m] = TWO_PI*(float)rand()/RAND_MAX;
+	  if (model->A[m] < thresh) {
+	      model->phi[m] = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
 	      uv++;
 	  }
 
diff --git a/codec2-dev/src/quantise.c b/codec2-dev/src/quantise.c
index 62728b84..2e493814 100644
--- a/codec2-dev/src/quantise.c
+++ b/codec2-dev/src/quantise.c
@@ -37,6 +37,8 @@
 #include "lpc.h"
 #include "lsp.h"
 #include "kiss_fft.h"
+#undef TIMER
+#include "machdep.h"
 
 #define LSP_DELTA1 0.01         /* grid spacing for LSP root searches */
 
@@ -63,9 +65,11 @@ int lspd_bits(int i) {
     return lsp_cbd[i].log2m;
 }
 
+#ifdef __EXPERIMENTAL__
 int lspdt_bits(int i) {
     return lsp_cbdt[i].log2m;
 }
+#endif
 
 int lsp_pred_vq_bits(int i) {
     return lsp_cbjvm[i].log2m;
@@ -223,7 +227,7 @@ void decode_lspds_scalar(
 
 }
 
-
+#ifdef __EXPERIMENTAL__
 /*---------------------------------------------------------------------------*\
 									      
   lspvq_quantise
@@ -393,6 +397,7 @@ void lspdt_quantise(float lsps[], float lsps_[], float lsps__prev[], int mode)
 #endif
 
 }
+#endif
 
 #define MIN(a,b) ((a)<(b)?(a):(b))
 #define MAX_ENTRIES 16384
@@ -524,6 +529,8 @@ void lspjvm_quantise(float *x, float *xq, int ndim)
   }
 }
 
+#ifdef __EXPERIMENTAL__
+
 #define MBEST_STAGES 4
 
 struct MBEST_LIST {
@@ -718,6 +725,7 @@ void lspanssi_quantise(float *x, float *xq, int ndim, int mbest_entries)
   mbest_destroy(mbest_stage3);
   mbest_destroy(mbest_stage4);
 }
+#endif
 
 int check_lsp_order(float lsp[], int lpc_order)
 {
@@ -748,130 +756,6 @@ void force_min_lsp_dist(float lsp[], int lpc_order)
 	}
 }
 
-#ifdef NOT_USED
-/*---------------------------------------------------------------------------*\
-									      
-  lpc_model_amplitudes
-
-  Derive a LPC model for amplitude samples then estimate amplitude samples
-  from this model with optional LSP quantisation.
-
-  Returns the spectral distortion for this frame.
-
-\*---------------------------------------------------------------------------*/
-
-float lpc_model_amplitudes(
-  float  Sn[],			/* Input frame of speech samples */
-  float  w[],			
-  MODEL *model,			/* sinusoidal model parameters */
-  int    order,                 /* LPC model order */
-  int    lsp_quant,             /* optional LSP quantisation if non-zero */
-  float  ak[]                   /* output aks */
-)
-{
-  float Wn[M];
-  float R[LPC_MAX+1];
-  float E;
-  int   i,j;
-  float snr;	
-  float lsp[LPC_MAX];
-  float lsp_hz[LPC_MAX];
-  float lsp_[LPC_MAX];
-  int   roots;                  /* number of LSP roots found */
-  float wt[LPC_MAX];
-
-  for(i=0; i<M; i++)
-    Wn[i] = Sn[i]*w[i];
-  autocorrelate(Wn,R,M,order);
-  levinson_durbin(R,ak,order);
-  
-  E = 0.0;
-  for(i=0; i<=order; i++)
-      E += ak[i]*R[i];
- 
-  for(i=0; i<order; i++)
-      wt[i] = 1.0;
-
-  if (lsp_quant) {
-    roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
-    if (roots != order)
-	printf("LSP roots not found\n");
-
-    /* convert from radians to Hz to make quantisers more
-       human readable */
-
-    for(i=0; i<order; i++)
-	lsp_hz[i] = (4000.0/PI)*lsp[i];
-    
-#ifdef NOT_NOW
-    /* simple uniform scalar quantisers */
-
-    for(i=0; i<10; i++) {
-	k = lsp_cb[i].k;
-	m = lsp_cb[i].m;
-	cb = lsp_cb[i].cb;
-	index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
-	lsp_hz[i] = cb[index*k];
-    }
-#endif
-    
-    /* experiment: simulating uniform quantisation error
-    for(i=0; i<order; i++)
-	lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX);
-    */
-
-    for(i=0; i<order; i++)
-	lsp[i] = (PI/4000.0)*lsp_hz[i];
-
-    /* Bandwidth Expansion (BW).  Prevents any two LSPs getting too
-       close together after quantisation.  We know from experiment
-       that LSP quantisation errors < 12.5Hz (25Hz step size) are
-       inaudible so we use that as the minimum LSP separation.
-    */
-
-    for(i=1; i<5; i++) {
-	if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
-	    lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
-    }
-
-    /* as quantiser gaps increased, larger BW expansion was required
-       to prevent twinkly noises */
-
-    for(i=5; i<8; i++) {
-	if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
-	    lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
-    }
-    for(i=8; i<order; i++) {
-	if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
-	    lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
-    }
-
-    for(j=0; j<order; j++) 
-	lsp_[j] = lsp[j];
-
-    lsp_to_lpc(lsp_, ak, order);
-#ifdef DUMP
-    dump_lsp(lsp);
-#endif
-  }
-
-#ifdef DUMP
-  dump_E(E);
-#endif
-  #ifdef SIM_QUANT
-  /* simulated LPC energy quantisation */
-  {
-      float e = 10.0*log10(E);
-      e += 2.0*(1.0 - 2.0*(float)rand()/RAND_MAX);
-      E = pow(10.0,e/10.0);
-  }
-  #endif
-
-  aks_to_M2(ak,order,model,E,&snr, 1, 0, 1);   /* {ak} -> {Am} LPC decode */
-
-  return snr;
-}
-#endif
 
 /*---------------------------------------------------------------------------*\
                                                                          
@@ -913,8 +797,10 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak
     float e_before, e_after, gain;
     float Pfw[FFT_ENC]; /* Post filter mag spectrum     */
     float max_Rw, min_Rw;
-    float range, thresh, r, w;
-    int   m, bin;
+    float coeff;
+    TIMER_VAR(tstart, tfft1, taw, tfft2, tww, tr);
+
+    TIMER_SAMPLE(tstart);
 
     /* Determine LPC inverse filter spectrum 1/A(exp(jw)) -----------*/
 
@@ -932,10 +818,14 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak
 	x[i].real = ak[i];
     kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Aw);
 
+    TIMER_SAMPLE_AND_LOG(tfft1, tstart, "        fft1"); 
+
     for(i=0; i<FFT_ENC/2; i++) {
-	Aw[i].real = 1.0/sqrt(Aw[i].real*Aw[i].real + Aw[i].imag*Aw[i].imag);
+	Aw[i].real = 1.0/(Aw[i].real*Aw[i].real + Aw[i].imag*Aw[i].imag);
     }
 
+    TIMER_SAMPLE_AND_LOG(taw, tfft1, "        Aw"); 
+
     /* Determine weighting filter spectrum W(exp(jw)) ---------------*/
 
     for(i=0; i<FFT_ENC; i++) {
@@ -943,25 +833,36 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak
 	x[i].imag = 0.0; 
     }
     
-    for(i=0; i<=order; i++)
-	x[i].real = ak[i] * pow(gamma, (float)i);
+    x[0].real = ak[0];
+    coeff = gamma;
+    for(i=1; i<=order; i++) {
+	x[i].real = ak[i] * coeff;
+        coeff *= gamma;
+    }
     kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Ww);
 
+    TIMER_SAMPLE_AND_LOG(tfft2, taw, "        fft2"); 
+
     for(i=0; i<FFT_ENC/2; i++) {
-	Ww[i].real = sqrt(Ww[i].real*Ww[i].real + Ww[i].imag*Ww[i].imag);
+	Ww[i].real = Ww[i].real*Ww[i].real + Ww[i].imag*Ww[i].imag;
     }
 
+    TIMER_SAMPLE_AND_LOG(tww, tfft2, "        Ww"); 
+
     /* Determined combined filter R = WA ---------------------------*/
 
     max_Rw = 0.0; min_Rw = 1E32;
     for(i=0; i<FFT_ENC/2; i++) {
-	Rw[i] = Ww[i].real * Aw[i].real;
+	Rw[i] = sqrtf(Ww[i].real * Aw[i].real);
 	if (Rw[i] > max_Rw)
 	    max_Rw = Rw[i];
 	if (Rw[i] < min_Rw)
 	    min_Rw = Rw[i];
 
     }
+
+    TIMER_SAMPLE_AND_LOG(tr, tww, "        R"); 
+
     #ifdef DUMP
     if (dump)
       dump_Rw(Rw);
@@ -984,7 +885,7 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak
 
     e_after = 1E-4;
     for(i=0; i<FFT_ENC/2; i++) {
-	Pfw[i] = pow(Rw[i], beta);
+	Pfw[i] = powf(Rw[i], beta);
 	Pw[i].real *= Pfw[i] * Pfw[i];
 	e_after += Pw[i].real;
     }
@@ -1003,6 +904,8 @@ void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak
             Pw[i].real *= 1.4*1.4;
         }    
     }
+
+    TIMER_SAMPLE_AND_LOG2(tr, "        filt"); 
 }
 
 
@@ -1039,6 +942,9 @@ void aks_to_M2(
   float Em;		/* energy in band */
   float Am;		/* spectral amplitude sample */
   float signal, noise;
+  TIMER_VAR(tstart, tfft, tpw, tpf);
+
+  TIMER_SAMPLE(tstart);
 
   r = TWO_PI/(FFT_ENC);
 
@@ -1052,15 +958,21 @@ void aks_to_M2(
   for(i=0; i<=order; i++)
     pw[i].real = ak[i];
   kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)pw, (kiss_fft_cpx *)Pw);
+  
+  TIMER_SAMPLE_AND_LOG(tfft, tstart, "      fft"); 
 
   /* Determine power spectrum P(w) = E/(A(exp(jw))^2 ------------------------*/
 
   for(i=0; i<FFT_ENC/2; i++)
     Pw[i].real = E/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
 
+  TIMER_SAMPLE_AND_LOG(tpw, tfft, "      Pw"); 
+
   if (pf)
       lpc_post_filter(fft_fwd_cfg, model, Pw, ak, order, dump, beta, gamma, bass_boost);
 
+  TIMER_SAMPLE_AND_LOG(tpf, tpw, "      LPC post filter"); 
+
   #ifdef DUMP
   if (dump) 
       dump_Pw(Pw);
@@ -1074,35 +986,37 @@ void aks_to_M2(
   signal = 1E-30; noise = 1E-32;
 
   for(m=1; m<=model->L; m++) {
-    am = floor((m - 0.5)*model->Wo/r + 0.5);
-    bm = floor((m + 0.5)*model->Wo/r + 0.5);
-    Em = 0.0;
-
-    for(i=am; i<bm; i++)
-      Em += Pw[i].real;
-    Am = sqrt(Em);
-
-    signal += pow(model->A[m],2.0);
-    noise  += pow(model->A[m] - Am,2.0);
-
-    /* This code significantly improves perf of LPC model, in
-       particular when combined with phase0.  The LPC spectrum tends
-       to track just under the peaks of the spectral envelope, and
-       just above nulls.  This algorithm does the reverse to
-       compensate - raising the amplitudes of spectral peaks, while
-       attenuating the null.  This enhances the formants, and
-       supresses the energy between formants. */
-
-    if (sim_pf) {
-	if (Am > model->A[m])
-	    Am *= 0.7;
-	if (Am < model->A[m])
-	    Am *= 1.4;
-    }
-
-    model->A[m] = Am;
+      am = (int)((m - 0.5)*model->Wo/r + 0.5);
+      bm = (int)((m + 0.5)*model->Wo/r + 0.5);
+      Em = 0.0;
+
+      for(i=am; i<bm; i++)
+          Em += Pw[i].real;
+      Am = sqrtf(Em);
+
+      signal += model->A[m]*model->A[m];
+      noise  += (model->A[m] - Am)*(model->A[m] - Am);
+
+      /* This code significantly improves perf of LPC model, in
+         particular when combined with phase0.  The LPC spectrum tends
+         to track just under the peaks of the spectral envelope, and
+         just above nulls.  This algorithm does the reverse to
+         compensate - raising the amplitudes of spectral peaks, while
+         attenuating the null.  This enhances the formants, and
+         supresses the energy between formants. */
+
+      if (sim_pf) {
+          if (Am > model->A[m])
+              Am *= 0.7;
+          if (Am < model->A[m])
+              Am *= 1.4;
+      }
+
+      model->A[m] = Am;
   }
   *snr = 10.0*log10(signal/noise);
+
+  TIMER_SAMPLE_AND_LOG2(tpf, "      rec"); 
 }
 
 /*---------------------------------------------------------------------------*\
@@ -1360,6 +1274,8 @@ void decode_lsps_scalar(float lsp[], int indexes[], int order)
 }
 
 
+#ifdef __EXPERIMENTAL__
+
 /*---------------------------------------------------------------------------*\
                                                        
   FUNCTION....: encode_lsps_diff_freq_vq()	     
@@ -1540,7 +1456,7 @@ void decode_lsps_diff_time(
     }
 
 }
-
+#endif
 
 /*---------------------------------------------------------------------------*\
                                                        
diff --git a/codec2-dev/src/sine.c b/codec2-dev/src/sine.c
index 67cbeaeb..9e01c95a 100644
--- a/codec2-dev/src/sine.c
+++ b/codec2-dev/src/sine.c
@@ -289,7 +289,7 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float
   float Wo;		/* current "test" fundamental freq. */
   float Wom;		/* Wo that maximises E */
   float Em;		/* mamimum energy */
-  float r;		/* number of rads/bin */
+  float r, one_on_r;	/* number of rads/bin */
   float p;		/* current pitch */
   
   /* Initialisation */
@@ -298,7 +298,8 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float
   Wom = model->Wo;
   Em = 0.0;
   r = TWO_PI/FFT_ENC;
-  
+  one_on_r = 1.0/r;
+
   /* Determine harmonic sum for a range of Wo values */
 
   for(p=pmin; p<=pmax; p+=pstep) {
@@ -306,12 +307,10 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float
     Wo = TWO_PI/p;
 
     /* Sum harmonic magnitudes */
-
     for(m=1; m<=model->L; m++) {
-      b = floor(m*Wo/r + 0.5);
-      E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag;
+        b = (int)(m*Wo*one_on_r + 0.5);
+        E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag;
     }  
-
     /* Compare to see if this is a maximum */
     
     if (E > Em) {
@@ -333,40 +332,45 @@ void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float
 									      
 \*---------------------------------------------------------------------------*/
 
-void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[])
+void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase)
 {
   int   i,m;		/* loop variables */
   int   am,bm;		/* bounds of current harmonic */
   int   b;		/* DFT bin of centre of current harmonic */
   float den;		/* denominator of amplitude expression */
-  float r;		/* number of rads/bin */
+  float r, one_on_r;	/* number of rads/bin */
   int   offset;
   COMP  Am;
 
   r = TWO_PI/FFT_ENC;
+  one_on_r = 1.0/r;
 
   for(m=1; m<=model->L; m++) {
     den = 0.0;
-    am = floor((m - 0.5)*model->Wo/r + 0.5);
-    bm = floor((m + 0.5)*model->Wo/r + 0.5);
-    b = floor(m*model->Wo/r + 0.5);
+    am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5);
+    bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5);
+    b = (int)(m*model->Wo/r + 0.5);
 
     /* Estimate ampltude of harmonic */
 
     den = 0.0;
     Am.real = Am.imag = 0.0;
+    offset = FFT_ENC/2 - (int)(m*model->Wo*one_on_r + 0.5);
     for(i=am; i<bm; i++) {
       den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag;
-      offset = i + FFT_ENC/2 - floor(m*model->Wo/r + 0.5);
-      Am.real += Sw[i].real*W[offset].real;
-      Am.imag += Sw[i].imag*W[offset].real;
+      Am.real += Sw[i].real*W[i + offset].real;
+      Am.imag += Sw[i].imag*W[i + offset].real;
     }
 
-    model->A[m] = sqrt(den);
+    model->A[m] = sqrtf(den);
 
-    /* Estimate phase of harmonic */
+    if (est_phase) {
 
-    model->phi[m] = atan2(Sw[b].imag,Sw[b].real);
+        /* Estimate phase of harmonic, this is expensive in CPU for
+           embedded devicesso we make it an option */
+
+        model->phi[m] = atan2(Sw[b].imag,Sw[b].real);
+    }
   }
 }
 
@@ -398,7 +402,7 @@ float est_voicing_mbe(
     float Wo;            
     float sig, snr;
     float elow, ehigh, eratio;
-    float dF0, sixty;
+    float sixty;
 
     sig = 1E-4;
     for(l=1; l<=model->L/4; l++) {
@@ -425,11 +429,11 @@ float est_voicing_mbe(
 
 	/* Estimate amplitude of harmonic assuming harmonic is totally voiced */
 
+        offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
 	for(m=al; m<bl; m++) {
-	    offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
-	    Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag;
-	    Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag;
-	    den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag;
+	    Am.real += Sw[m].real*W[offset+m].real;
+	    Am.imag += Sw[m].imag*W[offset+m].real;
+	    den += W[offset+m].real*W[offset+m].real;
         }
 
         Am.real = Am.real/den;
@@ -437,10 +441,10 @@ float est_voicing_mbe(
 
         /* Determine error between estimated harmonic and original */
 
+        offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
         for(m=al; m<bl; m++) {
-	    offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
-	    Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag;
-	    Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real;
+	    Sw_[m].real = Am.real*W[offset+m].real;
+	    Sw_[m].imag = Am.imag*W[offset+m].real;
 	    Ew[m].real = Sw[m].real - Sw_[m].real;
 	    Ew[m].imag = Sw[m].imag - Sw_[m].imag;
 	    error += Ew[m].real*Ew[m].real;
@@ -471,7 +475,6 @@ float est_voicing_mbe(
 	ehigh += model->A[l]*model->A[l];
     }
     eratio = 10.0*log10(elow/ehigh);
-    dF0 = 0.0;
 
     /* Look for Type 1 errors, strongly V speech that has been
        accidentally declared UV */
@@ -487,16 +490,6 @@ float est_voicing_mbe(
 	if (eratio < -10.0)
 	    model->voiced = 0;
 
-	/* If pitch is jumping about it's likely this is UV */
-	
-	/* 13 Feb 2012 - this seems to add some V errors so comment out for now.  Maybe
-	   double check on bg noise files
-
-	   dF0 = (model->Wo - prev_Wo)*FS/TWO_PI;
-	   if (fabs(dF0) > 15.0) 
-	   model->voiced = 0;
-	*/
-
 	/* A common source of Type 2 errors is the pitch estimator
 	   gives a low (50Hz) estimate for UV speech, which gives a
 	   good match with noise due to the close harmoonic spacing.
@@ -570,7 +563,6 @@ void synthesise(
 
     if (shift) {
 	/* Update memories */
-
 	for(i=0; i<N-1; i++) {
 	    Sn_[i] = Sn_[i+N];
 	}
@@ -602,12 +594,12 @@ void synthesise(
     for(l=1; l<=model->L; l++) {
     //for(l=model->L/2; l<=model->L; l++) {
     //for(l=1; l<=model->L/4; l++) {
-	b = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
+	b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
 	if (b > ((FFT_DEC/2)-1)) {
 		b = (FFT_DEC/2)-1;
 	}
-	Sw_[b].real = model->A[l]*cos(model->phi[l]);
-	Sw_[b].imag = model->A[l]*sin(model->phi[l]);
+	Sw_[b].real = model->A[l]*cosf(model->phi[l]);
+	Sw_[b].imag = model->A[l]*sinf(model->phi[l]);
 	Sw_[FFT_DEC-b].real = Sw_[b].real;
 	Sw_[FFT_DEC-b].imag = -Sw_[b].imag;
     }
@@ -646,3 +638,11 @@ void synthesise(
 	    Sn_[i] += sw_[j].real*Pn[i];
 }
 
+
+static unsigned long next = 1;
+
+int codec2_rand(void) {
+    next = next * 1103515245 + 12345;
+    return((unsigned)(next/65536) % 32768);
+}
+
diff --git a/codec2-dev/src/sine.h b/codec2-dev/src/sine.h
index 202adfd2..da7dc28e 100644
--- a/codec2-dev/src/sine.h
+++ b/codec2-dev/src/sine.h
@@ -36,10 +36,13 @@ void make_analysis_window(kiss_fft_cfg fft_fwd_cfg, float w[], COMP W[]);
 float hpf(float x, float states[]);
 void dft_speech(kiss_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]);
 void two_stage_pitch_refinement(MODEL *model, COMP Sw[]);
-void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[]);
+void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase);
 float est_voicing_mbe(MODEL *model, COMP Sw[], COMP W[], COMP Sw_[],COMP Ew[], 
 		      float prev_Wo);
 void make_synthesis_window(float Pn[]);
 void synthesise(kiss_fft_cfg fft_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift);
 
+#define CODEC2_RAND_MAX 32767
+int codec2_rand(void);
+
 #endif
-- 
2.25.1