From a15279eb922cfec28394905372309ac1e7446182 Mon Sep 17 00:00:00 2001
From: drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Date: Mon, 21 Nov 2011 02:54:21 +0000
Subject: [PATCH] c2sim and c2enc/c2dec now match exactly on hts1a and hts2a,
 many small bugs found and fixed.  Still need to clean up and work on 1400
 bit/s

git-svn-id: https://svn.code.sf.net/p/freetel/code@309 01035d8c-6547-0410-b346-abe4f91aad63
---
 codec2-dev/src/c2dec.c  |  7 ++---
 codec2-dev/src/c2enc.c  |  5 ++--
 codec2-dev/src/c2sim.c  | 57 ++++++++++++++++++++++++++---------------
 codec2-dev/src/codec2.c | 46 ++++++++++++++++++++-------------
 codec2-dev/src/interp.c | 10 ++++++++
 codec2-dev/src/sine.c   |  4 +--
 6 files changed, 84 insertions(+), 45 deletions(-)

diff --git a/codec2-dev/src/c2dec.c b/codec2-dev/src/c2dec.c
index d8b4e3c3..fed4c3e3 100644
--- a/codec2-dev/src/c2dec.c
+++ b/codec2-dev/src/c2dec.c
@@ -42,7 +42,7 @@ int main(int argc, char *argv[])
     FILE          *fout;
     short         *buf;
     unsigned char *bits;
-    int            nsam, nbit;
+    int            nsam, nbit, nbyte;
 
     if (argc != 4) {
 	printf("usage: c2dec 2500|1400 InputBitFile OutputRawSpeechFile\n");
@@ -58,7 +58,7 @@ int main(int argc, char *argv[])
 	fprintf(stderr, "Error in mode: %s.  Must be 2500 or 1400\n", argv[1]);
 	exit(1);
     }
-    printf("mode: %d\n", mode);
+    
     if (strcmp(argv[2], "-")  == 0) fin = stdin;
     else if ( (fin = fopen(argv[2],"rb")) == NULL ) {
 	fprintf(stderr, "Error opening input bit file: %s: %s.\n",
@@ -77,7 +77,8 @@ int main(int argc, char *argv[])
     nsam = codec2_samples_per_frame(codec2);
     nbit = codec2_bits_per_frame(codec2);
     buf = (short*)malloc(nsam*sizeof(short));
-    bits = (unsigned char*)malloc(nbit*sizeof(char));
+    nbyte = (nbit + 7) / 8;
+    bits = (unsigned char*)malloc(nbyte*sizeof(char));
 
     while(fread(bits, sizeof(char), nbit, fin) == nbit) {
 	codec2_decode(codec2, buf, bits);
diff --git a/codec2-dev/src/c2enc.c b/codec2-dev/src/c2enc.c
index 361f34c4..f78fca94 100644
--- a/codec2-dev/src/c2enc.c
+++ b/codec2-dev/src/c2enc.c
@@ -43,7 +43,7 @@ int main(int argc, char *argv[])
     FILE          *fout;
     short         *buf;
     unsigned char *bits;
-    int            nsam, nbit;
+    int            nsam, nbit, nbyte;
 
     if (argc != 4) {
 	printf("usage: c2enc 2500|1400 InputRawspeechFile OutputBitFile\n");
@@ -78,7 +78,8 @@ int main(int argc, char *argv[])
     nsam = codec2_samples_per_frame(codec2);
     nbit = codec2_bits_per_frame(codec2);
     buf = (short*)malloc(nsam*sizeof(short));
-    bits = (unsigned char*)malloc(nbit*sizeof(char));
+    nbyte = (nbit + 7) / 8;
+    bits = (unsigned char*)malloc(nbyte*sizeof(char));
 
     while(fread(buf, sizeof(short), nsam, fin) == nsam) {
 	codec2_encode(codec2, bits, buf);
diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c
index b03f2029..2e3a0425 100644
--- a/codec2-dev/src/c2sim.c
+++ b/codec2-dev/src/c2sim.c
@@ -68,7 +68,7 @@ int main(int argc, char *argv[])
     float Sn_[2*N];	/* synthesised speech */
     int   i;		/* loop variable                         */
     int   frames;
-    float prev_Wo, prev__Wo;
+    float prev_Wo, prev__Wo, uq_Wo, prev_uq_Wo;
     float pitch;
     int   voiced1 = 0;
 
@@ -114,7 +114,7 @@ int main(int argc, char *argv[])
     for(i=0; i<2*N; i++)
 	Sn_[i] = 0;
 
-    prev_Wo = prev__Wo = TWO_PI/P_MAX;
+    prev_uq_Wo = prev_Wo = prev__Wo = TWO_PI/P_MAX;
 
     prev_model.Wo = TWO_PI/P_MIN;
     prev_model.L = floor(PI/prev_model.Wo);
@@ -286,14 +286,15 @@ int main(int argc, char *argv[])
  
 	/* Estimate pitch */
 
-	nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
+	nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_uq_Wo);
 	model.Wo = TWO_PI/pitch;
-
+	
 	/* estimate model parameters --------------------------------------*/
 
 	dft_speech(Sw, Sn, w); 
 	two_stage_pitch_refinement(&model, Sw);
 	estimate_amplitudes(&model, Sw, W);
+	uq_Wo = model.Wo;
 #ifdef DUMP 
 	dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
 #endif
@@ -322,7 +323,9 @@ int main(int argc, char *argv[])
 	
 	    /* determine voicing */
 
-	    snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew, prev_Wo);
+	    snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew, prev_uq_Wo);
+	    printf("snr %3.2f v: %d Wo: %f prev_Wo: %f\n", snr, model.voiced,
+		   model.Wo, prev_uq_Wo);
 #ifdef DUMP
 	    dump_Sw_(Sw_);
 	    dump_Ew(Ew);
@@ -508,19 +511,26 @@ int main(int argc, char *argv[])
 
 	    if ((frames%2) == 0) {
 		printf("interp\n");
-		printf("Wo: %1.5f  L: %d e: %3.2f \n", model.Wo, model.L, e);
-		for(i=0; i<LPC_ORD; i++)
-		    printf("lsp_indexes: %d lsps_: %2.3f prev_lsps_: %2.3f\n", 
-			   lsp_indexes[i], lsps_[i], prev_lsps[i]);
-		printf("ak: ");
-		for(i=0; i<LPC_ORD; i++)
-		    printf("%2.3f  ", ak[i]);
+		printf("Wo: %1.5f  L: %d e: %3.2f v2: %d\n", 
+		       model.Wo, model.L, e, model.voiced);
+		//for(i=0; i<LPC_ORD; i++)
+		//    printf("lsp_indexes: %d lsps_: %2.3f prev_lsps_: %2.3f\n", 
+		//	   lsp_indexes[i], lsps_[i], prev_lsps[i]);
+		//printf("ak: ");
+		//for(i=0; i<LPC_ORD; i++)
+		//    printf("%2.3f  ", ak[i]);
+		//printf("\n");
+		printf("Am: ");
+		for(i=0; i<5; i++)
+		    printf("%2.3f  ", model.A[i]);
 		printf("\n");
 
 		/* decode interpolated frame */
 
 		interp_model.voiced = voiced1;
-
+		//printf("before Wo: %1.5f  L: %d  prev_e: %3.2f\n", 
+		//       prev_model.Wo, prev_model.L, prev_e);
+		
 #ifdef LOG_LIN_INTERP
 		interpolate(&interp_model, &prev_model, &model);
 #else
@@ -528,13 +538,18 @@ int main(int argc, char *argv[])
 				prev_lsps, prev_e, lsps_, e, ak_interp);
 		apply_lpc_correction(&interp_model);
 #endif
-		printf("Wo: %1.5f  L: %d  prev_e: %3.2f\n", 
-		       interp_model.Wo, interp_model.L, prev_e);
-		printf("ak_interp: ");
-		for(i=0; i<LPC_ORD; i++)
-		    printf("%2.3f  ", ak_interp[i]);
+		printf("Wo: %1.5f  L: %d  prev_e: %3.2f v1: %d pv: %d\n", 
+		       interp_model.Wo, interp_model.L, prev_e, voiced1,
+		       prev_model.voiced);
+		//printf("ak_interp: ");
+		//for(i=0; i<LPC_ORD; i++)
+		//    printf("%2.3f  ", ak_interp[i]);
+		//printf("\n");
+		printf("Am: ");
+		for(i=0; i<5; i++)
+		    printf("%2.3f  ", interp_model.A[i]);
 		printf("\n");
-		//if (frames==40) 
+		//if (frames == 6) 
 		//    exit(0);
 		if (phase0)
 		    phase_synth_zero_order(&interp_model, ak_interp, ex_phase,
@@ -574,8 +589,10 @@ int main(int argc, char *argv[])
 	    synth_one_frame(buf, &model, Sn_, Pn);
 	    if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
 	}
+
 	prev__Wo = prev_Wo;
-	prev_Wo = TWO_PI/pitch;
+	prev_Wo = model.Wo;
+	prev_uq_Wo = uq_Wo;
     }
 
     /* End Main Loop -----------------------------------------------------*/
diff --git a/codec2-dev/src/codec2.c b/codec2-dev/src/codec2.c
index b973fce8..f290a446 100644
--- a/codec2-dev/src/codec2.c
+++ b/codec2-dev/src/codec2.c
@@ -284,7 +284,7 @@ void codec2_encode_2500(struct CODEC2 *c2, unsigned char * bits, short speech[])
     pack(bits, &nbit, energy_index, E_BITS);
     pack(bits, &nbit, voiced1, 1);
     pack(bits, &nbit, voiced2, 1);
-    
+    printf("v2: %d  v1: %d\n", voiced2, voiced1);
     assert(nbit == codec2_bits_per_frame(c2));
 }
 
@@ -345,13 +345,17 @@ void codec2_decode_2500(struct CODEC2 *c2, short speech[], const unsigned char *
     aks_to_M2(ak, LPC_ORD, &model, energy, &snr, 1); 
     apply_lpc_correction(&model);
 
-    printf("Wo: %1.5f  L: %d e: %3.2f \n", model.Wo, model.L, energy);
-    for(i=0; i<LPC_ORD; i++)
-    	printf("lsp_indexes: %d lsp_: %2.3f prev_lsp_: %2.3f\n", 
-	       lsp_indexes[i], lsps_[i], c2->prev_lsps_[i]);
-    printf("ak: ");
-    for(i=0; i<LPC_ORD; i++)
-    	printf("%2.3f  ", ak[i]);
+    printf("Wo: %1.5f  L: %d e: %3.2f v2: %d\n", 
+	   model.Wo, model.L, energy, voiced2 );
+    //for(i=0; i<LPC_ORD; i++)
+    //	printf("lsp_indexes: %d lsp_: %2.3f prev_lsp_: %2.3f\n", 
+    //	       lsp_indexes[i], lsps_[i], c2->prev_lsps_[i]);
+    //printf("ak: ");
+    //for(i=0; i<LPC_ORD; i++)
+    //	printf("%2.3f  ", ak[i]);
+    printf("Am: ");
+    for(i=0; i<5; i++)
+    	printf("%2.3f  ", model.A[i]);
     printf("\n");
     
     /* interpolate odd frame model parameters from adjacent frames */
@@ -364,13 +368,18 @@ void codec2_decode_2500(struct CODEC2 *c2, short speech[], const unsigned char *
     interpolate_lsp(&model_interp, &c2->prev_model, &model,
     		    c2->prev_lsps_, c2->prev_energy, lsps_, energy, ak_interp);
     apply_lpc_correction(&model_interp);
-    printf("Wo: %1.5f  L: %d  prev_e: %3.2f\n", 
-	   model_interp.Wo, model_interp.L, c2->prev_energy );
-    printf("ak_interp: ");
-    for(i=0; i<LPC_ORD; i++)
-    	printf("%2.3f  ", ak_interp[i]);
+    printf("Wo: %1.5f  L: %d prev_e: %3.2f v1: %d pv: %d\n", 
+	   model_interp.Wo, model_interp.L, c2->prev_energy, voiced1,
+	   c2->prev_model.voiced);
+    //printf("ak_interp: ");
+    //for(i=0; i<LPC_ORD; i++)
+    //	printf("%2.3f  ", ak_interp[i]);
+    //printf("\n");
+    printf("Am: ");
+    for(i=0; i<5; i++)
+    	printf("%2.3f  ", model_interp.A[i]);
     printf("\n");
-    //if (frames ==40)
+    //if (frames == 6)
     //	exit(0);
 
     /* synthesise two 10ms frames */
@@ -652,7 +661,7 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
     COMP    Sw[FFT_ENC];
     COMP    Sw_[FFT_ENC];
     COMP    Ew[FFT_ENC];
-    float   pitch;
+    float   pitch, snr;
     int     i;
 
     /* Read input speech */
@@ -666,7 +675,7 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
 
     /* Estimate pitch */
 
-    nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
+    nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw, &c2->prev_Wo);
     model->Wo = TWO_PI/pitch;
     model->L = PI/model->Wo;
 
@@ -674,7 +683,8 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
 
     two_stage_pitch_refinement(model, Sw);
     estimate_amplitudes(model, Sw, c2->W);
-    est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo);
-
+    snr = est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo);
+    printf("snr %3.2f  v: %d  Wo: %f prev_Wo: %f\n", 
+	   snr, model->voiced, model->Wo, c2->prev_Wo);
     c2->prev_Wo = model->Wo;
 }
diff --git a/codec2-dev/src/interp.c b/codec2-dev/src/interp.c
index dbdb5bf5..8b2c70ce 100644
--- a/codec2-dev/src/interp.c
+++ b/codec2-dev/src/interp.c
@@ -159,6 +159,12 @@ void interpolate_lsp(
     float lsps[LPC_ORD],e;
     float snr;
 
+    /* trap corner case where the voicing estimate is probably wrong: a voiced frame between two unvoiced frames */
+
+    if (interp->voiced && !prev->voiced && !next->voiced) {
+	interp->voiced = 0;
+    }	
+   
     /* Wo depends on voicing of this and adjacent frames */
 
     if (interp->voiced) {
@@ -174,6 +180,10 @@ void interpolate_lsp(
     }
     interp->L = PI/interp->Wo;
 
+    //printf("interp: prev_v: %d next_v: %d prev_Wo: %f next_Wo: %f\n",
+    //	   prev->voiced, next->voiced, prev->Wo, next->Wo);
+    //printf("interp: Wo: %1.5f  L: %d\n", interp->Wo, interp->L);
+
     /* interpolate LSPs */
 
     for(i=0; i<LPC_ORD; i++) {
diff --git a/codec2-dev/src/sine.c b/codec2-dev/src/sine.c
index 45cc9de7..577fd2f1 100644
--- a/codec2-dev/src/sine.c
+++ b/codec2-dev/src/sine.c
@@ -455,7 +455,7 @@ float est_voicing_mbe(
     /* post processing, helps clean up some voicing errors ------------------*/
 
     /* 
-       Determine the ratio of low freancy to high frequency energy,
+       Determine the ratio of low frequency to high frequency energy,
        voiced speech tends to be dominated by low frequency energy,
        unvoiced by high frequency. This measure can be used to
        determine if we have made any gross errors.
@@ -578,7 +578,7 @@ void synthesise(
       Nov 2010 - found that synthesis using time domain cos() functions
       gives better results for synthesis frames greater than 10ms.  Inverse
       FFT synthesis using a 512 pt FFT works well for 10ms window.  I think
-      (but am not sure) that the problem is realted to the quantisation of
+      (but am not sure) that the problem is related to the quantisation of
       the harmonic frequencies to the FFT bin size, e.g. there is a 
       8000/512 Hz step between FFT bins.  For some reason this makes
       the speech from longer frame > 10ms sound poor.  The effect can also
-- 
2.25.1