alpha version fully quantised codec at 51 bits per 20ms frame working, yaaaaaaayyyyy

author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Tue, 24 Aug 2010 07:02:39 +0000 (07:02 +0000)

committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>

Tue, 24 Aug 2010 07:02:39 +0000 (07:02 +0000)
author drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Tue, 24 Aug 2010 07:02:39 +0000 (07:02 +0000)
committer drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Tue, 24 Aug 2010 07:02:39 +0000 (07:02 +0000)
diff --git a/codec2/src/c2dec.c b/codec2/src/c2dec.c

index 815a77460aad05a0fe4d07d81cc9f12df932a64b..642ba41df9a4a2d4ba61839fe96e49894c060d77 100644 (file)
--- a/codec2/src/c2dec.c
+++ b/codec2/src/c2dec.c
@@ -39,6 +39,7 @@ int main(int argc, char *argv[])
      FILE *fout;
      short buf[CODEC2_SAMPLES_PER_FRAME];
      char  bits[CODEC2_BITS_PER_FRAME];
+    int   i;
  
      if (argc != 3) {
         printf("usage: %s InputBitFile OutputRawSpeechFile\n", argv[0]);
@@ -59,8 +60,10 @@ int main(int argc, char *argv[])
  
      codec2 = codec2_create();
  
-    while(fread(bits, sizeof(buf), CODEC2_BITS_PER_FRAME, fin) ==
+    while(fread(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fin) ==
           CODEC2_BITS_PER_FRAME) {
+       //for(i=0; i<CODEC2_BITS_PER_FRAME; i++)
+       //    printf("bit[%d] = %d\n", i, bits[i]);
         codec2_decode(codec2, buf, bits);
         fwrite(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fout);
      }
diff --git a/codec2/src/c2enc.c b/codec2/src/c2enc.c

index f4d8a6dbad07c1b5c5962cac7f0a7868d8c5c9d8..646dafa76f51051ce46cf0b633a5292e75109ae9 100644 (file)
--- a/codec2/src/c2enc.c
+++ b/codec2/src/c2enc.c
@@ -40,6 +40,7 @@ int main(int argc, char *argv[])
      FILE *fout;
      short buf[CODEC2_SAMPLES_PER_FRAME];
      char  bits[CODEC2_BITS_PER_FRAME];
+    int   i;
  
      if (argc != 3) {
         printf("usage: %s InputRawspeechFile OutputBitFile\n", argv[0]);
@@ -63,6 +64,8 @@ int main(int argc, char *argv[])
      while(fread(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fin) ==
           CODEC2_SAMPLES_PER_FRAME) {
         codec2_encode(codec2, bits, buf);
+       //for(i=0; i<CODEC2_BITS_PER_FRAME; i++)
+       //    printf("bit[%d] = %d\n", i, bits[i]);
         fwrite(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fout);
      }
  
diff --git a/codec2/src/c2sim.c b/codec2/src/c2sim.c

index 4049f3b7cfd402789297538b0742f922ae473b86..fc59991b1ee76a9bfe4ced8529e7ff402808e720 100644 (file)
--- a/codec2/src/c2sim.c
+++ b/codec2/src/c2sim.c
@@ -38,6 +38,7 @@
  #include "nlp.h"
  #include "dump.h"
  #include "lpc.h"
+#include "lsp.h"
  #include "quantise.h"
  #include "phase.h"
  #include "postfilter.h"
@@ -92,7 +93,6 @@ int main(int argc, char *argv[])
    float prev_Wo;
    float pitch;
    int   voiced1;
-  float phi1[MAX_AMP];
  
    char  out_file[MAX_STR];
    int   arg;
@@ -115,10 +115,8 @@ int main(int argc, char *argv[])
    int   hand_voicing;
    FILE *fvoicing;
  
-  MODEL prev_model, interp_model, tmp_model;
+  MODEL prev_model, interp_model;
    int decimate;
-  float tmp_phi = 0;
-  float ak_phase[LPC_ORD+1];
  
    for(i=0; i<M; i++)
        Sn[i] = 1.0;
@@ -246,23 +244,17 @@ int main(int argc, char *argv[])
         
         dump_phase(&model.phi[0], model.L);
  
-       /* Determine LPCs for phase modelling.  Note that we may also
-          find the LPCs as part of the {Am} modelling, this can
-          probably be combined in the final codec.  However during
-          development some subtle bugs were found when combining LPC
-          and phase models so for the purpose of development it's
-          easier to find LPCs indepenently for phase modelling
-          here. */
+       /* find aks here, these are overwritten if LPC modelling is enabled */
  
         for(i=0; i<M; i++)
             Wn[i] = Sn[i]*w[i];
         autocorrelate(Wn,Rk,M,LPC_ORD);
-       levinson_durbin(Rk,ak_phase,LPC_ORD);
+       levinson_durbin(Rk,ak,LPC_ORD);
  
         if (lpc_model)
             assert(order == LPC_ORD);
  
-       dump_ak(ak_phase, LPC_ORD);
+       dump_ak(ak, LPC_ORD);
         
         /* determine voicing */
  
@@ -283,7 +275,32 @@ int main(int argc, char *argv[])
      /* optional LPC model amplitudes */
  
      if (lpc_model) {
-       snr = lpc_model_amplitudes(Sn, w, &model, order, lsp, ak);
+       int lpc_correction;
+       float e;
+       float lsps[LPC_ORD];
+       int   lsp_indexes[LPC_ORD];
+
+       e = speech_to_uq_lsps(lsps, ak, Sn, w, order);
+       lpc_correction = need_lpc_correction(&model, ak, e);
+
+       if (lsp) {
+           encode_lsps(lsp_indexes, lsps, LPC_ORD);
+           /*
+             for(i=0; i<LPC_ORD; i++)
+               printf("lsps[%d] = %f lsp_indexes[%d] = %d\n", 
+                      i, lsps[i], i, lsp_indexes[i]);
+             printf("\n");
+           */
+           decode_lsps(lsps, lsp_indexes, LPC_ORD);
+           bw_expand_lsps(lsps, LPC_ORD);
+           lsp_to_lpc(lsps, ak, LPC_ORD);
+       }
+
+       e = decode_energy(encode_energy(e));
+       model.Wo = decode_Wo(encode_Wo(model.Wo));
+
+       aks_to_M2(ak, order, &model, e, &snr); 
+       apply_lpc_correction(&model, lpc_correction);
         sum_snr += snr;
          dump_quantised_model(&model);
      }
@@ -320,18 +337,18 @@ int main(int argc, char *argv[])
             interpolate(&interp_model, &prev_model, &model);
             
             if (phase0)
-               phase_synth_zero_order(&interp_model, ak_phase, ex_phase);      
+               phase_synth_zero_order(&interp_model, ak, ex_phase);    
             if (postfilt)
                 postfilter(&interp_model, &bg_est);
             synth_one_frame(buf, &interp_model, Sn_, Pn);
-           fwrite(buf,sizeof(short),N,fout);
+           if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
  
             if (phase0)
-               phase_synth_zero_order(&model, ak_phase, ex_phase);     
+               phase_synth_zero_order(&model, ak, ex_phase);   
             if (postfilt)
                 postfilter(&model, &bg_est);
             synth_one_frame(buf, &model, Sn_, Pn);
-           fwrite(buf,sizeof(short),N,fout);
+           if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
  
             prev_model = model;
         }
@@ -341,11 +358,11 @@ int main(int argc, char *argv[])
      }
      else {
         if (phase0)
-           phase_synth_zero_order(&model, ak_phase, ex_phase); 
+          phase_synth_zero_order(&model, ak, ex_phase);        
         if (postfilt)
             postfilter(&model, &bg_est);
         synth_one_frame(buf, &model, Sn_, Pn);
-       fwrite(buf,sizeof(short),N,fout);
+       if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
      }
    }
  
diff --git a/codec2/src/codec2.c b/codec2/src/codec2.c

index f2b80f1ac3b0e3828d6d489729ab4e91cb046eab..842382ff96ca0c8bd80ebe648982f979725a736e 100644 (file)
--- a/codec2/src/codec2.c
+++ b/codec2/src/codec2.c
@@ -184,13 +184,14 @@ void codec2_encode(void *codec2_state, char bits[], short speech[])
                        c2->w);   
  
      pack(bits, &nbit, Wo_index, WO_BITS);
-    for(i=0; i<LPC_ORD; i++)
+    for(i=0; i<LPC_ORD; i++) {
         pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
+    }
      pack(bits, &nbit, lpc_correction, 1);
      pack(bits, &nbit, energy_index, E_BITS);
      pack(bits, &nbit, voiced1, 1);
      pack(bits, &nbit, voiced2, 1);
-
+    
      assert(nbit == CODEC2_BITS_PER_FRAME);
  }
  
@@ -221,22 +222,23 @@ void codec2_decode(void *codec2_state, short speech[], char bits[])
      c2 = (CODEC2*)codec2_state;
  
      Wo_index = unpack(bits, &nbit, WO_BITS);
-    for(i=0; i<LPC_ORD; i++)
+    for(i=0; i<LPC_ORD; i++) {
         lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
+    }
      lpc_correction = unpack(bits, &nbit, 1);
      energy_index = unpack(bits, &nbit, E_BITS);
      voiced1 = unpack(bits, &nbit, 1);
      voiced2 = unpack(bits, &nbit, 1);
      assert(nbit == CODEC2_BITS_PER_FRAME);
  
+    model.Wo = decode_Wo(Wo_index);
+    model.L = PI/model.Wo;
      decode_amplitudes(&model, 
                       ak,
                       lsp_indexes,
                       lpc_correction, 
                       energy_index);
  
-    model.Wo = decode_Wo(Wo_index);
-    model.L = PI/model.Wo;
      model.voiced = voiced2;
      model_interp.voiced = voiced1;
      interpolate(&model_interp, &c2->prev_model, &model);
diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c

index f44a099ea38940b1acc07b01ef5bf4c2ce409cb1..b5ba7faef4a69736e479471a06836a5e61e72817 100644 (file)
--- a/codec2/src/quantise.c
+++ b/codec2/src/quantise.c
@@ -74,7 +74,7 @@ LSP_CB lsp_q[] = {
      {1,4,16, "../unittest/lsp7.txt"},
      {1,3,8, "../unittest/lsp8.txt"},
      {1,3,8, "../unittest/lsp9.txt"},
-    {1,3,4, "../unittest/lsp10.txt"},
+    {1,2,4, "../unittest/lsp10.txt"},
      {0,0,0, ""}
  };
  
@@ -697,7 +697,7 @@ void bw_expand_lsps(float lsp[],
  
  /*---------------------------------------------------------------------------*\
                                                         
-  FUNCTION....: lpc_correction()            
+  FUNCTION....: need_lpc_correction()       
    AUTHOR......: David Rowe                           
    DATE CREATED: 22/8/2010 
  
@@ -834,6 +834,7 @@ void encode_amplitudes(int    lsp_indexes[],
      float lsps[LPC_ORD];
      float ak[LPC_ORD+1];
      float e;
+    int   i;
  
      e = speech_to_uq_lsps(lsps, ak, Sn, w, LPC_ORD);
      encode_lsps(lsp_indexes, lsps, LPC_ORD);
@@ -865,6 +866,7 @@ float decode_amplitudes(MODEL *model,
  
      decode_lsps(lsps, lsp_indexes, LPC_ORD);
      bw_expand_lsps(lsps, LPC_ORD);
+    lsp_to_lpc(lsps, ak, LPC_ORD);
      e = decode_energy(energy_index);
      aks_to_M2(ak, LPC_ORD, model, e, &snr); 
      apply_lpc_correction(model, lpc_correction);
@@ -911,7 +913,7 @@ int unpack(char bits[], int *nbit, int index_bits)
  
      for(i=0; i<index_bits; i++) {
         index <<= 1;
-       index |= bits[i];
+       index |= bits[*nbit+i];
      }
      
      *nbit += index_bits;
diff --git a/codec2/src/quantise.h b/codec2/src/quantise.h

index 6fc94f3ecf8b51a533ce84fbae8df44752f45845..00849f7fd305a34d9da87a39eda739c4fb376a67 100644 (file)
--- a/codec2/src/quantise.h
+++ b/codec2/src/quantise.h
@@ -67,4 +67,17 @@ int  unpack(char bits[], int *nbit, int index_bits);
  
  int lsp_bits(int i);
  
+int need_lpc_correction(MODEL *model, float ak[], float E);
+void apply_lpc_correction(MODEL *model, int lpc_correction);
+float speech_to_uq_lsps(float lsp[],
+                       float ak[],
+                       float Sn[], 
+                       float w[],
+                       int   order
+                       );
+void bw_expand_lsps(float lsp[],
+                   int   order
+                   );
+void decode_lsps(float lsp[], int indexes[], int order);
+
  #endif
diff --git a/codec2/unittest/Makefile b/codec2/unittest/Makefile

index 56657d618c8ae67bb3d55bade3eca95de1ffc477..d5f436a803c6a6e17a71e91190b854abefa72435 100644 (file)
--- a/codec2/unittest/Makefile
+++ b/codec2/unittest/Makefile
@@ -1,6 +1,6 @@
  CFLAGS = -I. -I../src -Wall -g -DFLOATING_POINT -DVAR_ARRAYS
  
-all: genres genlsp extract vqtrain tnlp tinterp tquant
+all: genres genlsp extract vqtrain tnlp tinterp tquant tcodec2
  
  genres: genres.o ../src/lpc.o
         gcc $(CFLAGS) -o genres genres.o ../src/lpc.o -lm
@@ -17,6 +17,10 @@ TINTERP_OBJ    = tinterp.o ../src/sine.o ../src/four1.o ../src/interp.o
  TQUANT_OBJ     = tquant.o ../src/quantise.o ../src/lpc.o ../src/lsp.o \
                   ../src/dump.o ../src/four1.o
  
+TCODEC2_OBJ    = tcodec2.o ../src/quantise.o ../src/lpc.o ../src/lsp.o \
+                 ../src/dump.o ../src/four1.o ../src/codec2.o ../src/sine.o \
+               ../src/nlp.o ../src/postfilter.o ../src/phase.o ../src/interp.o
+
  lsptest: $(LSP_TEST_OBJ)
         gcc $(CFLAGS) -o lsptest $(LSP_TEST_OBJ) -lm
  
@@ -44,6 +48,9 @@ tinterp: $(TINTERP_OBJ)
  tquant: $(TQUANT_OBJ)
         gcc $(CFLAGS) -o tquant $(TQUANT_OBJ) -lm
  
+tcodec2: $(TCODEC2_OBJ)
+       gcc $(CFLAGS) -o tcodec2 $(TCODEC2_OBJ) -lm
+
  %.o : %.c
         $(CC) -c $(CFLAGS) $< -o $@
  
diff --git a/codec2/unittest/tcodec2.c b/codec2/unittest/tcodec2.c

new file mode 100644 (file)

index 0000000..9c5a4c3
--- /dev/null
+++ b/codec2/unittest/tcodec2.c
@@ -0,0 +1,215 @@
+/*---------------------------------------------------------------------------*\
+                                                                          
+  FILE........: tcodec2.c                                                  
+  AUTHOR......: David Rowe                                            
+  DATE CREATED: 24/8/10                                        
+                                                               
+  Test program for codec2.c functions.
+                                                              
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2010 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "defines.h"
+#include "codec2.h"
+#include "quantise.h"
+#include "interp.h"
+
+/* CODEC2 struct copies from codec2.c to help with testing */
+
+typedef struct {
+    float  Sn[M];        /* input speech                              */
+    float  w[M];        /* time domain hamming window                */
+    COMP   W[FFT_ENC];  /* DFT of w[]                                */
+    float  Pn[2*N];     /* trapezoidal synthesis window              */
+    float  Sn_[2*N];    /* synthesised speech                        */
+    float  prev_Wo;      /* previous frame's pitch estimate           */
+    float  ex_phase;     /* excitation model phase track              */
+    float  bg_est;       /* background noise estimate for post filter */
+    MODEL  prev_model;   /* model parameters from 20ms ago            */
+} CODEC2; /* tests cast the codec2_create() handle to this; presumably must mirror codec2.c - verify */
+
+void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[]);
+void synthesise_one_frame(CODEC2 *c2, short speech[], MODEL *model, float ak[]);
+
+int test1()
+{
+    FILE   *fin, *fout;
+    short   buf[N];
+    void   *c2;
+    CODEC2 *c3;
+    MODEL   model;
+    float   ak[LPC_ORD+1];
+    float   lsps[LPC_ORD];
+    /* Analysis -> synthesis over ../raw/hts1a.raw, N samples at a time. */
+    c2 = codec2_create();
+    c3 = (CODEC2*)c2;
+    /* c3 aliases c2 so the loop can read c3->Sn and c3->w directly */
+    fin = fopen("../raw/hts1a.raw", "rb");
+    assert(fin != NULL);
+    fout = fopen("hts1a_test.raw", "wb");
+    assert(fout != NULL);
+    /* no encode_*()/decode_*() calls here; lsps[] and the return value go unused */
+    while(fread(buf, sizeof(short), N, fin) == N) {
+       analyse_one_frame(c3, &model, buf);
+       speech_to_uq_lsps(lsps, ak, c3->Sn, c3->w, LPC_ORD);
+       synthesise_one_frame(c3, buf, &model, ak);
+       fwrite(buf, sizeof(short), N, fout);
+    }
+
+    codec2_destroy(c2);
+
+    fclose(fin);
+    fclose(fout);
+
+    return 0;
+}
+ 
+int test2()
+{
+    FILE   *fin, *fout;
+    short   buf[2*N];
+    void   *c2;
+    CODEC2 *c3;
+    MODEL   model, model_interp;
+    float   ak[LPC_ORD+1];
+    int     voiced1, voiced2;
+    int     lsp_indexes[LPC_ORD];
+    int     lpc_correction;
+    int     energy_index;
+    int     Wo_index;
+    char    bits[CODEC2_BITS_PER_FRAME];
+    int     nbit;
+    int     i;
+    /* Per 2*N samples: analyse, encode, pack, unpack, decode, then synthesise. */
+    c2 = codec2_create();
+    c3 = (CODEC2*)c2;
+
+    fin = fopen("../raw/hts1a.raw", "rb");
+    assert(fin != NULL);
+    fout = fopen("hts1a_test.raw", "wb");
+    assert(fout != NULL);
+
+    while(fread(buf, sizeof(short), 2*N, fin) == 2*N) {
+       /* first 10ms analysis frame - we just want voicing */
+
+       analyse_one_frame(c3, &model, buf);
+       voiced1 = model.voiced;
+
+       /* second 10ms analysis frame */
+
+       analyse_one_frame(c3, &model, &buf[N]);
+       voiced2 = model.voiced;
+       /* encode Wo and amplitudes from the second frame's model */
+       Wo_index = encode_Wo(model.Wo);
+       encode_amplitudes(lsp_indexes, 
+                         &lpc_correction, 
+                         &energy_index,
+                         &model, 
+                         c3->Sn, 
+                         c3->w);   
+       nbit = 0;
+       pack(bits, &nbit, Wo_index, WO_BITS);
+       for(i=0; i<LPC_ORD; i++) {
+           pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
+       }
+       pack(bits, &nbit, lpc_correction, 1);
+       pack(bits, &nbit, energy_index, E_BITS);
+       pack(bits, &nbit, voiced1, 1);
+       pack(bits, &nbit, voiced2, 1);
+       /* read the fields back from bit 0 in the order they were packed */
+       nbit = 0;
+       Wo_index = unpack(bits, &nbit, WO_BITS);
+       for(i=0; i<LPC_ORD; i++) {
+           lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
+       }
+       lpc_correction = unpack(bits, &nbit, 1);
+       energy_index = unpack(bits, &nbit, E_BITS);
+       voiced1 = unpack(bits, &nbit, 1);
+       voiced2 = unpack(bits, &nbit, 1);
+       /* model.Wo and model.L are set before decode_amplitudes() is called */
+       model.Wo = decode_Wo(Wo_index);
+       model.L = PI/model.Wo;
+       decode_amplitudes(&model, 
+                         ak,
+                         lsp_indexes,
+                         lpc_correction, 
+                         energy_index);
+       /* voiced1 goes to the interpolated model, voiced2 to the decoded one */
+       model.voiced = voiced2;
+       model_interp.voiced = voiced1;
+       interpolate(&model_interp, &c3->prev_model, &model);
+       /* buf[0..N-1] from model_interp, buf[N..2N-1] from model; same ak for both */
+       synthesise_one_frame(c3,  buf,     &model_interp, ak);
+       synthesise_one_frame(c3, &buf[N],  &model, ak);
+       /* the decoded model becomes prev_model for the next pass */
+       memcpy(&c3->prev_model, &model, sizeof(MODEL));
+       fwrite(buf, sizeof(short), 2*N, fout);
+    }
+
+    codec2_destroy(c2);
+
+    fclose(fin);
+    fclose(fout);
+
+    return 0;
+}
+
+int test3()
+{
+    FILE   *fin, *fout, *fbits;
+    short   buf1[2*N];
+    short   buf2[2*N];
+    char    bits[CODEC2_BITS_PER_FRAME];
+    void   *c2;
+    /* Round trip through codec2_encode()/codec2_decode(); bits also go to a file. */
+    c2 = codec2_create();
+
+    fin = fopen("../raw/hts1a.raw", "rb");
+    assert(fin != NULL);
+    fout = fopen("hts1a_test.raw", "wb");
+    assert(fout != NULL);
+    fbits = fopen("hts1a_test3.bit", "wb");
+    assert(fbits != NULL);   /* was re-checking fout, so a failed open went unnoticed */
+    /* one codec2 state (c2) serves both the encoder and the decoder calls */
+    while(fread(buf1, sizeof(short), 2*N, fin) == 2*N) {
+       codec2_encode(c2, bits, buf1);
+       fwrite(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fbits);
+       codec2_decode(c2, buf2, bits);
+       fwrite(buf2, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fout);
+    }
+
+    codec2_destroy(c2);
+
+    fclose(fin);
+    fclose(fout);
+    fclose(fbits);
+
+    return 0;
+}
+
+int main() {
+    test3();    /* only test3 is run; test1() and test2() are not called */
+    return 0;
+}
author	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Tue, 24 Aug 2010 07:02:39 +0000 (07:02 +0000)
committer	drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
	Tue, 24 Aug 2010 07:02:39 +0000 (07:02 +0000)
codec2/src/c2dec.c		patch \| blob \| history
codec2/src/c2enc.c		patch \| blob \| history
codec2/src/c2sim.c		patch \| blob \| history
codec2/src/codec2.c		patch \| blob \| history
codec2/src/quantise.c		patch \| blob \| history
codec2/src/quantise.h		patch \| blob \| history
codec2/unittest/Makefile		patch \| blob \| history
codec2/unittest/tcodec2.c	[new file with mode: 0644]	patch \| blob