From 3b54d50ae64f8dd81ccaaab74eef5798fc34ae9f Mon Sep 17 00:00:00 2001 From: drowe67 Date: Mon, 23 Aug 2010 10:03:18 +0000 Subject: [PATCH] first pass at c2enc and c2dec, all builds OK but now need to unit test quantise.c and codec2.c functions git-svn-id: https://svn.code.sf.net/p/freetel/code@183 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2/src/Makefile | 16 ++- codec2/src/c2dec.c | 74 +++++++++++++ codec2/src/c2enc.c | 75 +++++++++++++ codec2/src/codec2.c | 226 +++++++++++++++++++++++---------------- codec2/src/codec2.h | 2 +- codec2/src/quantise.c | 10 +- codec2/src/quantise.h | 13 ++- codec2/unittest/tquant.c | 47 +++++++- 8 files changed, 360 insertions(+), 103 deletions(-) create mode 100644 codec2/src/c2dec.c create mode 100644 codec2/src/c2enc.c diff --git a/codec2/src/Makefile b/codec2/src/Makefile index d59afe93..a207c6f0 100644 --- a/codec2/src/Makefile +++ b/codec2/src/Makefile @@ -2,13 +2,25 @@ CC=gcc CFLAGS=-g -Wall -I. -I../src -Wall -g -DFLOATING_POINT -DVAR_ARRAYS C2SIM_OBJ = sine.o nlp.o four1.o dump.o quantise.o lpc.o lsp.o phase.o \ - postfilter.o interp.o c2sim.o + postfilter.o interp.o codec2.o c2sim.o -all: c2sim +C2ENC_OBJ = sine.o nlp.o four1.o dump.o quantise.o lpc.o lsp.o phase.o \ + postfilter.o interp.o codec2.o c2enc.o + +C2DEC_OBJ = sine.o nlp.o four1.o dump.o quantise.o lpc.o lsp.o phase.o \ + postfilter.o interp.o codec2.o c2dec.o + +all: c2sim c2enc c2dec c2sim: $(C2SIM_OBJ) $(CC) $(CFLAGS) $(C2SIM_OBJ) -o c2sim -lm +c2enc: $(C2ENC_OBJ) + $(CC) $(CFLAGS) $(C2ENC_OBJ) -o c2enc -lm + +c2dec: $(C2DEC_OBJ) + $(CC) $(CFLAGS) $(C2DEC_OBJ) -o c2dec -lm + %.o : %.c $(CC) -c $(CFLAGS) $< -o $@ diff --git a/codec2/src/c2dec.c b/codec2/src/c2dec.c new file mode 100644 index 00000000..815a7746 --- /dev/null +++ b/codec2/src/c2dec.c @@ -0,0 +1,74 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: c2dec.c + AUTHOR......: David Rowe + DATE CREATED: 23/8/2010 + + Decodes a file of bits to a 
file of raw speech samples using codec2. Demo + program for codec2. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2010 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "codec2.h" + +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + void *codec2; + FILE *fin; + FILE *fout; + short buf[CODEC2_SAMPLES_PER_FRAME]; + char bits[CODEC2_BITS_PER_FRAME]; + + if (argc != 3) { + printf("usage: %s InputBitFile OutputRawSpeechFile\n", argv[0]); + exit(1); + } + + fin = fopen(argv[1],"rb"); + if (fin == NULL) { + printf("Error opening input bit file: %s\n", argv[1]); + exit(1); + } + + fout = fopen(argv[2],"wb"); + if (fout == NULL) { + printf("Error opening output speech file: %s\n", argv[2]); + exit(1); + } + + codec2 = codec2_create(); + + while(fread(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fin) == + CODEC2_BITS_PER_FRAME) { + codec2_decode(codec2, buf, bits); + fwrite(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fout); + } + + codec2_destroy(codec2); + + fclose(fin); + fclose(fout); + + return 0; +} diff --git a/codec2/src/c2enc.c b/codec2/src/c2enc.c new file mode 100644 index 00000000..f4d8a6db --- /dev/null +++ b/codec2/src/c2enc.c @@ -0,0 +1,75 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: c2enc.c + AUTHOR......: David Rowe + DATE 
CREATED: 23/8/2010 + + Encodes a file of raw speech samples using codec2 and outputs a file + of bits (each bit is stored in the LSB of each output byte). Demo + program for codec2. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2010 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "codec2.h" + +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + void *codec2; + FILE *fin; + FILE *fout; + short buf[CODEC2_SAMPLES_PER_FRAME]; + char bits[CODEC2_BITS_PER_FRAME]; + + if (argc != 3) { + printf("usage: %s InputRawspeechFile OutputBitFile\n", argv[0]); + exit(1); + } + + fin = fopen(argv[1],"rb"); + if (fin == NULL) { + printf("Error opening input speech file: %s\n", argv[1]); + exit(1); + } + + fout = fopen(argv[2],"wb"); + if (fout == NULL) { + printf("Error opening output bit file: %s\n", argv[2]); + exit(1); + } + + codec2 = codec2_create(); + + while(fread(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fin) == + CODEC2_SAMPLES_PER_FRAME) { + codec2_encode(codec2, bits, buf); + fwrite(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fout); + } + + codec2_destroy(codec2); + + fclose(fin); + fclose(fout); + + return 0; +} diff --git a/codec2/src/codec2.c b/codec2/src/codec2.c index e288c90d..f2b80f1a 100644 --- a/codec2/src/codec2.c +++ b/codec2/src/codec2.c @@ -5,7 +5,7 @@ DATE CREATED: 21/8/2010 
Codec2 fully quantised encoder and decoder functions. If you want use - codec2, these are the functions you need to call. + codec2, the codec2_xxx functions are for you. \*---------------------------------------------------------------------------*/ @@ -40,8 +40,9 @@ #include "lpc.h" #include "quantise.h" #include "phase.h" -#include "postfilter.h" #include "interp.h" +#include "postfilter.h" +#include "codec2.h" typedef struct { float Sn[M]; /* input speech */ @@ -52,9 +53,24 @@ typedef struct { float prev_Wo; /* previous frame's pitch estimate */ float ex_phase; /* excitation model phase track */ float bg_est; /* background noise estimate for post filter */ - MODEL *prev_model; /* model parameters from 20ms ago */ + MODEL prev_model; /* model parameters from 20ms ago */ } CODEC2; +/*---------------------------------------------------------------------------*\ + + FUNCTION HEADERS + +\*---------------------------------------------------------------------------*/ + +void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[]); +void synthesise_one_frame(CODEC2 *c2, short speech[], MODEL *model,float ak[]); + +/*---------------------------------------------------------------------------*\ + + FUNCTIONS + +\*---------------------------------------------------------------------------*/ + /*---------------------------------------------------------------------------*\ FUNCTION....: codec2_create @@ -85,7 +101,7 @@ void *codec2_create() for(l=1; l<=MAX_AMP; l++) c2->prev_model.A[l] = 0.0; - c2->prev_model.Wo = = TWO_PI/P_MAX; + c2->prev_model.Wo = TWO_PI/P_MAX; return (void*)c2; } @@ -129,7 +145,7 @@ void codec2_destroy(void *codec2_state) Harmonic magnitudes (LSPs) 36 Low frequency LPC correction 1 Energy 5 - Pitch (fundamental frequnecy) 7 + Wo (fundamental frequnecy) 7 Voicing (10ms update) 2 TOTAL 51 @@ -138,73 +154,44 @@ void codec2_destroy(void *codec2_state) void codec2_encode(void *codec2_state, char bits[], short speech[]) { CODEC2 *c2; - COMP Sw[FFT_ENC]; - 
COMP Sw_[FFT_ENC]; MODEL model; - float pitch; int voiced1, voiced2; - int i, nbits; + int lsp_indexes[LPC_ORD]; + int lpc_correction; + int energy_index; + int Wo_index; + int i, nbit = 0; assert(codec2_state != NULL); c2 = (CODEC2*)codec2_state; - /* First Frame - just send voicing ----------------------------------*/ - - /* Read input speech */ - - for(i=0; iSn[i] = c2->Sn[i+N]; - for(i=0; iSn[i+M-N] = speech[i]; - dft_speech(Sw, c2->Sn, c2->w); + /* first 10ms analysis frame - we just want voicing */ - /* Estimate pitch */ - - nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo); - c2->prev_Wo = TWO_PI/pitch; - model.Wo = TWO_PI/pitch; - model.L = PI/model.Wo; - - /* estimate model parameters */ - - dft_speech(Sw, c2->Sn, c2->w); - two_stage_pitch_refinement(&model, Sw); - estimate_amplitudes(&model, Sw, c2->W); - est_voicing_mbe(&model, Sw, c2->W, (FS/TWO_PI)*model.Wo, Sw_); + analyse_one_frame(c2, &model, speech); voiced1 = model.voiced; - /* Second Frame - send all parameters --------------------------------*/ - - /* Read input speech */ - - for(i=0; iSn[i] = c2->Sn[i+N]; - for(i=0; iSn[i+M-N] = speech[i+N]; - dft_speech(Sw, c2->Sn, c2->w); - - /* Estimate pitch */ - - nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo); - c2->prev_Wo = TWO_PI/pitch; - model.Wo = TWO_PI/pitch; - model.L = PI/model.Wo; + /* second 10ms analysis frame */ - /* estimate model parameters */ - - dft_speech(Sw, c2->Sn, c2->w); - two_stage_pitch_refinement(&model, Sw); - estimate_amplitudes(&model, Sw, c2->W); - est_voicing_mbe(&model, Sw, c2->W, (FS/TWO_PI)*model.Wo, Sw_); + analyse_one_frame(c2, &model, &speech[N]); voiced2 = model.voiced; - - /* quantise */ - nbits = 0; - encode_Wo(bits, &nbits, model.Wo); - encode_voicing(bits, &nbits, voiced1, voiced2); - encode_amplitudes(bits, &nbits, c2->Sn, c2->w); - assert(nbits == CODEC2_BITS_PER_FRAME); + Wo_index = encode_Wo(model.Wo); + encode_amplitudes(lsp_indexes, + &lpc_correction, + &energy_index, + &model, + c2->Sn, + c2->w); + + 
pack(bits, &nbit, Wo_index, WO_BITS); + for(i=0; iprev_model, &model); - /* First synthesis frame - interpolated from adjacent frames */ + synthesise_one_frame(c2, speech, &model_interp, ak); + synthesise_one_frame(c2, &speech[N], &model, ak); - model_interp.voiced = voiced1; - interp(&model_interp, &c2->prev_model, &model); - phase_synth_zero_order(&model_interp, ak, voiced1, &c2->ex_phase); - postfilter(&model_interp, voiced1, &c2->bg_est); - synthesise(c2->Sn_, &model_interp, c2->Pn, 1); + memcpy(&c2->prev_model, &model, sizeof(MODEL)); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: synthesise_one_frame() + AUTHOR......: David Rowe + DATE CREATED: 23/8/2010 + + Synthesise 80 speech samples (10ms) from model parameters. + +\*---------------------------------------------------------------------------*/ + +void synthesise_one_frame(CODEC2 *c2, short speech[], MODEL *model, float ak[]) +{ + int i; + + phase_synth_zero_order(model, ak, &c2->ex_phase); + postfilter(model, &c2->bg_est); + synthesise(c2->Sn_, model, c2->Pn, 1); for(i=0; i 32767.0) + if (c2->Sn_[i] > 32767.0) speech[i] = 32767; - else if (Sn_[i] < -32767.0) + else if (c2->Sn_[i] < -32767.0) speech[i] = -32767; else - speech[i] = Sn_[i]; + speech[i] = c2->Sn_[i]; } - /* Second synthesis frame */ +} - model.voiced = voiced2; - phase_synth_zero_order(&model, ak, voiced2, &c2->ex_phase); - postfilter(&model, voiced2, &c2->bg_est); - synthesise(c2->Sn_, &model, c2->Pn, 1); +/*---------------------------------------------------------------------------*\ + + FUNCTION....: analyse_one_frame() + AUTHOR......: David Rowe + DATE CREATED: 23/8/2010 - for(i=0; i 32767.0) - speech[i+N] = 32767; - else if (Sn_[i] < -32767.0) - speech[i+N] = -32767; - else - speech[i+N] = Sn_[i]; - } + Extract sinusoidal model parameters from 80 speech samples (10ms of + speech). 
+ +\*---------------------------------------------------------------------------*/ - memcpy(&c2->prev_model, model, sizeof(MODEL); -} +void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[]) +{ + COMP Sw[FFT_ENC]; + COMP Sw_[FFT_ENC]; + float pitch; + int i; + + /* Read input speech */ + + for(i=0; iSn[i] = c2->Sn[i+N]; + for(i=0; iSn[i+M-N] = speech[i]; + dft_speech(Sw, c2->Sn, c2->w); + + /* Estimate pitch */ + + nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo); + c2->prev_Wo = TWO_PI/pitch; + model->Wo = TWO_PI/pitch; + model->L = PI/model->Wo; + + /* estimate model parameters */ + dft_speech(Sw, c2->Sn, c2->w); + two_stage_pitch_refinement(model, Sw); + estimate_amplitudes(model, Sw, c2->W); + est_voicing_mbe(model, Sw, c2->W, (FS/TWO_PI)*model->Wo, Sw_); +} diff --git a/codec2/src/codec2.h b/codec2/src/codec2.h index 8c3d4043..6427a9fe 100644 --- a/codec2/src/codec2.h +++ b/codec2/src/codec2.h @@ -36,6 +36,6 @@ void *codec2_create(); void codec2_destroy(void *codec2_state); void codec2_encode(void *codec2_state, char bits[], short speech_in[]); -void codec2_decode((void *codec2_state, int speech_out[], char bits[]); +void codec2_decode(void *codec2_state, short speech_out[], char bits[]); #endif diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c index a21a4735..f44a099e 100644 --- a/codec2/src/quantise.c +++ b/codec2/src/quantise.c @@ -45,7 +45,7 @@ typedef struct { int k; /* dimension of vector */ - int log2k; /* number of bits in dimension */ + int log2m; /* number of bits in m */ int m; /* elements in codebook */ char *fn; /* file name of text file storing the VQ */ } LSP_CB; @@ -97,6 +97,10 @@ float speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[], \*---------------------------------------------------------------------------*/ +int lsp_bits(int i) { + return lsp_q[i].log2m; +} + /*---------------------------------------------------------------------------*\ quantise_uniform @@ -774,6 +778,7 @@ int encode_energy(float e) 
float e_max = E_MAX_DB; float norm; + e = 10.0*log10(e); norm = (e - e_min)/(e_max - e_min); index = floor(E_LEVELS * norm + 0.5); if (index < 0 ) index = 0; @@ -801,6 +806,7 @@ float decode_energy(int index) step = (e_max - e_min)/E_LEVELS; e = e_min + step*(index); + e = pow(10.0,e/10.0); return e; } @@ -847,13 +853,13 @@ void encode_amplitudes(int lsp_indexes[], \*---------------------------------------------------------------------------*/ float decode_amplitudes(MODEL *model, + float ak[], int lsp_indexes[], int lpc_correction, int energy_index ) { float lsps[LPC_ORD]; - float ak[LPC_ORD+1]; float e; float snr; diff --git a/codec2/src/quantise.h b/codec2/src/quantise.h index 86b240f1..6fc94f3e 100644 --- a/codec2/src/quantise.h +++ b/codec2/src/quantise.h @@ -56,12 +56,15 @@ void encode_amplitudes(int lsp_indexes[], float Sn[], float w[]); -float decode_amplitudes(MODEL *model, - int lsp_indexes[], - int lpc_correction, - int energy_index); +float decode_amplitudes(MODEL *model, + float ak[], + int lsp_indexes[], + int lpc_correction, + int energy_index); void pack(char bits[], int *nbit, int index, int index_bits); -int unpack(char bits[], int *nbit, int index_bits); +int unpack(char bits[], int *nbit, int index_bits); + +int lsp_bits(int i); #endif diff --git a/codec2/unittest/tquant.c b/codec2/unittest/tquant.c index 868d55a9..85d3a1e9 100644 --- a/codec2/unittest/tquant.c +++ b/codec2/unittest/tquant.c @@ -39,11 +39,13 @@ int test_Wo_quant(); int test_lsp_quant(); int test_lsp(int lsp_number, int levels, float max_error_hz); +int test_energy_quant(int levels, float max_error_dB); int main() { quantise_init(); test_Wo_quant(); test_lsp_quant(); + test_energy_quant(E_LEVELS, 0.5*(E_MAX_DB - E_MIN_DB)/E_LEVELS); return 0; } @@ -63,6 +65,45 @@ int test_lsp_quant() { return 0; } +int test_energy_quant(int levels, float max_error_dB) { + FILE *fe; + float e,e_dec, error, low_e, high_e; + int index, index_in, index_out, i; + + /* check 1:1 match between input and 
output levels */ + + for(i=0; i max_error_dB) { + printf("error: %f %f\n", error, max_error_dB); + exit(0); + } + } + + fclose(fe); + return 0; +} + int test_lsp(int lsp_number, int levels, float max_error_hz) { float lsp[LPC_ORD]; int indexes_in[LPC_ORD]; @@ -117,6 +158,8 @@ int test_lsp(int lsp_number, int levels, float max_error_hz) { fclose(flsp); + printf("OK\n"); + return 0; } @@ -126,7 +169,7 @@ int test_Wo_quant() { float Wo,Wo_dec, error, step_size; int index, index_in, index_out; - /* output pitch quant curve for plotting */ + /* output Wo quant curve for plotting */ f = fopen("quant_pitch.txt","wt"); @@ -137,7 +180,7 @@ int test_Wo_quant() { fclose(f); - /* check for all pitch codes we get 1:1 match between encoder + /* check for all Wo codes we get 1:1 match between encoder and decoder Wo levels */ for(c=0; c