From 3d047bded62dec1f6bfb3baea4fa3340185f5446 Mon Sep 17 00:00:00 2001 From: drowe67 Date: Thu, 13 Sep 2012 06:43:43 +0000 Subject: [PATCH] removed 4800 and 3600 and added 3200 bits/s mode git-svn-id: https://svn.code.sf.net/p/freetel/code@685 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2-dev/src/Makefile.am | 3 - codec2-dev/src/Makefile.in | 3 - codec2-dev/src/c2dec.c | 10 +- codec2-dev/src/c2enc.c | 10 +- codec2-dev/src/c2sim.c | 26 +- codec2-dev/src/codebook/dlsp1.txt | 11 +- codec2-dev/src/codebook/dlsp10.txt | 29 +- codec2-dev/src/codebook/dlsp2.txt | 12 +- codec2-dev/src/codebook/dlsp3.txt | 23 +- codec2-dev/src/codebook/dlsp4.txt | 32 +- codec2-dev/src/codebook/dlsp5.txt | 31 +- codec2-dev/src/codebook/dlsp6.txt | 31 +- codec2-dev/src/codebook/dlsp7.txt | 33 +- codec2-dev/src/codebook/dlsp8.txt | 30 +- codec2-dev/src/codebook/dlsp9.txt | 30 +- codec2-dev/src/codebook/lspvqexp1.txt | 2049 +++++++++++++++++++++++++ codec2-dev/src/codebook/lspvqexp2.txt | 2049 +++++++++++++++++++++++++ codec2-dev/src/codebook/lspvqexp3.txt | 2049 +++++++++++++++++++++++++ codec2-dev/src/codebookd.c | 286 +++- codec2-dev/src/codec2.c | 292 ++-- codec2-dev/src/codec2.h | 9 +- codec2-dev/src/listensim.sh | 2 +- codec2-dev/src/quantise.c | 42 +- codec2-dev/src/quantise.h | 4 +- 24 files changed, 6744 insertions(+), 352 deletions(-) create mode 100644 codec2-dev/src/codebook/lspvqexp1.txt create mode 100644 codec2-dev/src/codebook/lspvqexp2.txt create mode 100644 codec2-dev/src/codebook/lspvqexp3.txt diff --git a/codec2-dev/src/Makefile.am b/codec2-dev/src/Makefile.am index c73a54a1..d983de95 100644 --- a/codec2-dev/src/Makefile.am +++ b/codec2-dev/src/Makefile.am @@ -102,9 +102,6 @@ codebookvq.c: generate_codebook $(CODEBOOKSVQ) codebookjnd.c: generate_codebook $(CODEBOOKSJND) ./generate_codebook lsp_cbjnd $(CODEBOOKSJND) > codebookjnd.c -codebookdt.c: generate_codebook $(CODEBOOKSDT) - ./generate_codebook lsp_cbdt $(CODEBOOKSDT) > codebookdt.c - codebookjvm.c: generate_codebook $(CODEBOOKSJVM) ./generate_codebook lsp_cbjvm $(CODEBOOKSJVM) > codebookjvm.c diff --git a/codec2-dev/src/Makefile.in b/codec2-dev/src/Makefile.in index a015bc35..1233e426 100644 --- a/codec2-dev/src/Makefile.in +++ b/codec2-dev/src/Makefile.in @@ -948,9 +948,6 @@ codebookvq.c: generate_codebook $(CODEBOOKSVQ) codebookjnd.c: generate_codebook $(CODEBOOKSJND) ./generate_codebook lsp_cbjnd $(CODEBOOKSJND) > codebookjnd.c -codebookdt.c: generate_codebook $(CODEBOOKSDT) - ./generate_codebook lsp_cbdt $(CODEBOOKSDT) > codebookdt.c - codebookjvm.c: generate_codebook $(CODEBOOKSJVM) ./generate_codebook lsp_cbjvm $(CODEBOOKSJVM) > codebookjvm.c diff --git a/codec2-dev/src/c2dec.c b/codec2-dev/src/c2dec.c index a72e91b9..0a5945e6 100644 --- a/codec2-dev/src/c2dec.c +++ b/codec2-dev/src/c2dec.c @@ -44,15 +44,13 @@ int main(int argc, char *argv[]) float ber, r; if (argc < 4) { - printf("usage: c2dec 4800|3600|2400|1400|1200 InputBitFile OutputRawSpeechFile\n"); + printf("usage: c2dec 3200|2400|1400|1200 InputBitFile OutputRawSpeechFile\n"); printf("e.g c2dec 1400 hts1a.c2 hts1a_1400.raw\n"); exit(1); } - if (strcmp(argv[1],"4800") == 0) - mode = CODEC2_MODE_4800; - else if (strcmp(argv[1],"3600") == 0) - mode = CODEC2_MODE_3600; + if (strcmp(argv[1],"3200") == 0) + mode = CODEC2_MODE_3200; else if (strcmp(argv[1],"2400") == 0) mode = CODEC2_MODE_2400; else if (strcmp(argv[1],"1400") == 0) @@ -60,7 +58,7 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1],"1200") == 0) mode = CODEC2_MODE_1200; else { - fprintf(stderr, "Error in mode: %s. Must be 4800, 3600, 2400, 1400 or 1200\n", argv[1]); + fprintf(stderr, "Error in mode: %s. Must be 4800, 3200, 2400, 1400 or 1200\n", argv[1]); exit(1); } diff --git a/codec2-dev/src/c2enc.c b/codec2-dev/src/c2enc.c index d4a22dc7..d171c39a 100644 --- a/codec2-dev/src/c2enc.c +++ b/codec2-dev/src/c2enc.c @@ -44,15 +44,13 @@ int main(int argc, char *argv[]) int nsam, nbit, nbyte; if (argc != 4) { - printf("usage: c2enc 4800|3600|2400|1400|1200 InputRawspeechFile OutputBitFile\n"); + printf("usage: c2enc 3200|2400|1400|1200 InputRawspeechFile OutputBitFile\n"); printf("e.g c2enc 1400 ../raw/hts1a.raw hts1a.c2\n"); exit(1); } - if (strcmp(argv[1],"4800") == 0) - mode = CODEC2_MODE_4800; - else if (strcmp(argv[1],"3600") == 0) - mode = CODEC2_MODE_3600; + if (strcmp(argv[1],"3200") == 0) + mode = CODEC2_MODE_3200; else if (strcmp(argv[1],"2400") == 0) mode = CODEC2_MODE_2400; else if (strcmp(argv[1],"1400") == 0) @@ -60,7 +58,7 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1],"1200") == 0) mode = CODEC2_MODE_1200; else { - fprintf(stderr, "Error in mode: %s. Must be 4800, 3600, 2400, 1400 or 1200\n", argv[1]); + fprintf(stderr, "Error in mode: %s. Must be 3200, 2400, 1400 or 1200\n", argv[1]); exit(1); } diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c index 287efbd6..3e5d8c97 100644 --- a/codec2-dev/src/c2sim.c +++ b/codec2-dev/src/c2sim.c @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) float bg_est; int hand_voicing = 0, phaseexp = 0, ampexp = 0, hi = 0, simlpcpf = 0; - int lpcpf; + int lpcpf = 0; FILE *fvoicing = 0; MODEL prev_model, interp_model; @@ -247,17 +247,17 @@ int main(int argc, char *argv[]) } else if(strcmp(long_options[option_index].name, "ampexp") == 0) { strcpy(ampexp_arg, optarg); } else if(strcmp(long_options[option_index].name, "rate") == 0) { - if(strcmp(optarg,"4800") == 0) { + if(strcmp(optarg,"3200") == 0) { lpc_model = 1; order = 10; - vector_quant_Wo_e = 1; - lsp = 1; + scalar_quant_Wo_e = 1; + lspd = 1; phase0 = 1; postfilt = 1; - decimate = 0; + decimate = 1; lpcpf = 1; } else if(strcmp(optarg,"2400") == 0) { lpc_model = 1; order = 10; - scalar_quant_Wo_e = 1; + vector_quant_Wo_e = 1; lsp = 1; phase0 = 1; postfilt = 1; @@ -493,8 +493,8 @@ int main(int argc, char *argv[]) } if (lspd) { - lspd_quantise(lsps, lsps_, LPC_ORD); - //bw_expand_lsps(lsps_, LPC_ORD); + encode_lspds_scalar(lsp_indexes, lsps, LPC_ORD); + decode_lspds_scalar(lsps_, lsp_indexes, LPC_ORD); lsp_to_lpc(lsps_, ak, LPC_ORD); } @@ -537,7 +537,7 @@ int main(int argc, char *argv[]) for(i=0; imode == CODEC2_MODE_4800) - return 48; - if (c2->mode == CODEC2_MODE_3600) - return 72; + if (c2->mode == CODEC2_MODE_3200) + return 64; if (c2->mode == CODEC2_MODE_2400) return 48; if (c2->mode == CODEC2_MODE_1400) @@ -195,9 +190,7 @@ int CODEC2_WIN32SUPPORT codec2_bits_per_frame(struct CODEC2 *c2) { \*---------------------------------------------------------------------------*/ int CODEC2_WIN32SUPPORT codec2_samples_per_frame(struct CODEC2 *c2) { - if (c2->mode == CODEC2_MODE_4800) - return 80; - if (c2->mode == CODEC2_MODE_3600) + if (c2->mode == CODEC2_MODE_3200) return 160; if (c2->mode == CODEC2_MODE_2400) return 160; @@ -213,17 +206,14 @@ void CODEC2_WIN32SUPPORT codec2_encode(struct CODEC2 *c2, unsigned char *bits, s { assert(c2 != NULL); assert( - (c2->mode == CODEC2_MODE_4800) || - (c2->mode == CODEC2_MODE_3600) || + (c2->mode == CODEC2_MODE_3200) || (c2->mode == CODEC2_MODE_2400) || (c2->mode == CODEC2_MODE_1400) || (c2->mode == CODEC2_MODE_1200) ); - if (c2->mode == CODEC2_MODE_4800) - codec2_encode_4800(c2, bits, speech); - if (c2->mode == CODEC2_MODE_3600) - codec2_encode_3600(c2, bits, speech); + if (c2->mode == CODEC2_MODE_3200) + codec2_encode_3200(c2, bits, speech); if (c2->mode == CODEC2_MODE_2400) codec2_encode_2400(c2, bits, speech); if (c2->mode == CODEC2_MODE_1400) @@ -236,17 +226,14 @@ void CODEC2_WIN32SUPPORT codec2_decode(struct CODEC2 *c2, short speech[], const { assert(c2 != NULL); assert( - (c2->mode == CODEC2_MODE_4800) || - (c2->mode == CODEC2_MODE_3600) || + (c2->mode == CODEC2_MODE_3200) || (c2->mode == CODEC2_MODE_2400) || (c2->mode == CODEC2_MODE_1400) || (c2->mode == CODEC2_MODE_1200) ); - if (c2->mode == CODEC2_MODE_4800) - codec2_decode_4800(c2, speech, bits); - if (c2->mode == CODEC2_MODE_3600) - codec2_decode_3600(c2, speech, bits); + if (c2->mode == CODEC2_MODE_3200) + codec2_decode_3200(c2, speech, bits); if (c2->mode == CODEC2_MODE_2400) codec2_decode_2400(c2, speech, bits); if (c2->mode == CODEC2_MODE_1400) @@ -255,54 +242,68 @@ void CODEC2_WIN32SUPPORT codec2_decode(struct CODEC2 *c2, short speech[], const codec2_decode_1200(c2, speech, bits); } + /*---------------------------------------------------------------------------*\ - FUNCTION....: codec2_encode_4800 + FUNCTION....: codec2_encode_3200 AUTHOR......: David Rowe - DATE CREATED: Sep 12 2012 + DATE CREATED: 13 Sep 2012 + + Encodes 160 speech samples (20ms of speech) into 64 bits. - Encodes 80 speech samples (10ms of speech) into 48 bits. + The codec2 algorithm actually operates internally on 10ms (80 + sample) frames, so we run the encoding algorithm twice. On the + first frame we just send the voicing bits. On the second frame we + send all model parameters. Compared to 2400 we use a larger number + of bits for the LSPs and non-VQ pitch and energy. The bit allocation is: Parameter bits/frame -------------------------------------- - Harmonic magnitudes (LSPs) 36 - Joint VQ of Energy and Wo 8 - Voicing 1 - Spare 3 - TOTAL 48 + Harmonic magnitudes (LSPs) 50 + Pitch (Wo) 7 + Energy 5 + Voicing (10ms update) 2 + TOTAL 64 \*---------------------------------------------------------------------------*/ -void codec2_encode_4800(struct CODEC2 *c2, unsigned char * bits, short speech[]) +void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) { MODEL model; float ak[LPC_ORD+1]; float lsps[LPC_ORD]; float e; - int WoE_index; - int lsp_indexes[LPC_ORD]; + int Wo_index, e_index; + int lspd_indexes[LPC_ORD]; int i; - int spare = 0; unsigned int nbit = 0; assert(c2 != NULL); memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); + /* first 10ms analysis frame - we just want voicing */ + analyse_one_frame(c2, &model, speech); pack(bits, &nbit, model.voiced, 1); - + + /* second 10ms analysis frame */ + + analyse_one_frame(c2, &model, &speech[N]); + pack(bits, &nbit, model.voiced, 1); + Wo_index = encode_Wo(model.Wo); + pack(bits, &nbit, Wo_index, WO_BITS); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); - WoE_index = encode_WoE(&model, e, c2->xq_enc); - pack(bits, &nbit, WoE_index, WO_E_BITS); + e_index = encode_energy(e); + pack(bits, &nbit, e_index, E_BITS); - encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); - for(i=0; ixq_dec, WoE_index); - - for(i=0; ifft_fwd_cfg, ak, LPC_ORD, &model, e, &snr, 0, 0, 1); - apply_lpc_correction(&model); - - synthesise_one_frame(c2, speech, &model, ak); -} - - -/*---------------------------------------------------------------------------*\ - - FUNCTION....: codec2_encode_3600 - AUTHOR......: David Rowe - DATE CREATED: 12 Sep 2012 - - Encodes 160 speech samples (20ms of speech) into 72 bits. - - The codec2 algorithm actually operates internally on 10ms (80 - sample) frames, so we run the encoding algorithm twice. On the - first frame we just send the full LSPs, on the 2nd frame the LSP - differences from frame 1. - - The bit allocation is: - - Parameter frame 1 frame 2 Total - ------------------------------------------------------ - Harmonic magnitudes (LSPs) 36 18 54 - Joint VQ of Energy and Wo 8 8 16 - Voicing 1 1 2 - TOTAL 45 27 72 - -\*---------------------------------------------------------------------------*/ - -void codec2_encode_3600(struct CODEC2 *c2, unsigned char * bits, short speech[]) -{ - MODEL model; - float ak[LPC_ORD+1]; - float lsps1[LPC_ORD]; - float lsps2[LPC_ORD]; - float e; - int WoE_index; - int lsp_indexes[LPC_ORD]; - int i; - unsigned int nbit = 0; - - assert(c2 != NULL); + /* this will partially fill the model params for the 2 x 10ms + frames */ - memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); + model[0].voiced = unpack(bits, &nbit, 1); + model[1].voiced = unpack(bits, &nbit, 1); - /* first 10ms analysis frame - full LSP */ + Wo_index = unpack(bits, &nbit, WO_BITS); + model[1].Wo = decode_Wo(Wo_index); + model[1].L = PI/model[1].Wo; - analyse_one_frame(c2, &model, speech); - pack(bits, &nbit, model.voiced, 1); - e = speech_to_uq_lsps(lsps1, ak, c2->Sn, c2->w, LPC_ORD); - WoE_index = encode_WoE(&model, e, c2->xq_enc); - pack(bits, &nbit, WoE_index, WO_E_BITS); + e_index = unpack(bits, &nbit, E_BITS); + e[1] = decode_energy(e_index); - encode_lsps_scalar(lsp_indexes, lsps1, LPC_ORD); - decode_lsps_scalar(lsps1, lsp_indexes, LPC_ORD); - for(i=0; iSn, c2->w, LPC_ORD); - WoE_index = encode_WoE(&model, e, c2->xq_enc); - pack(bits, &nbit, WoE_index, WO_E_BITS); + interp_Wo(&model[0], &c2->prev_model_dec, &model[1]); + e[0] = interp_energy(c2->prev_e_dec, e[1]); + + /* LSPs are sampled every 20ms so we interpolate the frame in + between, then recover spectral amplitudes */ - encode_lsps_diff_time(lsp_indexes, lsps2, lsps1, LPC_ORD); - for(i=0; iprev_lsps_dec, &lsps[1][0], 0.5); + for(i=0; i<2; i++) { + lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); + aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, 1); + apply_lpc_correction(&model[i]); } - assert(nbit == (unsigned)codec2_bits_per_frame(c2)); -} - - -/*---------------------------------------------------------------------------*\ - - FUNCTION....: codec2_decode_3600 - AUTHOR......: David Rowe - DATE CREATED: 12 Sep 2012 - - Decodes frames of 72 bits into 160 samples (20ms) of speech. - -\*---------------------------------------------------------------------------*/ - -void codec2_decode_3600(struct CODEC2 *c2, short speech[], const unsigned char * bits) -{ - MODEL model; - int lsp_indexes[LPC_ORD]; - float lsps1[LPC_ORD]; - float lsps2[LPC_ORD]; - int WoE_index; - float e; - float snr; - float ak[LPC_ORD+1]; - int i,j; - unsigned int nbit = 0; - - assert(c2 != NULL); - - /* only need to zero these out due to (unused) snr calculation */ - - for(j=1; j<=MAX_AMP; j++) - model.A[j] = 0.0; - - /* frame 1 - full LSPs ---------------------------------------*/ - - model.voiced = unpack(bits, &nbit, 1); - WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model, &e, c2->xq_dec, WoE_index); + /* synthesise ------------------------------------------------*/ - for(i=0; ifft_fwd_cfg, ak, LPC_ORD, &model, e, &snr, 0, 0, 1); - apply_lpc_correction(&model); - synthesise_one_frame(c2, speech, &model, ak); - - /* frame 2 - delta LSPs --------------------------------------*/ + for(i=0; i<2; i++) + synthesise_one_frame(c2, &speech[N*i], &model[i], &ak[i][0]); - model.voiced = unpack(bits, &nbit, 1); - WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model, &e, c2->xq_dec, WoE_index); + /* update memories for next frame ----------------------------*/ - for(i=0; ifft_fwd_cfg, ak, LPC_ORD, &model, e, &snr, 0, 0, 1); - apply_lpc_correction(&model); - synthesise_one_frame(c2, &speech[N], &model, ak); + c2->prev_model_dec = model[1]; + c2->prev_e_dec = e[1]; + for(i=0; iprev_lsps_dec[i] = lsps[1][i]; } @@ -505,7 +401,7 @@ void codec2_decode_3600(struct CODEC2 *c2, short speech[], const unsigned char * The codec2 algorithm actually operates internally on 10ms (80 sample) frames, so we run the encoding algorithm twice. On the - first frame we just send the voicing bit. One the second frame we + first frame we just send the voicing bit. On the second frame we send all model parameters. The bit allocation is: @@ -616,7 +512,7 @@ void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * interp_Wo(&model[0], &c2->prev_model_dec, &model[1]); e[0] = interp_energy(c2->prev_e_dec, e[1]); - /* LSPs are sampled every 40ms so we interpolate the frame in + /* LSPs are sampled every 20ms so we interpolate the frame in between, then recover spectral amplitudes */ interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5); diff --git a/codec2-dev/src/codec2.h b/codec2-dev/src/codec2.h index d4d0159f..9741449c 100644 --- a/codec2-dev/src/codec2.h +++ b/codec2-dev/src/codec2.h @@ -46,11 +46,10 @@ #define CODEC2_WIN32SUPPORT #endif -#define CODEC2_MODE_4800 0 -#define CODEC2_MODE_3600 1 -#define CODEC2_MODE_2400 2 -#define CODEC2_MODE_1400 3 -#define CODEC2_MODE_1200 4 +#define CODEC2_MODE_3200 0 +#define CODEC2_MODE_2400 1 +#define CODEC2_MODE_1400 2 +#define CODEC2_MODE_1200 3 struct CODEC2; diff --git a/codec2-dev/src/listensim.sh b/codec2-dev/src/listensim.sh index 5fd32267..72964d4f 100755 --- a/codec2-dev/src/listensim.sh +++ b/codec2-dev/src/listensim.sh @@ -4,6 +4,6 @@ # # Listen to files processed with sim.sh -../script/menu.sh $1_lpc10.raw $1_lsp.raw $1_jnd.raw $1_mel.raw +../script/menu.sh $1_lpcpf.raw $1_3200.raw $1_2400.raw $2 $3 diff --git a/codec2-dev/src/quantise.c b/codec2-dev/src/quantise.c index a1a88d14..274c524e 100644 --- a/codec2-dev/src/quantise.c +++ b/codec2-dev/src/quantise.c @@ -130,16 +130,16 @@ long quantise(const float * cb, float vec[], float w[], int k, int m, float *se) /*---------------------------------------------------------------------------*\ - lspd_quantise + encode_lspds_scalar() Scalar/VQ LSP difference quantiser. \*---------------------------------------------------------------------------*/ -void lspd_quantise( - float lsp[], - float lsp_[], - int order +void encode_lspds_scalar( + int indexes[], + float lsp[], + int order ) { int i,k,m,index; @@ -150,7 +150,6 @@ void lspd_quantise( float wt[LPC_MAX]; const float *cb; float se; - int indexes[LPC_MAX]; assert(order == LPC_ORD); @@ -180,7 +179,35 @@ void lspd_quantise( indexes[i] = quantise(cb, &dlsp[i], wt, k, m, &se); dlsp_[i] = cb[indexes[i]*k]; - printf("%d dlsp %3.2f dlsp_ %3.2f\n", i, dlsp[i], dlsp_[i]); + //printf("%d dlsp %3.2f dlsp_ %3.2f\n", i, dlsp[i], dlsp_[i]); + + if (i) + lsp__hz[i] = lsp__hz[i-1] + dlsp_[i]; + else + lsp__hz[0] = dlsp_[0]; + } + +} + +void decode_lspds_scalar( + float lsp_[], + int indexes[], + int order +) +{ + int i,k,index; + float lsp__hz[LPC_MAX]; + float dlsp_[LPC_MAX]; + const float *cb; + float se; + + assert(order == LPC_ORD); + + for(i=0; i