From: drowe67 Date: Mon, 10 Aug 2015 09:59:12 +0000 (+0000) Subject: codec 2 mode 700b works, thinks its step up on 700, not quite as robust to bit errors X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=8ce7c189f1ea0577732b6e8782e616c9923bb1b7;p=freetel-svn-tracking.git codec 2 mode 700b works, thinks its step up on 700, not quite as robust to bit errors git-svn-id: https://svn.code.sf.net/p/freetel/code@2267 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2-dev/src/c2dec.c b/codec2-dev/src/c2dec.c index 8d186382..a6d9348c 100644 --- a/codec2-dev/src/c2dec.c +++ b/codec2-dev/src/c2dec.c @@ -92,8 +92,10 @@ int main(int argc, char *argv[]) mode = CODEC2_MODE_1200; else if (strcmp(argv[1],"700") == 0) mode = CODEC2_MODE_700; + else if (strcmp(argv[1],"700B") == 0) + mode = CODEC2_MODE_700B; else { - fprintf(stderr, "Error in mode: %s. Must be 3200, 2400, 1600, 1400, 1300, 1200, or 700\n", argv[1]); + fprintf(stderr, "Error in mode: %s. Must be 3200, 2400, 1600, 1400, 1300, 1200, 700 or 700B\n", argv[1]); exit(1); } bit_rate = atoi(argv[1]); diff --git a/codec2-dev/src/c2enc.c b/codec2-dev/src/c2enc.c index 5a000b86..2037dd72 100644 --- a/codec2-dev/src/c2enc.c +++ b/codec2-dev/src/c2enc.c @@ -46,7 +46,7 @@ int main(int argc, char *argv[]) int bit, byte,i; if (argc < 4) { - printf("usage: c2enc 3200|2400|1600|1400|1300|1200|700 InputRawspeechFile OutputBitFile [--natural] [--softdec]\n"); + printf("usage: c2enc 3200|2400|1600|1400|1300|1200|700|700B InputRawspeechFile OutputBitFile [--natural] [--softdec]\n"); printf("e.g c2enc 1400 ../raw/hts1a.raw hts1a.c2\n"); printf("e.g c2enc 1300 ../raw/hts1a.raw hts1a.c2 --natural\n"); exit(1); @@ -66,8 +66,10 @@ int main(int argc, char *argv[]) mode = CODEC2_MODE_1200; else if (strcmp(argv[1],"700") == 0) mode = CODEC2_MODE_700; + else if (strcmp(argv[1],"700B") == 0) + mode = CODEC2_MODE_700B; else { - fprintf(stderr, "Error in mode: %s. 
Must be 3200, 2400, 1600, 1400, 1300, 1200 or 700\n", argv[1]); + fprintf(stderr, "Error in mode: %s. Must be 3200, 2400, 1600, 1400, 1300, 1200, 700 or 700B\n", argv[1]); exit(1); } diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c index 1dc5af2c..2a077a6a 100644 --- a/codec2-dev/src/c2sim.c +++ b/codec2-dev/src/c2sim.c @@ -128,6 +128,7 @@ int main(int argc, char *argv[]) void *nlp_states; float hpf_states[2]; int scalar_quant_Wo_e = 0; + int scalar_quant_Wo_e_low = 0; int vector_quant_Wo_e = 0; int dump_pitch_e = 0; FILE *fjvm = NULL; @@ -169,6 +170,7 @@ int main(int argc, char *argv[]) { "prede", no_argument, &prede, 1 }, { "dump_pitch_e", required_argument, &dump_pitch_e, 1 }, { "sq_pitch_e", no_argument, &scalar_quant_Wo_e, 1 }, + { "sq_pitch_e_low", no_argument, &scalar_quant_Wo_e_low, 1 }, { "vq_pitch_e", no_argument, &vector_quant_Wo_e, 1 }, { "rate", required_argument, NULL, 0 }, { "gain", required_argument, NULL, 0 }, @@ -674,8 +676,9 @@ int main(int argc, char *argv[]) } if (lspmelvq) { + int indexes[3]; //lspmelvq_mse += lspmelvq_quantise(mel, mel, order); - lspmelvq_mse += lspmelvq_mbest_quantise(mel, mel, order, 5); + lspmelvq_mse += lspmelvq_mbest_encode(indexes, mel, mel, order, 5); } /* ensure no unstable filters after quantisation */ @@ -705,6 +708,13 @@ int main(int argc, char *argv[]) model.L = PI/model.Wo; /* if we quantise Wo re-compute L */ } + if (scalar_quant_Wo_e_low) { + int ind; + e = decode_energy(ind = encode_energy(e, 3), 3); + model.Wo = decode_log_Wo(encode_log_Wo(model.Wo, 5), 5); + model.L = PI/model.Wo; /* if we quantise Wo re-compute L */ + } + if (vector_quant_Wo_e) { /* JVM's experimental joint Wo & LPC energy quantiser */ @@ -869,6 +879,7 @@ void print_help(const struct option* long_options, int num_opts, char* argv[]) } fprintf(stderr, "\t--%s%s\n", long_options[i].name, option_parameters); } + exit(1); } diff --git a/codec2-dev/src/codec2.c b/codec2-dev/src/codec2.c index 8482fb87..a41827e0 100644 --- 
a/codec2-dev/src/codec2.c +++ b/codec2-dev/src/codec2.c @@ -46,6 +46,7 @@ #include "codec2_internal.h" #include "machdep.h" #include "bpf.h" +#include "bpfb.h" /*---------------------------------------------------------------------------*\ @@ -70,6 +71,8 @@ void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits); void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]); void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits); +void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]); +void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits); static void ear_protection(float in_out[], int n); /*---------------------------------------------------------------------------*\ @@ -108,7 +111,8 @@ struct CODEC2 * codec2_create(int mode) (mode == CODEC2_MODE_1400) || (mode == CODEC2_MODE_1300) || (mode == CODEC2_MODE_1200) || - (mode == CODEC2_MODE_700) + (mode == CODEC2_MODE_700) || + (mode == CODEC2_MODE_700B) ); c2->mode = mode; for(i=0; igray = 1; + if (mode == CODEC2_MODE_700B) + c2->gray = 0; // natural binary better for trellis decoding (hopefully added later) + else + c2->gray = 1; c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA; @@ -206,6 +213,8 @@ int codec2_bits_per_frame(struct CODEC2 *c2) { return 48; if (c2->mode == CODEC2_MODE_700) return 28; + if (c2->mode == CODEC2_MODE_700B) + return 28; return 0; /* shouldn't get here */ } @@ -236,6 +245,8 @@ int codec2_samples_per_frame(struct CODEC2 *c2) { return 320; if (c2->mode == CODEC2_MODE_700) return 320; + if (c2->mode == CODEC2_MODE_700B) + return 320; return 0; /* shouldnt get here */ } @@ -250,7 +261,8 @@ void codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[]) (c2->mode == CODEC2_MODE_1400) || (c2->mode == CODEC2_MODE_1300) || (c2->mode == 
CODEC2_MODE_1200) || - (c2->mode == CODEC2_MODE_700) + (c2->mode == CODEC2_MODE_700) || + (c2->mode == CODEC2_MODE_700B) ); if (c2->mode == CODEC2_MODE_3200) @@ -267,6 +279,8 @@ void codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[]) codec2_encode_1200(c2, bits, speech); if (c2->mode == CODEC2_MODE_700) codec2_encode_700(c2, bits, speech); + if (c2->mode == CODEC2_MODE_700B) + codec2_encode_700b(c2, bits, speech); } void codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits) @@ -284,7 +298,8 @@ void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *b (c2->mode == CODEC2_MODE_1400) || (c2->mode == CODEC2_MODE_1300) || (c2->mode == CODEC2_MODE_1200) || - (c2->mode == CODEC2_MODE_700) + (c2->mode == CODEC2_MODE_700) || + (c2->mode == CODEC2_MODE_700B) ); if (c2->mode == CODEC2_MODE_3200) @@ -301,6 +316,8 @@ void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *b codec2_decode_1200(c2, speech, bits); if (c2->mode == CODEC2_MODE_700) codec2_decode_700(c2, speech, bits); + if (c2->mode == CODEC2_MODE_700B) + codec2_decode_700b(c2, speech, bits); } @@ -1522,6 +1539,210 @@ void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * } +/*---------------------------------------------------------------------------*\ + + FUNCTION....: codec2_encode_700b + AUTHOR......: David Rowe + DATE CREATED: August 2015 + + Version b of 700 bit/s codec. After some experiments over the air I + was unhappy with the rate 700 codec so spent a few weeks + trying to improve the speech quality. This version uses a wider BPF + and vector quantised mel-lsps. + + Encodes 320 speech samples (40ms of speech) into 28 bits. 
+ + The codec2 algorithm actually operates internally on 10ms (80 + sample) frames, so we run the encoding algorithm four times: + + frame 1: nothing + frame 2: nothing + frame 3: nothing + frame 4: voicing bit, 5 bit scalar Wo and 3 bit E, 18 bit LSP MEL VQ, + 1 spare + + The bit allocation is: + + Parameter frames 1-3 frame 4 Total + ----------------------------------------------------------- + Harmonic magnitudes (LSPs) 0 18 18 + Energy 0 3 3 + log Wo 0 5 5 + Voicing 0 1 1 + spare 0 1 1 + TOTAL 0 28 28 + +\*---------------------------------------------------------------------------*/ + +void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]) +{ + MODEL model; + float lsps[LPC_ORD_LOW]; + float mel[LPC_ORD_LOW]; + float mel_[LPC_ORD_LOW]; + float ak[LPC_ORD_LOW+1]; + float e, f; + int indexes[3]; + int Wo_index, e_index, i; + unsigned int nbit = 0; + float bpf_out[4*N]; + short bpf_speech[4*N]; + int spare = 0; + + assert(c2 != NULL); + + memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); + + /* band pass filter */ + + for(i=0; i<BPF_N; i++) + c2->bpf_buf[i] = c2->bpf_buf[4*N+i]; + for(i=0; i<4*N; i++) + c2->bpf_buf[BPF_N+i] = speech[i]; + inverse_filter(&c2->bpf_buf[BPF_N], bpfb, 4*N, bpf_out, BPF_N); + for(i=0; i<4*N; i++) + bpf_speech[i] = bpf_out[i]; + + /* frame 1 --------------------------------------------------------*/ + + analyse_one_frame(c2, &model, bpf_speech); + + /* frame 2 --------------------------------------------------------*/ + + analyse_one_frame(c2, &model, &bpf_speech[N]); + + /* frame 3 --------------------------------------------------------*/ + + analyse_one_frame(c2, &model, &bpf_speech[2*N]); + + /* frame 4: - voicing, scalar Wo & E, VQ mel LSPs -----------------*/ + + analyse_one_frame(c2, &model, &bpf_speech[3*N]); + pack(bits, &nbit, model.voiced, 1); + Wo_index = encode_log_Wo(model.Wo, 5); + pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); + + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD_LOW); + 
e_index = encode_energy(e, 3); + pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); + + for(i=0; igray); + } + + pack_natural_or_gray(bits, &nbit, spare, 1, c2->gray); + + assert(nbit == (unsigned)codec2_bits_per_frame(c2)); +} + + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: codec2_decode_700b + AUTHOR......: David Rowe + DATE CREATED: August 2015 + + Decodes frames of 28 bits into 320 samples (40ms) of speech. + +\*---------------------------------------------------------------------------*/ + +void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits) +{ + MODEL model[4]; + int indexes[3]; + float mel[LPC_ORD_LOW]; + float lsps[4][LPC_ORD_LOW]; + int Wo_index, e_index; + float e[4]; + float snr, f_; + float ak[4][LPC_ORD_LOW+1]; + int i,j; + unsigned int nbit = 0; + float weight; + COMP Aw[FFT_ENC]; + + assert(c2 != NULL); + + /* only need to zero these out due to (unused) snr calculation */ + + for(i=0; i<4; i++) + for(j=1; j<=MAX_AMP; j++) + model[i].A[j] = 0.0; + + /* unpack bits from channel ------------------------------------*/ + + model[3].voiced = unpack(bits, &nbit, 1); + model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; + + Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); + model[3].Wo = decode_log_Wo(Wo_index, 5); + model[3].L = PI/model[3].Wo; + + e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); + e[3] = decode_energy(e_index, 3); + + for(i=0; i<3; i++) { + indexes[i] = unpack_natural_or_gray(bits, &nbit, lspmelvq_cb_bits(i), c2->gray); + } + + lspmelvq_decode(indexes, mel, LPC_ORD_LOW); + + #define MEL_ROUND 10 + for(i=1; iprev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); + interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight); + e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); + } + for(i=0; i<4; i++) { + lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW); + aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD_LOW, 
&model[i], e[i], &snr, 0, 0, + c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); + apply_lpc_correction(&model[i]); + synthesise_one_frame(c2, &speech[N*i], &model[i], Aw); + } + + #ifdef DUMP + dump_lsp_(&lsps[3][0]); + dump_ak_(&ak[3][0], LPC_ORD_LOW); + dump_model(&model[3]); + if (c2->softdec) + dump_softdec(c2->softdec, nbit); + #endif + + /* update memories for next frame ----------------------------*/ + + c2->prev_model_dec = model[3]; + c2->prev_e_dec = e[3]; + for(i=0; i<LPC_ORD_LOW; i++) + c2->prev_lsps_dec[i] = lsps[3][i]; +} + + /*---------------------------------------------------------------------------*\ FUNCTION....: synthesise_one_frame() @@ -1693,6 +1914,9 @@ int codec2_get_spare_bit_index(struct CODEC2 *c2) case CODEC2_MODE_700: return 26; // bits 26 and 27 are spare break; + case CODEC2_MODE_700B: + return 26; // bits 26 and 27 are spare + break; } return -1; diff --git a/codec2-dev/src/codec2.h b/codec2-dev/src/codec2.h index d3c6d27d..65cfb65e 100644 --- a/codec2-dev/src/codec2.h +++ b/codec2-dev/src/codec2.h @@ -40,6 +40,7 @@ #define CODEC2_MODE_1300 4 #define CODEC2_MODE_1200 5 #define CODEC2_MODE_700 6 +#define CODEC2_MODE_700B 7 struct CODEC2; diff --git a/codec2-dev/src/quantise.c b/codec2-dev/src/quantise.c index 345a763e..c7222800 100644 --- a/codec2-dev/src/quantise.c +++ b/codec2-dev/src/quantise.c @@ -69,6 +69,10 @@ int mel_bits(int i) { return mel_cb[i].log2m; } +int lspmelvq_cb_bits(int i) { + return lspmelvq_cb[i].log2m; +} + #ifdef __EXPERIMENTAL__ int lspdt_bits(int i) { return lsp_cbdt[i].log2m; @@ -698,7 +702,7 @@ static void mbest_search( /* 3 stage VQ LSP quantiser using mbest search. 
Design and guidance kindly submitted by Anssi, OH3GDD */ -float lspmelvq_mbest_quantise(float *x, float *xq, int ndim, int mbest_entries) +float lspmelvq_mbest_encode(int *indexes, float *x, float *xq, int ndim, int mbest_entries) { int i, j, n1, n2, n3; const float *codebook1 = lspmelvq_cb[0].cb; @@ -712,6 +716,7 @@ float lspmelvq_mbest_quantise(float *x, float *xq, int ndim, int mbest_entries) for(i=0; i