From: drowe67 Date: Wed, 15 Apr 2015 05:03:00 +0000 (+0000) Subject: some refactoring of c2sim to make it closer to c2enc/c2dec X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=ac856a9242050f87f33f1478daade2c94e8ed71b;p=freetel-svn-tracking.git some refactoring of c2sim to make it closer to c2enc/c2dec git-svn-id: https://svn.code.sf.net/p/freetel/code@2117 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2-dev/src/c2dec.c b/codec2-dev/src/c2dec.c index 0c5103a7..880db0b3 100644 --- a/codec2-dev/src/c2dec.c +++ b/codec2-dev/src/c2dec.c @@ -88,10 +88,10 @@ int main(int argc, char *argv[]) mode = CODEC2_MODE_1300; else if (strcmp(argv[1],"1200") == 0) mode = CODEC2_MODE_1200; - else if (strcmp(argv[1],"450") == 0) - mode = CODEC2_MODE_450; + else if (strcmp(argv[1],"650") == 0) + mode = CODEC2_MODE_650; else { - fprintf(stderr, "Error in mode: %s. Must be 3200, 2400, 1600, 1400, 1300, 1200, or 450\n", argv[1]); + fprintf(stderr, "Error in mode: %s. Must be 3200, 2400, 1600, 1400, 1300, 1200, or 650\n", argv[1]); exit(1); } bit_rate = atoi(argv[1]); diff --git a/codec2-dev/src/c2enc.c b/codec2-dev/src/c2enc.c index a457a8f9..1aa57563 100644 --- a/codec2-dev/src/c2enc.c +++ b/codec2-dev/src/c2enc.c @@ -44,7 +44,7 @@ int main(int argc, char *argv[]) int nsam, nbit, nbyte, gray; if (argc < 4) { - printf("usage: c2enc 3200|2400|1600|1400|1300|1200|450 InputRawspeechFile OutputBitFile [--natural]\n"); + printf("usage: c2enc 3200|2400|1600|1400|1300|1200|650 InputRawspeechFile OutputBitFile [--natural]\n"); printf("e.g c2enc 1400 ../raw/hts1a.raw hts1a.c2\n"); printf("e.g c2enc 1300 ../raw/hts1a.raw hts1a.c2 --natural\n"); exit(1); @@ -62,10 +62,10 @@ int main(int argc, char *argv[]) mode = CODEC2_MODE_1300; else if (strcmp(argv[1],"1200") == 0) mode = CODEC2_MODE_1200; - else if (strcmp(argv[1],"450") == 0) - mode = CODEC2_MODE_450; + else if (strcmp(argv[1],"650") == 0) + mode = CODEC2_MODE_650; else { - fprintf(stderr, "Error in mode: %s. Must be 3200, 2400, 1600, 1400, 1300, 1200 or 450\n", argv[1]); + fprintf(stderr, "Error in mode: %s. Must be 3200, 2400, 1600, 1400, 1300, 1200 or 650\n", argv[1]); exit(1); } diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c index 44fe1670..f5ade776 100644 --- a/codec2-dev/src/c2sim.c +++ b/codec2-dev/src/c2sim.c @@ -76,11 +76,10 @@ int main(int argc, char *argv[]) MODEL model; float Pn[2*N]; /* trapezoidal synthesis window */ float Sn_[2*N]; /* synthesised speech */ - int i; /* loop variable */ + int i,m; /* loop variable */ int frames; - float prev_Wo, prev__Wo, uq_Wo, prev_uq_Wo; + float prev_Wo, prev__Wo, prev_uq_Wo; float pitch; - int voiced1 = 0; char out_file[MAX_STR]; char ampexp_arg[MAX_STR]; char phaseexp_arg[MAX_STR]; @@ -90,8 +89,10 @@ int main(int argc, char *argv[]) int lpc_model = 0, order = LPC_ORD; int lsp = 0, lspd = 0, lspvq = 0; int lspres = 0; - int lspdt = 0, lspdt_mode = LSPDT_ALL; - int dt = 0, lspjvm = 0, lspanssi = 0, lspjnd = 0, lspmel = 0; + int lspjvm = 0, lspjnd = 0, lspmel = 0; + #ifdef __EXPERIMENTAL__ + int lspanssi = 0, + #endif int prede = 0; float pre_mem = 0.0, de_mem = 0.0; float ak[order]; @@ -102,24 +103,24 @@ int main(int argc, char *argv[]) float ex_phase[MAX_AMP+1]; int postfilt; - float bg_est; int hand_voicing = 0, phaseexp = 0, ampexp = 0, hi = 0, simlpcpf = 0; int lpcpf = 0; FILE *fvoicing = 0; - MODEL prev_model, interp_model; - int decimate = 0; + MODEL prev_model; + int dec; + int decimate = 1; float lsps[order]; - float prev_lsps[order], prev_lsps_[order]; - float lsps__prev[order]; - float lsps__prev2[order]; float e, prev_e; - float ak_interp[order]; int lsp_indexes[order]; float lsps_[order]; float Woe_[2]; + float lsps_dec[4][LPC_ORD], e_dec[4], weight, weight_inc, ak_dec[4][LPC_ORD]; + MODEL model_dec[4], prev_model_dec; + float prev_lsps_dec[order], prev_e_dec; + void *nlp_states; float hpf_states[2]; int scalar_quant_Wo_e = 0; @@ -144,10 +145,6 @@ int main(int argc, char *argv[]) { "lspd", no_argument, &lspd, 1 }, { "lspvq", no_argument, &lspvq, 1 }, { "lspres", no_argument, &lspres, 1 }, - #ifdef __EXPERIMENTAL__ - { "lspdt", no_argument, &lspdt, 1 }, - { "lspdt_mode", required_argument, NULL, 0 }, - #endif { "lspjvm", no_argument, &lspjvm, 1 }, #ifdef __EXPERIMENTAL__ { "lspanssi", no_argument, &lspanssi, 1 }, @@ -157,8 +154,7 @@ int main(int argc, char *argv[]) { "ampexp", required_argument, &exp, 1 }, { "postfilter", no_argument, &postfilt, 1 }, { "hand_voicing", required_argument, &hand_voicing, 1 }, - { "dec", no_argument, &decimate, 1 }, - { "dt", no_argument, &dt, 1 }, + { "dec", required_argument, &dec, 1 }, { "hi", no_argument, &hi, 1 }, { "simlpcpf", no_argument, &simlpcpf, 1 }, { "lpcpf", no_argument, &lpcpf, 1 }, @@ -197,10 +193,6 @@ int main(int argc, char *argv[]) //ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX; ex_phase[i] = 0.0; } - for(i=0; imode = mode; for(i=0; imode == CODEC2_MODE_1200) return 48; - if (c2->mode == CODEC2_MODE_450) - return 18; + if (c2->mode == CODEC2_MODE_650) + return 26; return 0; /* shouldn't get here */ } @@ -225,7 +225,7 @@ int CODEC2_WIN32SUPPORT codec2_samples_per_frame(struct CODEC2 *c2) { return 320; if (c2->mode == CODEC2_MODE_1200) return 320; - if (c2->mode == CODEC2_MODE_450) + if (c2->mode == CODEC2_MODE_650) return 320; return 0; /* shouldnt get here */ @@ -241,7 +241,7 @@ void CODEC2_WIN32SUPPORT codec2_encode(struct CODEC2 *c2, unsigned char *bits, s (c2->mode == CODEC2_MODE_1400) || (c2->mode == CODEC2_MODE_1300) || (c2->mode == CODEC2_MODE_1200) || - (c2->mode == CODEC2_MODE_450) + (c2->mode == CODEC2_MODE_650) ); if (c2->mode == CODEC2_MODE_3200) @@ -256,8 +256,8 @@ void CODEC2_WIN32SUPPORT codec2_encode(struct CODEC2 *c2, unsigned char *bits, s codec2_encode_1300(c2, bits, speech); if (c2->mode == CODEC2_MODE_1200) codec2_encode_1200(c2, bits, speech); - if (c2->mode == CODEC2_MODE_450) - codec2_encode_450(c2, bits, speech); + if (c2->mode == CODEC2_MODE_650) + codec2_encode_650(c2, bits, speech); } void CODEC2_WIN32SUPPORT codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits) @@ -275,7 +275,7 @@ void CODEC2_WIN32SUPPORT codec2_decode_ber(struct CODEC2 *c2, short speech[], co (c2->mode == CODEC2_MODE_1400) || (c2->mode == CODEC2_MODE_1300) || (c2->mode == CODEC2_MODE_1200) || - (c2->mode == CODEC2_MODE_450) + (c2->mode == CODEC2_MODE_650) ); if (c2->mode == CODEC2_MODE_3200) @@ -290,8 +290,8 @@ void CODEC2_WIN32SUPPORT codec2_decode_ber(struct CODEC2 *c2, short speech[], co codec2_decode_1300(c2, speech, bits, ber_est); if (c2->mode == CODEC2_MODE_1200) codec2_decode_1200(c2, speech, bits); - if (c2->mode == CODEC2_MODE_450) - codec2_decode_450(c2, speech, bits); + if (c2->mode == CODEC2_MODE_650) + codec2_decode_650(c2, speech, bits); } @@ -422,7 +422,7 @@ void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * /* LSPs are sampled every 20ms so we interpolate the frame in between, then recover spectral amplitudes */ - interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5); + interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); for(i=0; i<2; i++) { lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); @@ -566,7 +566,7 @@ void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * /* LSPs are sampled every 20ms so we interpolate the frame in between, then recover spectral amplitudes */ - interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5); + interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD); for(i=0; i<2; i++) { lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, @@ -749,7 +749,7 @@ void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * between, then recover spectral amplitudes */ for(i=0, weight=0.25; i<3; i++, weight += 0.25) { - interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight); + interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); } for(i=0; i<4; i++) { lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); @@ -919,7 +919,7 @@ void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * between, then recover spectral amplitudes */ for(i=0, weight=0.25; i<3; i++, weight += 0.25) { - interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight); + interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); } for(i=0; i<4; i++) { lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); @@ -1035,7 +1035,7 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) Decodes frames of 52 bits into 320 samples (40ms) of speech. \*---------------------------------------------------------------------------*/ - +static int frames; void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est) { MODEL model[4]; @@ -1052,7 +1052,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * PROFILE_VAR(recover_start); assert(c2 != NULL); - + frames+= 4; /* only need to zero these out due to (unused) snr calculation */ for(i=0; i<4; i++) @@ -1097,7 +1097,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * PROFILE_SAMPLE(recover_start); for(i=0, weight=0.25; i<3; i++, weight += 0.25) { - interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight); + interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight); e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); } @@ -1111,6 +1111,13 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * apply_lpc_correction(&model[i]); synthesise_one_frame(c2, &speech[N*i], &model[i], Aw); } + /* + for(i=0; i<4; i++) { + printf("%d Wo: %f L: %d v: %d\n", frames, model[i].Wo, model[i].L, model[i].voiced); + } + if (frames == 4*50) + exit(0); + */ PROFILE_SAMPLE_AND_LOG2(recover_start, " recover"); #ifdef DUMP dump_lsp_(&lsps[3][0]); @@ -1282,7 +1289,7 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * between, then recover spectral amplitudes */ for(i=0, weight=0.25; i<3; i++, weight += 0.25) { - interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight); + interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); } for(i=0; i<4; i++) { lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); @@ -1303,11 +1310,11 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * /*---------------------------------------------------------------------------*\ - FUNCTION....: codec2_encode_450 + FUNCTION....: codec2_encode_650 AUTHOR......: David Rowe - DATE CREATED: Oct 1 2014 + DATE CREATED: April 2015 - Encodes 320 speech samples (40ms of speech) into 18 bits. + Encodes 320 speech samples (40ms of speech) into 26 bits. The codec2 algorithm actually operates internally on 10ms (80 sample) frames, so we run the encoding algorithm four times: @@ -1315,29 +1322,29 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * frame 0: nothing frame 1: nothing frame 2: nothing - frame 3: voicing bit, scalar Wo and E, 9 bit VQ of LSPs + frame 3: voicing bit, scalar Wo and E, 17 bit LSP MEL scalar The bit allocation is: Parameter frames 1-3 frame 4 Total ----------------------------------------------------------- - Harmonic magnitudes (LSPs) 0 9 9 + Harmonic magnitudes (LSPs) 0 17 17 Energy 0 3 3 log Wo 0 5 5 Voicing 0 1 1 - TOTAL 0 18 18 + TOTAL 0 26 26 \*---------------------------------------------------------------------------*/ -void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]) +void codec2_encode_650(struct CODEC2 *c2, unsigned char * bits, short speech[]) { MODEL model; - float lsps[LPC_ORD]; - float lsps_[LPC_ORD]; - float ak[LPC_ORD+1]; - float e; - int lsp_indexes[LPC_ORD]; - int Wo_index, e_index; + float lsps[LPC_ORD_LOW]; + float mel[LPC_ORD_LOW]; + float ak[LPC_ORD_LOW+1]; + float e, f; + int indexes[LPC_ORD_LOW]; + int Wo_index, e_index, i; unsigned int nbit = 0; assert(c2 != NULL); @@ -1363,36 +1370,44 @@ void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]) Wo_index = encode_log_Wo(model.Wo, 5); pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD_LOW); e_index = encode_energy(e, 3); pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); - encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD); - pack(bits, &nbit, lsp_indexes[0], lsp_pred_vq_bits(0)); - + for(i=0; igray); e[3] = decode_energy(e_index, 3); - lsp_indexes[0] = unpack(bits, &nbit, lsp_pred_vq_bits(0)); - decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD, 1); - check_lsp_order(&lsps[3][0], LPC_ORD); - bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0); + for(i=0; iprev_lsps_dec, &lsps[3][0], weight); + interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight); e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); } for(i=0; i<4; i++) { - lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD); - aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0, + lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW); + aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0, c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); apply_lpc_correction(&model[i]); synthesise_one_frame(c2, &speech[N*i], &model[i], Aw); @@ -1445,7 +1468,7 @@ void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * c2->prev_model_dec = model[3]; c2->prev_e_dec = e[3]; - for(i=0; iprev_lsps_dec[i] = lsps[3][i]; } diff --git a/codec2-dev/src/codec2.h b/codec2-dev/src/codec2.h index ac888d7b..77c0f2dd 100644 --- a/codec2-dev/src/codec2.h +++ b/codec2-dev/src/codec2.h @@ -52,7 +52,7 @@ #define CODEC2_MODE_1400 3 #define CODEC2_MODE_1300 4 #define CODEC2_MODE_1200 5 -#define CODEC2_MODE_450 6 +#define CODEC2_MODE_650 6 struct CODEC2; diff --git a/codec2-dev/src/defines.h b/codec2-dev/src/defines.h index df7efd83..eac1ab70 100644 --- a/codec2-dev/src/defines.h +++ b/codec2-dev/src/defines.h @@ -48,7 +48,8 @@ #define FFT_DEC 512 /* size of FFT used in decoder */ #define TW 40 /* Trapezoidal synthesis window overlap */ #define V_THRESH 6.0 /* voicing threshold in dB */ -#define LPC_ORD 10 /* phase modelling LPC order */ +#define LPC_ORD 10 /* LPC order */ +#define LPC_ORD_LOW 6 /* LPC order for lower rates */ /* Pitch estimation defines */ diff --git a/codec2-dev/src/interp.c b/codec2-dev/src/interp.c index 21ccb47c..5862bc2e 100644 --- a/codec2-dev/src/interp.c +++ b/codec2-dev/src/interp.c @@ -315,11 +315,11 @@ float interp_energy2(float prev_e, float next_e, float weight) \*---------------------------------------------------------------------------*/ -void interpolate_lsp_ver2(float interp[], float prev[], float next[], float weight) +void interpolate_lsp_ver2(float interp[], float prev[], float next[], float weight, int order) { int i; - for(i=0; i