From: drowe67 Date: Tue, 30 May 2017 09:38:01 +0000 (+0000) Subject: p_min/p_max/m_pitch run time constants, bit exact with c2sim and cenc/dec 1300/700C X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=fc95b270596f89dc9df49965f576404f7413718d;p=freetel-svn-tracking.git p_min/p_max/m_pitch run time constants, bit exact with c2sim and cenc/dec 1300/700C git-svn-id: https://svn.code.sf.net/p/freetel/code@3147 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c index 3d28bd09..35f8f609 100644 --- a/codec2-dev/src/c2sim.c +++ b/codec2-dev/src/c2sim.c @@ -66,17 +66,19 @@ int main(int argc, char *argv[]) { C2CONST c2const = c2const_create(8000); int n_samp = c2const.n_samp; + int m_pitch = c2const.m_pitch; + FILE *fout = NULL; /* output speech file */ FILE *fin; /* input speech file */ short buf[N_SAMP]; /* input/output buffer */ float buf_float[N_SAMP]; - float Sn[M_PITCH]; /* float input speech samples */ - float Sn_pre[M_PITCH]; /* pre-emphasised input speech samples */ + float Sn[m_pitch]; /* float input speech samples */ + float Sn_pre[m_pitch]; /* pre-emphasised input speech samples */ COMP Sw[FFT_ENC]; /* DFT of Sn[] */ codec2_fft_cfg fft_fwd_cfg; codec2_fftr_cfg fftr_fwd_cfg; codec2_fftr_cfg fftr_inv_cfg; - float w[M_PITCH]; /* time domain hamming window */ + float w[m_pitch]; /* time domain hamming window */ COMP W[FFT_ENC]; /* DFT of w[] */ MODEL model; float Pn[2*N_SAMP]; /* trapezoidal synthesis window */ @@ -201,16 +203,16 @@ int main(int argc, char *argv[]) COMP Aw[FFT_ENC]; COMP H[MAX_AMP]; - for(i=0; i= 0) && (mode <= CODEC2_MODE_700C))) { return NULL; @@ -115,29 +113,39 @@ struct CODEC2 * codec2_create(int mode) c2->mode = mode; - c2->Fs = c2const.Fs; - c2->n_samp = n_samp; + /* store constants in a few places for convenience */ + + c2->c2const = c2const_create(8000); + c2->Fs = c2->c2const.Fs; + int n_samp = c2->n_samp = c2->c2const.n_samp; + int m_pitch = c2->m_pitch = c2->c2const.m_pitch; + c2->Pn = (float*)malloc(2*n_samp*sizeof(float)); if (c2->Pn == NULL) { - free(c2); return NULL; } c2->Sn_ = (float*)malloc(2*n_samp*sizeof(float)); if (c2->Sn_ == NULL) { - free(c2->Pn); - free(c2); + return NULL; + } + c2->w = (float*)malloc(m_pitch*sizeof(float)); + if (c2->w == NULL) { + return NULL; + } + c2->Sn = (float*)malloc(m_pitch*sizeof(float)); + if (c2->Sn == NULL) { return NULL; } - for(i=0; iSn[i] = 1.0; c2->hpf_states[0] = c2->hpf_states[1] = 0.0; for(i=0; i<2*n_samp; i++) c2->Sn_[i] = 0; c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL); c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL); - make_analysis_window(&c2const, c2->fft_fwd_cfg, c2->w,c2->W); - make_synthesis_window(&c2const, c2->Pn); + make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w,c2->W); + make_synthesis_window(&c2->c2const, c2->Pn); c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL); quantise_init(); c2->prev_Wo_enc = 0.0; @@ -146,7 +154,7 @@ struct CODEC2 * codec2_create(int mode) for(l=1; l<=MAX_AMP; l++) c2->prev_model_dec.A[l] = 0.0; - c2->prev_model_dec.Wo = TWO_PI/P_MAX; + c2->prev_model_dec.Wo = TWO_PI/c2->c2const.p_max; c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo; c2->prev_model_dec.voiced = 0; @@ -155,11 +163,8 @@ struct CODEC2 * codec2_create(int mode) } c2->prev_e_dec = 1; - c2->nlp = nlp_create(M_PITCH); + c2->nlp = nlp_create(m_pitch); if (c2->nlp == NULL) { - free(c2->Sn_); - free(c2->Pn); - free (c2); return NULL; } @@ -223,6 +228,10 @@ void codec2_destroy(struct CODEC2 *c2) codec2_fft_free(c2->phase_fft_fwd_cfg); codec2_fft_free(c2->phase_fft_inv_cfg); } + free(c2->Pn); + free(c2->Sn); + free(c2->w); + free(c2->Sn_); free(c2); } @@ -403,10 +412,10 @@ void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &speech[c2->n_samp]); pack(bits, &nbit, model.voiced, 1); - Wo_index = encode_Wo(model.Wo, WO_BITS); + Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); pack(bits, &nbit, Wo_index, WO_BITS); - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); e_index = encode_energy(e, E_BITS); pack(bits, &nbit, e_index, E_BITS); @@ -458,7 +467,7 @@ void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * model[1].voiced = unpack(bits, &nbit, 1); Wo_index = unpack(bits, &nbit, WO_BITS); - model[1].Wo = decode_Wo(Wo_index, WO_BITS); + model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); model[1].L = PI/model[1].Wo; e_index = unpack(bits, &nbit, E_BITS); @@ -474,7 +483,7 @@ void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * /* Wo and energy are sampled every 20ms, so we interpolate just 1 10ms frame between 20ms samples */ - interp_Wo(&model[0], &c2->prev_model_dec, &model[1]); + interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); e[0] = interp_energy(c2->prev_e_dec, e[1]); /* LSPs are sampled every 20ms so we interpolate the frame in @@ -550,7 +559,7 @@ void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &speech[c2->n_samp]); pack(bits, &nbit, model.voiced, 1); - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); WoE_index = encode_WoE(&model, e, c2->xq_enc); pack(bits, &nbit, WoE_index, WO_E_BITS); @@ -604,7 +613,7 @@ void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * model[1].voiced = unpack(bits, &nbit, 1); WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index); + decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); for(i=0; iprev_model_dec, &model[1]); + interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); e[0] = interp_energy(c2->prev_e_dec, e[1]); /* LSPs are sampled every 20ms so we interpolate the frame in @@ -695,11 +704,11 @@ void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &speech[c2->n_samp]); pack(bits, &nbit, model.voiced, 1); - Wo_index = encode_Wo(model.Wo, WO_BITS); + Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); pack(bits, &nbit, Wo_index, WO_BITS); /* need to run this just to get LPC energy */ - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); e_index = encode_energy(e, E_BITS); pack(bits, &nbit, e_index, E_BITS); @@ -713,10 +722,10 @@ void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); pack(bits, &nbit, model.voiced, 1); - Wo_index = encode_Wo(model.Wo, WO_BITS); + Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); pack(bits, &nbit, Wo_index, WO_BITS); - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); e_index = encode_energy(e, E_BITS); pack(bits, &nbit, e_index, E_BITS); @@ -770,7 +779,7 @@ void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * model[1].voiced = unpack(bits, &nbit, 1); Wo_index = unpack(bits, &nbit, WO_BITS); - model[1].Wo = decode_Wo(Wo_index, WO_BITS); + model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); model[1].L = PI/model[1].Wo; e_index = unpack(bits, &nbit, E_BITS); @@ -780,7 +789,7 @@ void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * model[3].voiced = unpack(bits, &nbit, 1); Wo_index = unpack(bits, &nbit, WO_BITS); - model[3].Wo = decode_Wo(Wo_index, WO_BITS); + model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); model[3].L = PI/model[3].Wo; e_index = unpack(bits, &nbit, E_BITS); @@ -798,9 +807,9 @@ void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * /* Wo and energy are sampled every 20ms, so we interpolate just 1 10ms frame between 20ms samples */ - interp_Wo(&model[0], &c2->prev_model_dec, &model[1]); + interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); e[0] = interp_energy(c2->prev_e_dec, e[1]); - interp_Wo(&model[2], &model[1], &model[3]); + interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); e[2] = interp_energy(e[1], e[3]); /* LSPs are sampled every 40ms so we interpolate the 3 frames in @@ -879,7 +888,7 @@ void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]) pack(bits, &nbit, model.voiced, 1); /* need to run this just to get LPC energy */ - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); WoE_index = encode_WoE(&model, e, c2->xq_enc); pack(bits, &nbit, WoE_index, WO_E_BITS); @@ -894,7 +903,7 @@ void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); pack(bits, &nbit, model.voiced, 1); - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); WoE_index = encode_WoE(&model, e, c2->xq_enc); pack(bits, &nbit, WoE_index, WO_E_BITS); @@ -948,13 +957,13 @@ void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * model[1].voiced = unpack(bits, &nbit, 1); WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index); + decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); model[2].voiced = unpack(bits, &nbit, 1); model[3].voiced = unpack(bits, &nbit, 1); WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model[3], &e[3], c2->xq_dec, WoE_index); + decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); for(i=0; iprev_model_dec, &model[1]); + interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); e[0] = interp_energy(c2->prev_e_dec, e[1]); - interp_Wo(&model[2], &model[1], &model[3]); + interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); e[2] = interp_energy(e[1], e[3]); /* LSPs are sampled every 40ms so we interpolate the 3 frames in @@ -1062,13 +1071,13 @@ void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray); - Wo_index = encode_Wo(model.Wo, WO_BITS); + Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS); pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray); #ifdef PROFILE quant_start = machdep_profile_sample(); #endif - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); e_index = encode_energy(e, E_BITS); pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray); @@ -1128,7 +1137,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray); Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray); - model[3].Wo = decode_Wo(Wo_index, WO_BITS); + model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS); model[3].L = PI/model[3].Wo; e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray); @@ -1156,7 +1165,7 @@ void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * PROFILE_SAMPLE(recover_start); for(i=0, weight=0.25; i<3; i++, weight += 0.25) { interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD); - interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight); + interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); } @@ -1248,7 +1257,7 @@ void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) pack(bits, &nbit, model.voiced, 1); /* need to run this just to get LPC energy */ - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); WoE_index = encode_WoE(&model, e, c2->xq_enc); pack(bits, &nbit, WoE_index, WO_E_BITS); @@ -1263,7 +1272,7 @@ void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &speech[3*c2->n_samp]); pack(bits, &nbit, model.voiced, 1); - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD); WoE_index = encode_WoE(&model, e, c2->xq_enc); pack(bits, &nbit, WoE_index, WO_E_BITS); @@ -1318,13 +1327,13 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * model[1].voiced = unpack(bits, &nbit, 1); WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index); + decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index); model[2].voiced = unpack(bits, &nbit, 1); model[3].voiced = unpack(bits, &nbit, 1); WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model[3], &e[3], c2->xq_dec, WoE_index); + decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index); for(i=0; iprev_model_dec, &model[1]); + interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min); e[0] = interp_energy(c2->prev_e_dec, e[1]); - interp_Wo(&model[2], &model[1], &model[3]); + interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min); e[2] = interp_energy(e[1], e[3]); /* LSPs are sampled every 40ms so we interpolate the 3 frames in @@ -1440,10 +1449,10 @@ void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]); pack(bits, &nbit, model.voiced, 1); - Wo_index = encode_log_Wo(model.Wo, 5); + Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5); pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD_LOW); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW); e_index = encode_energy(e, 3); pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); @@ -1502,7 +1511,7 @@ void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); - model[3].Wo = decode_log_Wo(Wo_index, 5); + model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5); model[3].L = PI/model[3].Wo; e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); @@ -1544,7 +1553,7 @@ void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * for(i=0, weight=0.25; i<3; i++, weight += 0.25) { interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); - interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight); + interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); } for(i=0; i<4; i++) { @@ -1652,10 +1661,10 @@ void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]) analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]); pack(bits, &nbit, model.voiced, 1); - Wo_index = encode_log_Wo(model.Wo, 5); + Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5); pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray); - e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD_LOW); + e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW); e_index = encode_energy(e, 3); pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray); @@ -1714,7 +1723,7 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray); - model[3].Wo = decode_log_Wo(Wo_index, 5); + model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5); model[3].L = PI/model[3].Wo; e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray); @@ -1748,7 +1757,7 @@ void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * for(i=0, weight=0.25; i<3; i++, weight += 0.25) { interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW); - interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight); + interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min); e[i] = interp_energy2(c2->prev_e_dec, e[3],weight); } for(i=0; i<4; i++) { @@ -1826,7 +1835,8 @@ void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]) float rate_K_vec[K], mean; float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; - newamp1_model_to_indexes(indexes, + newamp1_model_to_indexes(&c2->c2const, + indexes, &model, rate_K_vec, c2->rate_K_sample_freqs_kHz, @@ -1874,7 +1884,8 @@ void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * COMP HH[M][MAX_AMP+1]; float interpolated_surface_[M][NEWAMP1_K]; - newamp1_indexes_to_model(model, + newamp1_indexes_to_model(&c2->c2const, + model, (COMP*)HH, (float*)interpolated_surface_, c2->prev_rate_K_vec_, @@ -1963,7 +1974,7 @@ float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) if (c2->mode == CODEC2_MODE_2400) { nbit = 1 + 1; WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model, &e, xq_dec, WoE_index); + decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); } if (c2->mode == CODEC2_MODE_1600) { nbit = 1 + 1 + WO_BITS; @@ -1973,7 +1984,7 @@ float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) if (c2->mode == CODEC2_MODE_1400) { nbit = 1 + 1; WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model, &e, xq_dec, WoE_index); + decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); } if (c2->mode == CODEC2_MODE_1300) { nbit = 1 + 1 + 1 + 1 + WO_BITS; @@ -1983,7 +1994,7 @@ float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) if (c2->mode == CODEC2_MODE_1200) { nbit = 1 + 1; WoE_index = unpack(bits, &nbit, WO_E_BITS); - decode_WoE(&model, &e, xq_dec, WoE_index); + decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index); } if (c2->mode == CODEC2_MODE_700) { nbit = 1 + 5; @@ -2076,21 +2087,23 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) float pitch; int i; PROFILE_VAR(dft_start, nlp_start, model_start, two_stage, estamps); + int n_samp = c2->n_samp; + int m_pitch = c2->m_pitch; /* Read input speech */ - for(i=0; in_samp; i++) - c2->Sn[i] = c2->Sn[i+c2->n_samp]; - for(i=0; in_samp; i++) - c2->Sn[i+M_PITCH-c2->n_samp] = speech[i]; + for(i=0; iSn[i] = c2->Sn[i+n_samp]; + for(i=0; iSn[i+m_pitch-n_samp] = speech[i]; PROFILE_SAMPLE(dft_start); - dft_speech(c2->fft_fwd_cfg, Sw, c2->Sn, c2->w); + dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w); PROFILE_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech"); /* Estimate pitch */ - nlp(c2->nlp,c2->Sn,c2->n_samp,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc); + nlp(c2->nlp,c2->Sn,n_samp,c2->c2const.p_min,c2->c2const.p_max,&pitch,Sw, c2->W, &c2->prev_Wo_enc); PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, " nlp"); model->Wo = TWO_PI/pitch; @@ -2098,11 +2111,11 @@ void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]) /* estimate model parameters */ - two_stage_pitch_refinement(model, Sw); + two_stage_pitch_refinement(&c2->c2const, model, Sw); PROFILE_SAMPLE_AND_LOG(two_stage, model_start, " two_stage"); estimate_amplitudes(model, Sw, c2->W, 0); PROFILE_SAMPLE_AND_LOG(estamps, two_stage, " est_amps"); - est_voicing_mbe(model, Sw, c2->W); + est_voicing_mbe(&c2->c2const, model, Sw, c2->W); c2->prev_Wo_enc = model->Wo; PROFILE_SAMPLE_AND_LOG2(estamps, " est_voicing"); #ifdef DUMP diff --git a/codec2-dev/src/codec2_internal.h b/codec2-dev/src/codec2_internal.h index 489dda3e..15115873 100644 --- a/codec2-dev/src/codec2_internal.h +++ b/codec2-dev/src/codec2_internal.h @@ -34,15 +34,17 @@ struct CODEC2 { int mode; + C2CONST c2const; int Fs; int n_samp; + int m_pitch; codec2_fft_cfg fft_fwd_cfg; /* forward FFT config */ codec2_fftr_cfg fftr_fwd_cfg; /* forward real FFT config */ - float w[M_PITCH]; /* time domain hamming window */ + float *w; /* [m_pitch] time domain hamming window */ COMP W[FFT_ENC]; /* DFT of w[] */ float *Pn; /* [2*n_samp] trapezoidal synthesis window */ float *bpf_buf; /* buffer for band pass filter */ - float Sn[M_PITCH]; /* input speech */ + float *Sn; /* [m_pitch] input speech */ float hpf_states[2]; /* high pass filter states */ void *nlp; /* pitch predictor states */ int gray; /* non-zero for gray encoding */ diff --git a/codec2-dev/src/defines.h b/codec2-dev/src/defines.h index 7cafd63e..f23ebadc 100644 --- a/codec2-dev/src/defines.h +++ b/codec2-dev/src/defines.h @@ -36,12 +36,11 @@ /* General defines */ -#define MAX_AMP 80 /* maximum number of harmonics */ +#define MAX_AMP 160 /* maximum number of harmonics */ #ifndef PI #define PI 3.141592654 /* mathematical constant */ #endif #define TWO_PI 6.283185307 /* mathematical constant */ -#define FS 8000 /* sample rate in Hz */ #define MAX_STR 256 /* maximum string size */ #define NW 279 /* analysis window size */ @@ -54,9 +53,9 @@ /* Pitch estimation defines */ -#define M_PITCH 320 /* pitch analysis frame size */ -#define P_MIN 20 /* minimum pitch */ -#define P_MAX 160 /* maximum pitch */ +#define M_PITCH_S 0.0400 /* pitch analysis window in s */ +#define P_MIN_S 0.0025 /* minimum pitch period in s */ +#define P_MAX_S 0.0200 /* maximum pitch period in s */ /*---------------------------------------------------------------------------*\ @@ -65,8 +64,14 @@ \*---------------------------------------------------------------------------*/ typedef struct { - int Fs; - int n_samp; + int Fs; /* sample rate of this instance */ + int n_samp; /* number of samples per 10ms frame at Fs */ + int max_amp; /* maximum number of harmonics */ + int m_pitch; /* pitch estimation window size in samples */ + int p_min; /* minimum pitch period in samples */ + int p_max; /* maximum pitch period in samples */ + float Wo_min; + float Wo_max; } C2CONST; /* Structure to hold model parameters for one frame */ diff --git a/codec2-dev/src/dump.c b/codec2-dev/src/dump.c index c993d83b..20bf0229 100644 --- a/codec2-dev/src/dump.c +++ b/codec2-dev/src/dump.c @@ -140,7 +140,7 @@ void dump_off(){ fclose(fhephase); } -void dump_Sn(float Sn[]) { +void dump_Sn(int m_pitch, float Sn[]) { int i; char s[MAX_STR]; @@ -155,10 +155,10 @@ void dump_Sn(float Sn[]) { /* split across two lines to avoid max line length problems */ /* reconstruct in Octave */ - for(i=0; iWo = prev->Wo; } else { - interp->Wo = TWO_PI/P_MAX; + interp->Wo = Wo_min; } interp->L = PI/interp->Wo; @@ -91,6 +93,7 @@ void interpolate( interp->A[l] = powf(10.0, log_amp); } } +#endif /*---------------------------------------------------------------------------*\ @@ -149,15 +152,16 @@ float sample_log_amp(MODEL *model, float w) void interpolate_lsp( codec2_fft_cfg fft_fwd_cfg, - MODEL *interp, /* interpolated model params */ - MODEL *prev, /* previous frames model params */ - MODEL *next, /* next frames model params */ - float *prev_lsps, /* previous frames LSPs */ - float prev_e, /* previous frames LPC energy */ - float *next_lsps, /* next frames LSPs */ - float next_e, /* next frames LPC energy */ - float *ak_interp, /* interpolated aks for this frame */ - float *lsps_interp/* interpolated lsps for this frame */ + MODEL *interp, /* interpolated model params */ + MODEL *prev, /* previous frames model params */ + MODEL *next, /* next frames model params */ + float *prev_lsps, /* previous frames LSPs */ + float prev_e, /* previous frames LPC energy */ + float *next_lsps, /* next frames LSPs */ + float next_e, /* next frames LPC energy */ + float *ak_interp, /* interpolated aks for this frame */ + float *lsps_interp, /* interpolated lsps for this frame */ + float Wo_min ) { int i; @@ -181,7 +185,7 @@ void interpolate_lsp( interp->Wo = prev->Wo; } else { - interp->Wo = TWO_PI/P_MAX; + interp->Wo = Wo_min; } interp->L = PI/interp->Wo; @@ -223,10 +227,11 @@ void interpolate_lsp( void interp_Wo( MODEL *interp, /* interpolated model params */ MODEL *prev, /* previous frames model params */ - MODEL *next /* next frames model params */ + MODEL *next, /* next frames model params */ + float Wo_min ) { - interp_Wo2(interp, prev, next, 0.5); + interp_Wo2(interp, prev, next, 0.5, Wo_min); } /*---------------------------------------------------------------------------*\ @@ -243,7 +248,8 @@ void interp_Wo2( MODEL *interp, /* interpolated model params */ MODEL *prev, /* previous frames model params */ MODEL *next, /* next frames model params */ - float weight + float weight, + float Wo_min ) { /* trap corner case where voicing est is probably wrong */ @@ -263,7 +269,7 @@ void interp_Wo2( interp->Wo = prev->Wo; } else { - interp->Wo = TWO_PI/P_MAX; + interp->Wo = Wo_min; } interp->L = PI/interp->Wo; } diff --git a/codec2-dev/src/interp.h b/codec2-dev/src/interp.h index 5d7f7093..1247b7e1 100644 --- a/codec2-dev/src/interp.h +++ b/codec2-dev/src/interp.h @@ -35,9 +35,9 @@ void interpolate_lsp(kiss_fft_cfg fft_dec_cfg, MODEL *interp, MODEL *prev, MODEL *next, float *prev_lsps, float prev_e, float *next_lsps, float next_e, - float *ak_interp, float *lsps_interp); -void interp_Wo(MODEL *interp, MODEL *prev, MODEL *next); -void interp_Wo2(MODEL *interp, MODEL *prev, MODEL *next, float weight); + float *ak_interp, float *lsps_interp, float Wo_min); +void interp_Wo(MODEL *interp, MODEL *prev, MODEL *next, float Wo_min); +void interp_Wo2(MODEL *interp, MODEL *prev, MODEL *next, float weight, float Wo_min); float interp_energy(float prev, float next); float interp_energy2(float prev, float next, float weight); void interpolate_lsp_ver2(float interp[], float prev[], float next[], float weight, int order); diff --git a/codec2-dev/src/newamp1.c b/codec2-dev/src/newamp1.c index ad729b65..ebb7257b 100644 --- a/codec2-dev/src/newamp1.c +++ b/codec2-dev/src/newamp1.c @@ -369,7 +369,7 @@ void resample_rate_L(MODEL *model, float rate_K_vec[], float rate_K_sample_freqs \*---------------------------------------------------------------------------*/ -void determine_phase(COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg) +void determine_phase(C2CONST *c2const, COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg) { int i,m,b; int Ns = Nfft/2+1; @@ -382,7 +382,7 @@ void determine_phase(COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, c } for(i=0; iFs/1000.0)*(float)i/Nfft; } interp_para(Gdbfk, &rate_L_sample_freqs_kHz[1], &AmdB[1], model->L, sample_freqs_kHz, Ns); @@ -407,7 +407,8 @@ void determine_phase(COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, c \*---------------------------------------------------------------------------*/ -void newamp1_model_to_indexes(int indexes[], +void newamp1_model_to_indexes(C2CONST *c2const, + int indexes[], MODEL *model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], @@ -448,7 +449,7 @@ void newamp1_model_to_indexes(int indexes[], an unvoiced frame */ if (model->voiced) { - int index = encode_log_Wo(model->Wo, 6); + int index = encode_log_Wo(c2const, model->Wo, 6); if (index == 0) { index = 1; } @@ -533,7 +534,8 @@ void newamp1_indexes_to_rate_K_vec(float rate_K_vec_[], \*---------------------------------------------------------------------------*/ -void newamp1_indexes_to_model(MODEL model_[], +void newamp1_indexes_to_model(C2CONST *c2const, + MODEL model_[], COMP H[], float *interpolated_surface_, float prev_rate_K_vec_[], @@ -562,7 +564,7 @@ void newamp1_indexes_to_model(MODEL model_[], /* decode latest Wo and voicing */ if (indexes[3]) { - Wo_right = decode_log_Wo(indexes[3], 6); + Wo_right = decode_log_Wo(c2const, indexes[3], 6); voicing_right = 1; } else { @@ -591,7 +593,7 @@ void newamp1_indexes_to_model(MODEL model_[], model_[i].voiced = avoicing_[i]; resample_rate_L(&model_[i], &interpolated_surface_[K*i], rate_K_sample_freqs_kHz, K); - determine_phase(&H[(MAX_AMP+1)*i], &model_[i], NEWAMP1_PHASE_NFFT, fwd_cfg, inv_cfg); + determine_phase(c2const, &H[(MAX_AMP+1)*i], &model_[i], NEWAMP1_PHASE_NFFT, fwd_cfg, inv_cfg); } /* update memories for next time */ diff --git a/codec2-dev/src/newamp1.h b/codec2-dev/src/newamp1.h index 4b3d7fa4..ec9d0f13 100644 --- a/codec2-dev/src/newamp1.h +++ b/codec2-dev/src/newamp1.h @@ -45,8 +45,9 @@ float rate_K_mbest_encode(int *indexes, float *x, float *xq, int ndim, int mbest void post_filter_newamp1(float vec[], float sample_freq_kHz[], int K, float pf_gain); void interp_Wo_v(float Wo_[], int L_[], int voicing_[], float Wo1, float Wo2, int voicing1, int voicing2); void resample_rate_L(MODEL *model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], int K); -void determine_phase(COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg); -void newamp1_model_to_indexes(int indexes[], +void determine_phase(C2CONST *c2const, COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg); +void newamp1_model_to_indexes(C2CONST *c2const, + int indexes[], MODEL *model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], @@ -63,7 +64,8 @@ void newamp1_indexes_to_rate_K_vec(float rate_K_vec_[], int indexes[]); void newamp1_interpolate(float interpolated_surface_[], float left_vec[], float right_vec[], int K); -void newamp1_indexes_to_model(MODEL model_[], +void newamp1_indexes_to_model(C2CONST *c2const, + MODEL model_[], COMP H[], float interpolated_surface_[], float prev_rate_K_vec_[], diff --git a/codec2-dev/src/nlp.c b/codec2-dev/src/nlp.c index 3321375c..36037b43 100644 --- a/codec2-dev/src/nlp.c +++ b/codec2-dev/src/nlp.c @@ -305,7 +305,7 @@ float nlp( PROFILE_SAMPLE_AND_LOG(magsq, fft, " mag sq"); #ifdef DUMP - dump_sq(nlp->sq); + dump_sq(m, nlp->sq); dump_Fw(Fw); #endif diff --git a/codec2-dev/src/quantise.c b/codec2-dev/src/quantise.c index a4fa96eb..8afed7b6 100644 --- a/codec2-dev/src/quantise.c +++ b/codec2-dev/src/quantise.c @@ -52,7 +52,7 @@ \*---------------------------------------------------------------------------*/ float speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[], - int order); + int m_pitch, int order); /*---------------------------------------------------------------------------*\ @@ -984,11 +984,11 @@ void aks_to_M2( \*---------------------------------------------------------------------------*/ -int encode_Wo(float Wo, int bits) +int encode_Wo(C2CONST *c2const, float Wo, int bits) { int index, Wo_levels = 1<Wo_min; + float Wo_max = c2const->Wo_max; float norm; norm = (Wo - Wo_min)/(Wo_max - Wo_min); @@ -1009,10 +1009,10 @@ int encode_Wo(float Wo, int bits) \*---------------------------------------------------------------------------*/ -float decode_Wo(int index, int bits) +float decode_Wo(C2CONST *c2const, int index, int bits) { - float Wo_min = TWO_PI/P_MAX; - float Wo_max = TWO_PI/P_MIN; + float Wo_min = c2const->Wo_min; + float Wo_max = c2const->Wo_max; float step; float Wo; int Wo_levels = 1<Wo_min; + float Wo_max = c2const->Wo_max; float norm; norm = (log10f(Wo) - log10f(Wo_min))/(log10f(Wo_max) - log10f(Wo_min)); @@ -1058,10 +1058,10 @@ int encode_log_Wo(float Wo, int bits) \*---------------------------------------------------------------------------*/ -float decode_log_Wo(int index, int bits) +float decode_log_Wo(C2CONST *c2const, int index, int bits) { - float Wo_min = TWO_PI/P_MAX; - float Wo_max = TWO_PI/P_MIN; + float Wo_min = c2const->Wo_min; + float Wo_max = c2const->Wo_max; float step; float Wo; int Wo_levels = 1<Wo_min; + float Wo_max = c2const->Wo_max; float norm; norm = (Wo - prev_Wo)/(Wo_max - Wo_min); @@ -1121,10 +1122,10 @@ int encode_Wo_dt(float Wo, float prev_Wo) \*---------------------------------------------------------------------------*/ -float decode_Wo_dt(int index, float prev_Wo) +float decode_Wo_dt(C2CONST *c2const, int index, float prev_Wo) { - float Wo_min = TWO_PI/P_MAX; - float Wo_max = TWO_PI/P_MIN; + float Wo_min = c2const->Wo_min; + float Wo_max = c2const->Wo_max; float step; float Wo; int mask; @@ -1149,6 +1150,7 @@ float decode_Wo_dt(int index, float prev_Wo) return Wo; } +#endif /*---------------------------------------------------------------------------*\ @@ -1166,16 +1168,17 @@ float speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[], - int order + int m_pitch, + int order ) { int i, roots; - float Wn[M_PITCH]; + float Wn[m_pitch]; float R[order+1]; float e, E; e = 0.0; - for(i=0; iWo_min; + float Wo_max = c2const->Wo_max; + float Fs = c2const->Fs; + + /* VQ is only trained for Fs = 8000 Hz */ + + assert(Fs == 8000); x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2); x[1] = 10.0*log10f(1e-4 + *e); @@ -2018,13 +2026,13 @@ int encode_WoE(MODEL *model, float e, float xq[]) \*---------------------------------------------------------------------------*/ -void decode_WoE(MODEL *model, float *e, float xq[], int n1) +void decode_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[], int n1) { int i; const float *codebook1 = ge_cb[0].cb; int ndim = ge_cb[0].k; - float Wo_min = TWO_PI/P_MAX; - float Wo_max = TWO_PI/P_MIN; + float Wo_min = c2const->Wo_min; + float Wo_max = c2const->Wo_max; for (i=0;im_pitch; /* Generate Hamming window centered on M-sample pitch analysis window @@ -94,20 +106,20 @@ void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[ */ m = 0.0; - for(i=0; im_pitch; - /* Centre analysis window on time axis, we need to arrange input - to FFT this way to make FFT phases correct */ + for(i=0; iWo < TWO_PI/P_MAX) - model->Wo = TWO_PI/P_MAX; - if (model->Wo > TWO_PI/P_MIN) - model->Wo = TWO_PI/P_MIN; + if (model->Wo < TWO_PI/c2const->p_max) + model->Wo = TWO_PI/c2const->p_max; + if (model->Wo > TWO_PI/c2const->p_min) + model->Wo = TWO_PI/c2const->p_min; model->L = floorf(PI/model->Wo); } @@ -423,10 +437,11 @@ void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase) \*---------------------------------------------------------------------------*/ float est_voicing_mbe( - MODEL *model, - COMP Sw[], - COMP W[] - ) /* DFT of error */ + C2CONST *c2const, + MODEL *model, + COMP Sw[], + COMP W[] + ) { int l,al,bl,m; /* loop variables */ COMP Am; /* amplitude sample for this band */ @@ -525,7 +540,7 @@ float est_voicing_mbe( These errors are much more common than people with 50Hz3 pitch, so we have just a small eratio threshold. */ - sixty = 60.0*TWO_PI/FS; + sixty = 60.0*TWO_PI/c2const->Fs; if ((eratio < -4.0) && (model->Wo <= sixty)) model->voiced = 0; } diff --git a/codec2-dev/src/sine.h b/codec2-dev/src/sine.h index 47a3475f..774f020a 100644 --- a/codec2-dev/src/sine.h +++ b/codec2-dev/src/sine.h @@ -36,10 +36,10 @@ C2CONST c2const_create(int Fs); void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[]); float hpf(float x, float states[]); -void dft_speech(codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]); -void two_stage_pitch_refinement(MODEL *model, COMP Sw[]); +void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]); +void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]); void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase); -float est_voicing_mbe(MODEL *model, COMP Sw[], COMP W[]); +float est_voicing_mbe(C2CONST *c2const, MODEL *model, COMP Sw[], COMP W[]); void make_synthesis_window(C2CONST *c2const, float Pn[]); void synthesise(int n_samp, codec2_fftr_cfg fftr_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift);