{
C2CONST c2const = c2const_create(8000);
int n_samp = c2const.n_samp;
+ int m_pitch = c2const.m_pitch;
+
FILE *fout = NULL; /* output speech file */
FILE *fin; /* input speech file */
short buf[N_SAMP]; /* input/output buffer */
float buf_float[N_SAMP];
- float Sn[M_PITCH]; /* float input speech samples */
- float Sn_pre[M_PITCH]; /* pre-emphasised input speech samples */
+ float Sn[m_pitch]; /* float input speech samples */
+ float Sn_pre[m_pitch]; /* pre-emphasised input speech samples */
COMP Sw[FFT_ENC]; /* DFT of Sn[] */
codec2_fft_cfg fft_fwd_cfg;
codec2_fftr_cfg fftr_fwd_cfg;
codec2_fftr_cfg fftr_inv_cfg;
- float w[M_PITCH]; /* time domain hamming window */
+ float w[m_pitch]; /* time domain hamming window */
COMP W[FFT_ENC]; /* DFT of w[] */
MODEL model;
float Pn[2*N_SAMP]; /* trapezoidal synthesis window */
COMP Aw[FFT_ENC];
COMP H[MAX_AMP];
- for(i=0; i<M_PITCH; i++) {
+ for(i=0; i<m_pitch; i++) {
Sn[i] = 1.0;
Sn_pre[i] = 1.0;
}
for(i=0; i<2*N_SAMP; i++)
Sn_[i] = 0;
- prev_uq_Wo = prev_Wo = prev__Wo = TWO_PI/P_MAX;
+ prev_uq_Wo = prev_Wo = prev__Wo = c2const.Wo_min;
- prev_model.Wo = TWO_PI/P_MIN;
+ prev_model.Wo = c2const.Wo_max;
prev_model.L = floor(PI/prev_model.Wo);
for(i=1; i<=prev_model.L; i++) {
prev_model.A[i] = 0.0;
e = prev_e = 1;
hpf_states[0] = hpf_states[1] = 0.0;
- nlp_states = nlp_create(M_PITCH);
+ nlp_states = nlp_create(m_pitch);
if (argc < 2) {
print_help(long_options, num_opts, argv);
prev_e_dec = 1;
for(m=1; m<=MAX_AMP; m++)
prev_model_dec.A[m] = 0.0;
- prev_model_dec.Wo = TWO_PI/P_MAX;
+ prev_model_dec.Wo = c2const.Wo_min;
prev_model_dec.L = PI/prev_model_dec.Wo;
prev_model_dec.voiced = 0;
/* shift buffer of input samples, and insert new samples */
- for(i=0; i<M_PITCH-N_SAMP; i++) {
+ for(i=0; i<m_pitch-N_SAMP; i++) {
Sn[i] = Sn[i+N_SAMP];
}
for(i=0; i<N_SAMP; i++) {
- Sn[i+M_PITCH-N_SAMP] = buf_float[i];
+ Sn[i+m_pitch-N_SAMP] = buf_float[i];
}
/*------------------------------------------------------------*\
\*------------------------------------------------------------*/
- nlp(nlp_states,Sn,N_SAMP,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo);
+ nlp(nlp_states,Sn,N_SAMP,c2const.p_min,c2const.p_max,&pitch,Sw,W,&prev_uq_Wo);
model.Wo = TWO_PI/pitch;
- dft_speech(fft_fwd_cfg, Sw, Sn, w);
- two_stage_pitch_refinement(&model, Sw);
+ dft_speech(&c2const, fft_fwd_cfg, Sw, Sn, w);
+ two_stage_pitch_refinement(&c2const, &model, Sw);
estimate_amplitudes(&model, Sw, W, 1);
#ifdef DUMP
- dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
+ dump_Sn(m_pitch, Sn); dump_Sw(Sw); dump_model(&model);
#endif
#if 0
\*------------------------------------------------------------*/
if (phase0) {
- float Wn[M_PITCH]; /* windowed speech samples */
+ float Wn[m_pitch]; /* windowed speech samples */
float Rk[order+1]; /* autocorrelation coeffs */
COMP a[FFT_ENC];
/* find aks here, these are overwritten if LPC modelling is enabled */
- for(i=0; i<M_PITCH; i++)
+ for(i=0; i<m_pitch; i++)
Wn[i] = Sn[i]*w[i];
- autocorrelate(Wn,Rk,M_PITCH,order);
+ autocorrelate(Wn,Rk,m_pitch,order);
levinson_durbin(Rk,ak,order);
/* determine voicing */
#if 0
- snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew);
+ snr = est_voicing_mbe(&c2const, &model, Sw, W, Sw_, Ew);
#else
- snr = est_voicing_mbe(&model, Sw, W);
+ snr = est_voicing_mbe(&c2const, &model, Sw, W);
#endif
if (dump_pitch_e)
if (lpc_model) {
- e = speech_to_uq_lsps(lsps, ak, Sn, w, order);
+ e = speech_to_uq_lsps(lsps, ak, Sn, w, m_pitch, order);
for(i=0; i<LPC_ORD; i++)
lsps_[i] = lsps[i];
if (scalar_quant_Wo_e) {
e = decode_energy(encode_energy(e, E_BITS), E_BITS);
- model.Wo = decode_Wo(encode_Wo(model.Wo, WO_BITS), WO_BITS);
+ model.Wo = decode_Wo(&c2const, encode_Wo(&c2const, model.Wo, WO_BITS), WO_BITS);
model.L = PI/model.Wo; /* if we quantise Wo re-compute L */
}
if (scalar_quant_Wo_e_low) {
int ind;
e = decode_energy(ind = encode_energy(e, 3), 3);
- model.Wo = decode_log_Wo(encode_log_Wo(model.Wo, 5), 5);
+ model.Wo = decode_log_Wo(&c2const, encode_log_Wo(&c2const, model.Wo, 5), 5);
model.L = PI/model.Wo; /* if we quantise Wo re-compute L */
}
/* JVM's experimental joint Wo & LPC energy quantiser */
- quantise_WoE(&model, &e, Woe_);
+ quantise_WoE(&c2const, &model, &e, Woe_);
}
}
for(i=0, weight=weight_inc; i<decimate-1; i++, weight += weight_inc) {
//model_dec[i].voiced = model_dec[decimate-1].voiced;
interpolate_lsp_ver2(&lsps_dec[i][0], prev_lsps_dec, &lsps_dec[decimate-1][0], weight, order);
- interp_Wo2(&model_dec[i], &prev_model_dec, &model_dec[decimate-1], weight);
+ interp_Wo2(&model_dec[i], &prev_model_dec, &model_dec[decimate-1], weight, c2const.Wo_min);
e_dec[i] = interp_energy2(prev_e_dec, e_dec[decimate-1],weight);
}
{
struct CODEC2 *c2;
int i,l;
- C2CONST c2const = c2const_create(8000);
- int n_samp = c2const.n_samp;
if (!((mode >= 0) && (mode <= CODEC2_MODE_700C))) {
return NULL;
c2->mode = mode;
- c2->Fs = c2const.Fs;
- c2->n_samp = n_samp;
+ /* store constants in a few places for convenience */
+
+ c2->c2const = c2const_create(8000);
+ c2->Fs = c2->c2const.Fs;
+ int n_samp = c2->n_samp = c2->c2const.n_samp;
+ int m_pitch = c2->m_pitch = c2->c2const.m_pitch;
+
c2->Pn = (float*)malloc(2*n_samp*sizeof(float));
if (c2->Pn == NULL) {
- free(c2);
return NULL;
}
c2->Sn_ = (float*)malloc(2*n_samp*sizeof(float));
if (c2->Sn_ == NULL) {
- free(c2->Pn);
- free(c2);
+ return NULL;
+ }
+ c2->w = (float*)malloc(m_pitch*sizeof(float));
+ if (c2->w == NULL) {
+ return NULL;
+ }
+ c2->Sn = (float*)malloc(m_pitch*sizeof(float));
+ if (c2->Sn == NULL) {
return NULL;
}
- for(i=0; i<M_PITCH; i++)
+ for(i=0; i<m_pitch; i++)
c2->Sn[i] = 1.0;
c2->hpf_states[0] = c2->hpf_states[1] = 0.0;
for(i=0; i<2*n_samp; i++)
c2->Sn_[i] = 0;
c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL);
c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL);
- make_analysis_window(&c2const, c2->fft_fwd_cfg, c2->w,c2->W);
- make_synthesis_window(&c2const, c2->Pn);
+ make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w,c2->W);
+ make_synthesis_window(&c2->c2const, c2->Pn);
c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL);
quantise_init();
c2->prev_Wo_enc = 0.0;
for(l=1; l<=MAX_AMP; l++)
c2->prev_model_dec.A[l] = 0.0;
- c2->prev_model_dec.Wo = TWO_PI/P_MAX;
+ c2->prev_model_dec.Wo = TWO_PI/c2->c2const.p_max;
c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo;
c2->prev_model_dec.voiced = 0;
}
c2->prev_e_dec = 1;
- c2->nlp = nlp_create(M_PITCH);
+ c2->nlp = nlp_create(m_pitch);
if (c2->nlp == NULL) {
- free(c2->Sn_);
- free(c2->Pn);
- free (c2);
return NULL;
}
codec2_fft_free(c2->phase_fft_fwd_cfg);
codec2_fft_free(c2->phase_fft_inv_cfg);
}
+ free(c2->Pn);
+ free(c2->Sn);
+ free(c2->w);
+ free(c2->Sn_);
free(c2);
}
analyse_one_frame(c2, &model, &speech[c2->n_samp]);
pack(bits, &nbit, model.voiced, 1);
- Wo_index = encode_Wo(model.Wo, WO_BITS);
+ Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
pack(bits, &nbit, Wo_index, WO_BITS);
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
e_index = encode_energy(e, E_BITS);
pack(bits, &nbit, e_index, E_BITS);
model[1].voiced = unpack(bits, &nbit, 1);
Wo_index = unpack(bits, &nbit, WO_BITS);
- model[1].Wo = decode_Wo(Wo_index, WO_BITS);
+ model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
model[1].L = PI/model[1].Wo;
e_index = unpack(bits, &nbit, E_BITS);
/* Wo and energy are sampled every 20ms, so we interpolate just 1
10ms frame between 20ms samples */
- interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
e[0] = interp_energy(c2->prev_e_dec, e[1]);
/* LSPs are sampled every 20ms so we interpolate the frame in
analyse_one_frame(c2, &model, &speech[c2->n_samp]);
pack(bits, &nbit, model.voiced, 1);
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
WoE_index = encode_WoE(&model, e, c2->xq_enc);
pack(bits, &nbit, WoE_index, WO_E_BITS);
model[1].voiced = unpack(bits, &nbit, 1);
WoE_index = unpack(bits, &nbit, WO_E_BITS);
- decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index);
+ decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
for(i=0; i<LSP_SCALAR_INDEXES; i++) {
lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
/* Wo and energy are sampled every 20ms, so we interpolate just 1
10ms frame between 20ms samples */
- interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
e[0] = interp_energy(c2->prev_e_dec, e[1]);
/* LSPs are sampled every 20ms so we interpolate the frame in
analyse_one_frame(c2, &model, &speech[c2->n_samp]);
pack(bits, &nbit, model.voiced, 1);
- Wo_index = encode_Wo(model.Wo, WO_BITS);
+ Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
pack(bits, &nbit, Wo_index, WO_BITS);
/* need to run this just to get LPC energy */
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
e_index = encode_energy(e, E_BITS);
pack(bits, &nbit, e_index, E_BITS);
analyse_one_frame(c2, &model, &speech[3*c2->n_samp]);
pack(bits, &nbit, model.voiced, 1);
- Wo_index = encode_Wo(model.Wo, WO_BITS);
+ Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
pack(bits, &nbit, Wo_index, WO_BITS);
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
e_index = encode_energy(e, E_BITS);
pack(bits, &nbit, e_index, E_BITS);
model[1].voiced = unpack(bits, &nbit, 1);
Wo_index = unpack(bits, &nbit, WO_BITS);
- model[1].Wo = decode_Wo(Wo_index, WO_BITS);
+ model[1].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
model[1].L = PI/model[1].Wo;
e_index = unpack(bits, &nbit, E_BITS);
model[3].voiced = unpack(bits, &nbit, 1);
Wo_index = unpack(bits, &nbit, WO_BITS);
- model[3].Wo = decode_Wo(Wo_index, WO_BITS);
+ model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
model[3].L = PI/model[3].Wo;
e_index = unpack(bits, &nbit, E_BITS);
/* Wo and energy are sampled every 20ms, so we interpolate just 1
10ms frame between 20ms samples */
- interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
e[0] = interp_energy(c2->prev_e_dec, e[1]);
- interp_Wo(&model[2], &model[1], &model[3]);
+ interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
e[2] = interp_energy(e[1], e[3]);
/* LSPs are sampled every 40ms so we interpolate the 3 frames in
pack(bits, &nbit, model.voiced, 1);
/* need to run this just to get LPC energy */
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
WoE_index = encode_WoE(&model, e, c2->xq_enc);
pack(bits, &nbit, WoE_index, WO_E_BITS);
analyse_one_frame(c2, &model, &speech[3*c2->n_samp]);
pack(bits, &nbit, model.voiced, 1);
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
WoE_index = encode_WoE(&model, e, c2->xq_enc);
pack(bits, &nbit, WoE_index, WO_E_BITS);
model[1].voiced = unpack(bits, &nbit, 1);
WoE_index = unpack(bits, &nbit, WO_E_BITS);
- decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index);
+ decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
model[2].voiced = unpack(bits, &nbit, 1);
model[3].voiced = unpack(bits, &nbit, 1);
WoE_index = unpack(bits, &nbit, WO_E_BITS);
- decode_WoE(&model[3], &e[3], c2->xq_dec, WoE_index);
+ decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index);
for(i=0; i<LSP_SCALAR_INDEXES; i++) {
lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
/* Wo and energy are sampled every 20ms, so we interpolate just 1
10ms frame between 20ms samples */
- interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
e[0] = interp_energy(c2->prev_e_dec, e[1]);
- interp_Wo(&model[2], &model[1], &model[3]);
+ interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
e[2] = interp_energy(e[1], e[3]);
/* LSPs are sampled every 40ms so we interpolate the 3 frames in
analyse_one_frame(c2, &model, &speech[3*c2->n_samp]);
pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
- Wo_index = encode_Wo(model.Wo, WO_BITS);
+ Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray);
#ifdef PROFILE
quant_start = machdep_profile_sample();
#endif
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
e_index = encode_energy(e, E_BITS);
pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray);
model[3].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);
Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray);
- model[3].Wo = decode_Wo(Wo_index, WO_BITS);
+ model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
model[3].L = PI/model[3].Wo;
e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray);
PROFILE_SAMPLE(recover_start);
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
- interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
+ interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
}
pack(bits, &nbit, model.voiced, 1);
/* need to run this just to get LPC energy */
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
WoE_index = encode_WoE(&model, e, c2->xq_enc);
pack(bits, &nbit, WoE_index, WO_E_BITS);
analyse_one_frame(c2, &model, &speech[3*c2->n_samp]);
pack(bits, &nbit, model.voiced, 1);
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
WoE_index = encode_WoE(&model, e, c2->xq_enc);
pack(bits, &nbit, WoE_index, WO_E_BITS);
model[1].voiced = unpack(bits, &nbit, 1);
WoE_index = unpack(bits, &nbit, WO_E_BITS);
- decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index);
+ decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
model[2].voiced = unpack(bits, &nbit, 1);
model[3].voiced = unpack(bits, &nbit, 1);
WoE_index = unpack(bits, &nbit, WO_E_BITS);
- decode_WoE(&model[3], &e[3], c2->xq_dec, WoE_index);
+ decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index);
for(i=0; i<LSP_PRED_VQ_INDEXES; i++) {
lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i));
/* Wo and energy are sampled every 20ms, so we interpolate just 1
10ms frame between 20ms samples */
- interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
e[0] = interp_energy(c2->prev_e_dec, e[1]);
- interp_Wo(&model[2], &model[1], &model[3]);
+ interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
e[2] = interp_energy(e[1], e[3]);
/* LSPs are sampled every 40ms so we interpolate the 3 frames in
analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]);
pack(bits, &nbit, model.voiced, 1);
- Wo_index = encode_log_Wo(model.Wo, 5);
+ Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5);
pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD_LOW);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW);
e_index = encode_energy(e, 3);
pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);
model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced;
Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
- model[3].Wo = decode_log_Wo(Wo_index, 5);
+ model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5);
model[3].L = PI/model[3].Wo;
e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
- interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
+ interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
}
for(i=0; i<4; i++) {
analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]);
pack(bits, &nbit, model.voiced, 1);
- Wo_index = encode_log_Wo(model.Wo, 5);
+ Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5);
pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD_LOW);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW);
e_index = encode_energy(e, 3);
pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);
model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced;
Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
- model[3].Wo = decode_log_Wo(Wo_index, 5);
+ model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5);
model[3].L = PI/model[3].Wo;
e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
- interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
+ interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
}
for(i=0; i<4; i++) {
float rate_K_vec[K], mean;
float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K];
- newamp1_model_to_indexes(indexes,
+ newamp1_model_to_indexes(&c2->c2const,
+ indexes,
&model,
rate_K_vec,
c2->rate_K_sample_freqs_kHz,
COMP HH[M][MAX_AMP+1];
float interpolated_surface_[M][NEWAMP1_K];
- newamp1_indexes_to_model(model,
+ newamp1_indexes_to_model(&c2->c2const,
+ model,
(COMP*)HH,
(float*)interpolated_surface_,
c2->prev_rate_K_vec_,
if (c2->mode == CODEC2_MODE_2400) {
nbit = 1 + 1;
WoE_index = unpack(bits, &nbit, WO_E_BITS);
- decode_WoE(&model, &e, xq_dec, WoE_index);
+ decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
}
if (c2->mode == CODEC2_MODE_1600) {
nbit = 1 + 1 + WO_BITS;
if (c2->mode == CODEC2_MODE_1400) {
nbit = 1 + 1;
WoE_index = unpack(bits, &nbit, WO_E_BITS);
- decode_WoE(&model, &e, xq_dec, WoE_index);
+ decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
}
if (c2->mode == CODEC2_MODE_1300) {
nbit = 1 + 1 + 1 + 1 + WO_BITS;
if (c2->mode == CODEC2_MODE_1200) {
nbit = 1 + 1;
WoE_index = unpack(bits, &nbit, WO_E_BITS);
- decode_WoE(&model, &e, xq_dec, WoE_index);
+ decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
}
if (c2->mode == CODEC2_MODE_700) {
nbit = 1 + 5;
float pitch;
int i;
PROFILE_VAR(dft_start, nlp_start, model_start, two_stage, estamps);
+ int n_samp = c2->n_samp;
+ int m_pitch = c2->m_pitch;
/* Read input speech */
- for(i=0; i<M_PITCH-c2->n_samp; i++)
- c2->Sn[i] = c2->Sn[i+c2->n_samp];
- for(i=0; i<c2->n_samp; i++)
- c2->Sn[i+M_PITCH-c2->n_samp] = speech[i];
+ for(i=0; i<m_pitch-n_samp; i++)
+ c2->Sn[i] = c2->Sn[i+n_samp];
+ for(i=0; i<n_samp; i++)
+ c2->Sn[i+m_pitch-n_samp] = speech[i];
PROFILE_SAMPLE(dft_start);
- dft_speech(c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);
+ dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);
PROFILE_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech");
/* Estimate pitch */
- nlp(c2->nlp,c2->Sn,c2->n_samp,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
+ nlp(c2->nlp,c2->Sn,n_samp,c2->c2const.p_min,c2->c2const.p_max,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, " nlp");
model->Wo = TWO_PI/pitch;
/* estimate model parameters */
- two_stage_pitch_refinement(model, Sw);
+ two_stage_pitch_refinement(&c2->c2const, model, Sw);
PROFILE_SAMPLE_AND_LOG(two_stage, model_start, " two_stage");
estimate_amplitudes(model, Sw, c2->W, 0);
PROFILE_SAMPLE_AND_LOG(estamps, two_stage, " est_amps");
- est_voicing_mbe(model, Sw, c2->W);
+ est_voicing_mbe(&c2->c2const, model, Sw, c2->W);
c2->prev_Wo_enc = model->Wo;
PROFILE_SAMPLE_AND_LOG2(estamps, " est_voicing");
#ifdef DUMP
struct CODEC2 {
int mode;
+ C2CONST c2const;
int Fs;
int n_samp;
+ int m_pitch;
codec2_fft_cfg fft_fwd_cfg; /* forward FFT config */
codec2_fftr_cfg fftr_fwd_cfg; /* forward real FFT config */
- float w[M_PITCH]; /* time domain hamming window */
+ float *w; /* [m_pitch] time domain hamming window */
COMP W[FFT_ENC]; /* DFT of w[] */
float *Pn; /* [2*n_samp] trapezoidal synthesis window */
float *bpf_buf; /* buffer for band pass filter */
- float Sn[M_PITCH]; /* input speech */
+ float *Sn; /* [m_pitch] input speech */
float hpf_states[2]; /* high pass filter states */
void *nlp; /* pitch predictor states */
int gray; /* non-zero for gray encoding */
/* General defines */
-#define MAX_AMP 80 /* maximum number of harmonics */
+#define MAX_AMP 160 /* maximum number of harmonics */
#ifndef PI
#define PI 3.141592654 /* mathematical constant */
#endif
#define TWO_PI 6.283185307 /* mathematical constant */
-#define FS 8000 /* sample rate in Hz */
#define MAX_STR 256 /* maximum string size */
#define NW 279 /* analysis window size */
/* Pitch estimation defines */
-#define M_PITCH 320 /* pitch analysis frame size */
-#define P_MIN 20 /* minimum pitch */
-#define P_MAX 160 /* maximum pitch */
+#define M_PITCH_S 0.0400 /* pitch analysis window in s */
+#define P_MIN_S 0.0025 /* minimum pitch period in s */
+#define P_MAX_S 0.0200 /* maximum pitch period in s */
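/* At Fs = 8000 Hz these correspond to a 320 sample pitch analysis window and
   pitch periods of 20..160 samples (400 Hz down to 50 Hz); at Fs = 16000 Hz
   the sample counts double (illustrative figures worked from the defines above) */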
/*---------------------------------------------------------------------------*\
\*---------------------------------------------------------------------------*/
typedef struct {
- int Fs;
- int n_samp;
+ int Fs; /* sample rate of this instance */
+ int n_samp; /* number of samples per 10ms frame at Fs */
+ int max_amp; /* maximum number of harmonics */
+ int m_pitch; /* pitch estimation window size in samples */
+ int p_min; /* minimum pitch period in samples */
+ int p_max; /* maximum pitch period in samples */
+ float Wo_min;
+ float Wo_max;
} C2CONST;
/* Structure to hold model parameters for one frame */
fclose(fhephase);
}
-void dump_Sn(float Sn[]) {
+void dump_Sn(int m_pitch, float Sn[]) {
int i;
char s[MAX_STR];
/* split across two lines to avoid max line length problems */
/* reconstruct in Octave */
- for(i=0; i<M_PITCH/2; i++)
+ for(i=0; i<m_pitch/2; i++)
fprintf(fsn,"%f\t",Sn[i]);
fprintf(fsn,"\n");
- for(i=M_PITCH/2; i<M_PITCH; i++)
+ for(i=m_pitch/2; i<m_pitch; i++)
fprintf(fsn,"%f\t",Sn[i]);
fprintf(fsn,"\n");
}
fprintf(fe,"\n");
}
-void dump_sq(float sq[]) {
+void dump_sq(int m_pitch, float sq[]) {
int i;
char s[MAX_STR];
assert(fsq != NULL);
}
- for(i=0; i<M_PITCH/2; i++)
+ for(i=0; i<m_pitch/2; i++)
fprintf(fsq,"%f\t",sq[i]);
fprintf(fsq,"\n");
- for(i=M_PITCH/2; i<M_PITCH; i++)
+ for(i=m_pitch/2; i<m_pitch; i++)
fprintf(fsq,"%f\t",sq[i]);
fprintf(fsq,"\n");
}
fprintf(fE,"%f\n", 10.0*log10(E));
}
+#if 0
void dump_Rk(float Rk[]) {
int i;
char s[MAX_STR];
fprintf(frk,"%f\t",Rk[i]);
fprintf(frk,"\n");
}
+#endif
#endif
void dump_on(char filename_prefix[]);
void dump_off();
-void dump_Sn(float Sn[]);
+void dump_Sn(int m_pitch, float Sn[]);
void dump_Sw(COMP Sw[]);
void dump_Sw_(COMP Sw_[]);
void dump_Ew(COMP Ew[]);
/* NLP states */
-void dump_sq(float sq[]);
+void dump_sq(int m_pitch, float sq[]);
void dump_dec(COMP Fw[]);
void dump_Fw(COMP Fw[]);
void dump_e(float e_hz[]);
+#if 0
void dump_Rk(float Rk[]);
+#endif
/* post filter */
float sample_log_amp(MODEL *model, float w);
+#if 0
/*---------------------------------------------------------------------------*\
FUNCTION....: interp()
void interpolate(
MODEL *interp, /* interpolated model params */
MODEL *prev, /* previous frames model params */
- MODEL *next /* next frames model params */
+ MODEL *next, /* next frames model params */
+ float Wo_min
)
{
int l;
interp->Wo = prev->Wo;
}
else {
- interp->Wo = TWO_PI/P_MAX;
+ interp->Wo = Wo_min;
}
interp->L = PI/interp->Wo;
interp->A[l] = powf(10.0, log_amp);
}
}
+#endif
/*---------------------------------------------------------------------------*\
void interpolate_lsp(
codec2_fft_cfg fft_fwd_cfg,
- MODEL *interp, /* interpolated model params */
- MODEL *prev, /* previous frames model params */
- MODEL *next, /* next frames model params */
- float *prev_lsps, /* previous frames LSPs */
- float prev_e, /* previous frames LPC energy */
- float *next_lsps, /* next frames LSPs */
- float next_e, /* next frames LPC energy */
- float *ak_interp, /* interpolated aks for this frame */
- float *lsps_interp/* interpolated lsps for this frame */
+ MODEL *interp, /* interpolated model params */
+ MODEL *prev, /* previous frames model params */
+ MODEL *next, /* next frames model params */
+ float *prev_lsps, /* previous frames LSPs */
+ float prev_e, /* previous frames LPC energy */
+ float *next_lsps, /* next frames LSPs */
+ float next_e, /* next frames LPC energy */
+ float *ak_interp, /* interpolated aks for this frame */
+ float *lsps_interp, /* interpolated lsps for this frame */
+ float Wo_min
)
{
int i;
interp->Wo = prev->Wo;
}
else {
- interp->Wo = TWO_PI/P_MAX;
+ interp->Wo = Wo_min;
}
interp->L = PI/interp->Wo;
void interp_Wo(
MODEL *interp, /* interpolated model params */
MODEL *prev, /* previous frames model params */
- MODEL *next /* next frames model params */
+ MODEL *next, /* next frames model params */
+ float Wo_min
)
{
- interp_Wo2(interp, prev, next, 0.5);
+ interp_Wo2(interp, prev, next, 0.5, Wo_min);
}
/*---------------------------------------------------------------------------*\
MODEL *interp, /* interpolated model params */
MODEL *prev, /* previous frames model params */
MODEL *next, /* next frames model params */
- float weight
+ float weight,
+ float Wo_min
)
{
/* trap corner case where voicing est is probably wrong */
interp->Wo = prev->Wo;
}
else {
- interp->Wo = TWO_PI/P_MAX;
+ interp->Wo = Wo_min;
}
interp->L = PI/interp->Wo;
}
MODEL *interp, MODEL *prev, MODEL *next,
float *prev_lsps, float prev_e,
float *next_lsps, float next_e,
- float *ak_interp, float *lsps_interp);
-void interp_Wo(MODEL *interp, MODEL *prev, MODEL *next);
-void interp_Wo2(MODEL *interp, MODEL *prev, MODEL *next, float weight);
+ float *ak_interp, float *lsps_interp, float Wo_min);
+void interp_Wo(MODEL *interp, MODEL *prev, MODEL *next, float Wo_min);
+void interp_Wo2(MODEL *interp, MODEL *prev, MODEL *next, float weight, float Wo_min);
float interp_energy(float prev, float next);
float interp_energy2(float prev, float next, float weight);
void interpolate_lsp_ver2(float interp[], float prev[], float next[], float weight, int order);
\*---------------------------------------------------------------------------*/
-void determine_phase(COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg)
+void determine_phase(C2CONST *c2const, COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg)
{
int i,m,b;
int Ns = Nfft/2+1;
}
for(i=0; i<Ns; i++) {
- sample_freqs_kHz[i] = (FS/1000.0)*(float)i/Nfft;
+ sample_freqs_kHz[i] = (c2const->Fs/1000.0)*(float)i/Nfft;
}
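/* i.e. FFT bin i corresponds to i*Fs/Nfft Hz, expressed here in kHz */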
interp_para(Gdbfk, &rate_L_sample_freqs_kHz[1], &AmdB[1], model->L, sample_freqs_kHz, Ns);
\*---------------------------------------------------------------------------*/
-void newamp1_model_to_indexes(int indexes[],
+void newamp1_model_to_indexes(C2CONST *c2const,
+ int indexes[],
MODEL *model,
float rate_K_vec[],
float rate_K_sample_freqs_kHz[],
an unvoiced frame */
if (model->voiced) {
- int index = encode_log_Wo(model->Wo, 6);
+ int index = encode_log_Wo(c2const, model->Wo, 6);
if (index == 0) {
index = 1;
}
\*---------------------------------------------------------------------------*/
-void newamp1_indexes_to_model(MODEL model_[],
+void newamp1_indexes_to_model(C2CONST *c2const,
+ MODEL model_[],
COMP H[],
float *interpolated_surface_,
float prev_rate_K_vec_[],
/* decode latest Wo and voicing */
if (indexes[3]) {
- Wo_right = decode_log_Wo(indexes[3], 6);
+ Wo_right = decode_log_Wo(c2const, indexes[3], 6);
voicing_right = 1;
}
else {
model_[i].voiced = avoicing_[i];
resample_rate_L(&model_[i], &interpolated_surface_[K*i], rate_K_sample_freqs_kHz, K);
- determine_phase(&H[(MAX_AMP+1)*i], &model_[i], NEWAMP1_PHASE_NFFT, fwd_cfg, inv_cfg);
+ determine_phase(c2const, &H[(MAX_AMP+1)*i], &model_[i], NEWAMP1_PHASE_NFFT, fwd_cfg, inv_cfg);
}
/* update memories for next time */
void post_filter_newamp1(float vec[], float sample_freq_kHz[], int K, float pf_gain);
void interp_Wo_v(float Wo_[], int L_[], int voicing_[], float Wo1, float Wo2, int voicing1, int voicing2);
void resample_rate_L(MODEL *model, float rate_K_vec[], float rate_K_sample_freqs_kHz[], int K);
-void determine_phase(COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg);
-void newamp1_model_to_indexes(int indexes[],
+void determine_phase(C2CONST *c2const, COMP H[], MODEL *model, int Nfft, codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg);
+void newamp1_model_to_indexes(C2CONST *c2const,
+ int indexes[],
MODEL *model,
float rate_K_vec[],
float rate_K_sample_freqs_kHz[],
int indexes[]);
void newamp1_interpolate(float interpolated_surface_[], float left_vec[], float right_vec[], int K);
-void newamp1_indexes_to_model(MODEL model_[],
+void newamp1_indexes_to_model(C2CONST *c2const,
+ MODEL model_[],
COMP H[],
float interpolated_surface_[],
float prev_rate_K_vec_[],
PROFILE_SAMPLE_AND_LOG(magsq, fft, " mag sq");
#ifdef DUMP
- dump_sq(nlp->sq);
+ dump_sq(m, nlp->sq);
dump_Fw(Fw);
#endif
\*---------------------------------------------------------------------------*/
float speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[],
- int order);
+ int m_pitch, int order);
/*---------------------------------------------------------------------------*\
\*---------------------------------------------------------------------------*/
-int encode_Wo(float Wo, int bits)
+int encode_Wo(C2CONST *c2const, float Wo, int bits)
{
int index, Wo_levels = 1<<bits;
- float Wo_min = TWO_PI/P_MAX;
- float Wo_max = TWO_PI/P_MIN;
+ float Wo_min = c2const->Wo_min;
+ float Wo_max = c2const->Wo_max;
float norm;
norm = (Wo - Wo_min)/(Wo_max - Wo_min);
\*---------------------------------------------------------------------------*/
-float decode_Wo(int index, int bits)
+float decode_Wo(C2CONST *c2const, int index, int bits)
{
- float Wo_min = TWO_PI/P_MAX;
- float Wo_max = TWO_PI/P_MIN;
+ float Wo_min = c2const->Wo_min;
+ float Wo_max = c2const->Wo_max;
float step;
float Wo;
int Wo_levels = 1<<bits;
\*---------------------------------------------------------------------------*/
-int encode_log_Wo(float Wo, int bits)
+int encode_log_Wo(C2CONST *c2const, float Wo, int bits)
{
int index, Wo_levels = 1<<bits;
- float Wo_min = TWO_PI/P_MAX;
- float Wo_max = TWO_PI/P_MIN;
+ float Wo_min = c2const->Wo_min;
+ float Wo_max = c2const->Wo_max;
float norm;
norm = (log10f(Wo) - log10f(Wo_min))/(log10f(Wo_max) - log10f(Wo_min));
\*---------------------------------------------------------------------------*/
-float decode_log_Wo(int index, int bits)
+float decode_log_Wo(C2CONST *c2const, int index, int bits)
{
- float Wo_min = TWO_PI/P_MAX;
- float Wo_max = TWO_PI/P_MIN;
+ float Wo_min = c2const->Wo_min;
+ float Wo_max = c2const->Wo_max;
float step;
float Wo;
int Wo_levels = 1<<bits;
return powf(10,Wo);
}
+#if 0
/*---------------------------------------------------------------------------*\
FUNCTION....: encode_Wo_dt()
\*---------------------------------------------------------------------------*/
-int encode_Wo_dt(float Wo, float prev_Wo)
+int encode_Wo_dt(C2CONST *c2const, float Wo, float prev_Wo)
{
int index, mask, max_index, min_index;
- float Wo_min = TWO_PI/P_MAX;
- float Wo_max = TWO_PI/P_MIN;
+ float Wo_min = c2const->Wo_min;
+ float Wo_max = c2const->Wo_max;
float norm;
norm = (Wo - prev_Wo)/(Wo_max - Wo_min);
\*---------------------------------------------------------------------------*/
-float decode_Wo_dt(int index, float prev_Wo)
+float decode_Wo_dt(C2CONST *c2const, int index, float prev_Wo)
{
- float Wo_min = TWO_PI/P_MAX;
- float Wo_max = TWO_PI/P_MIN;
+ float Wo_min = c2const->Wo_min;
+ float Wo_max = c2const->Wo_max;
float step;
float Wo;
int mask;
return Wo;
}
+#endif
/*---------------------------------------------------------------------------*\
float ak[],
float Sn[],
float w[],
- int order
+ int m_pitch,
+ int order
)
{
int i, roots;
- float Wn[M_PITCH];
+ float Wn[m_pitch];
float R[order+1];
float e, E;
e = 0.0;
- for(i=0; i<M_PITCH; i++) {
+ for(i=0; i<m_pitch; i++) {
Wn[i] = Sn[i]*w[i];
e += Wn[i]*Wn[i];
}
return 0.0;
}
- autocorrelate(Wn, R, M_PITCH, order);
+ autocorrelate(Wn, R, m_pitch, order);
levinson_durbin(R, ak, order);
E = 0.0;
\*---------------------------------------------------------------------------*/
-void quantise_WoE(MODEL *model, float *e, float xq[])
+void quantise_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[])
{
int i, n1;
float x[2];
const float *codebook1 = ge_cb[0].cb;
int nb_entries = ge_cb[0].m;
int ndim = ge_cb[0].k;
- float Wo_min = TWO_PI/P_MAX;
- float Wo_max = TWO_PI/P_MIN;
+ float Wo_min = c2const->Wo_min;
+ float Wo_max = c2const->Wo_max;
+ float Fs = c2const->Fs;
+
+ /* VQ is only trained for Fs = 8000 Hz */
+
+ assert(Fs == 8000);
x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
x[1] = 10.0*log10f(1e-4 + *e);
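/* With Fs = 8000, f0 = (Wo/TWO_PI)*Fs = (Wo/PI)*4000 Hz, so x[0] is
   log2(f0/50), i.e. the pitch in octaves above 50 Hz; x[1] is the LPC
   energy in dB, floored near -40 dB by the 1e-4 term */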
\*---------------------------------------------------------------------------*/
-void decode_WoE(MODEL *model, float *e, float xq[], int n1)
+void decode_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[], int n1)
{
int i;
const float *codebook1 = ge_cb[0].cb;
int ndim = ge_cb[0].k;
- float Wo_min = TWO_PI/P_MAX;
- float Wo_max = TWO_PI/P_MIN;
+ float Wo_min = c2const->Wo_min;
+ float Wo_max = c2const->Wo_max;
for (i=0;i<ndim;i++)
{
float E, float *snr, int dump, int sim_pf,
int pf, int bass_boost, float beta, float gamma, COMP Aw[]);
-int encode_Wo(float Wo, int bits);
-float decode_Wo(int index, int bits);
-int encode_log_Wo(float Wo, int bits);
-float decode_log_Wo(int index, int bits);
-int encode_Wo_dt(float Wo, float prev_Wo);
-float decode_Wo_dt(int index, float prev_Wo);
+int encode_Wo(C2CONST *c2const, float Wo, int bits);
+float decode_Wo(C2CONST *c2const, int index, int bits);
+int encode_log_Wo(C2CONST *c2const, float Wo, int bits);
+float decode_log_Wo(C2CONST *c2const, int index, int bits);
+#if 0
+int encode_Wo_dt(C2CONST *c2const, float Wo, float prev_Wo);
+float decode_Wo_dt(C2CONST *c2const, int index, float prev_Wo);
+#endif
void encode_lsps_scalar(int indexes[], float lsp[], int order);
void decode_lsps_scalar(float lsp[], int indexes[], int order);
void encode_lspds_scalar(int indexes[], float lsp[], int order);
void encode_mels_scalar(int mel_indexes[], float mels[], int order);
void decode_mels_scalar(float mels[], int mel_indexes[], int order);
-void quantise_WoE(MODEL *model, float *e, float xq[]);
+void quantise_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[]);
int encode_WoE(MODEL *model, float e, float xq[]);
-void decode_WoE(MODEL *model, float *e, float xq[], int n1);
+void decode_WoE(C2CONST *c2const, MODEL *model, float *e, float xq[], int n1);
int encode_energy(float e, int bits);
float decode_energy(int index, int bits);
float ak[],
float Sn[],
float w[],
- int order
+ int m_pitch,
+ int order
);
int check_lsp_order(float lsp[], int lpc_order);
void bw_expand_lsps(float lsp[], int order, float min_sep_low, float min_sep_high);
assert((Fs == 8000) || (Fs == 16000));
c2const.Fs = Fs;
c2const.n_samp = Fs*0.01;
+ c2const.max_amp = floor(Fs*P_MAX_S/2); /* max harmonics = half the longest pitch period */
+ c2const.p_min = floor(Fs*P_MIN_S);
+ c2const.p_max = floor(Fs*P_MAX_S);
+ c2const.m_pitch = floor(Fs*M_PITCH_S);
+ c2const.Wo_min = TWO_PI/c2const.p_max;
+ c2const.Wo_max = TWO_PI/c2const.p_min;
+
+ fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
+ fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
+ fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
+
return c2const;
}
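#if 0
/* Minimal usage sketch (hypothetical helper, illustrative only): the expected
   values follow from the M_PITCH_S, P_MIN_S and P_MAX_S defines */
static void c2const_example(void)
{
    C2CONST c = c2const_create(8000);

    assert(c.n_samp == 80 && c.m_pitch == 320);   /* 10 ms and 40 ms at 8 kHz */
    assert(c.p_min == 20 && c.p_max == 160);      /* 400 Hz down to 50 Hz     */

    /* Wo_min = TWO_PI/p_max ~ 0.039 rad/sample, Wo_max = TWO_PI/p_min ~ 0.314 */
}
#endif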
COMP wshift[FFT_ENC];
COMP temp;
int i,j;
+ int m_pitch = c2const->m_pitch;
/*
Generate Hamming window centered on the m_pitch-sample pitch analysis window
*/
m = 0.0;
- for(i=0; i<M_PITCH/2-NW/2; i++)
+ for(i=0; i<m_pitch/2-NW/2; i++)
w[i] = 0.0;
- for(i=M_PITCH/2-NW/2,j=0; i<M_PITCH/2+NW/2; i++,j++) {
+ for(i=m_pitch/2-NW/2,j=0; i<m_pitch/2+NW/2; i++,j++) {
w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(NW-1));
m += w[i]*w[i];
}
- for(i=M_PITCH/2+NW/2; i<M_PITCH; i++)
+ for(i=m_pitch/2+NW/2; i<m_pitch; i++)
w[i] = 0.0;
/* Normalise - makes freq domain amplitude estimation straightforward */
m = 1.0/sqrtf(m*FFT_ENC);
- for(i=0; i<M_PITCH; i++) {
+ for(i=0; i<m_pitch; i++) {
w[i] *= m;
}
wshift[i].imag = 0.0;
}
for(i=0; i<NW/2; i++)
- wshift[i].real = w[i+M_PITCH/2];
- for(i=FFT_ENC-NW/2,j=M_PITCH/2-NW/2; i<FFT_ENC; i++,j++)
+ wshift[i].real = w[i+m_pitch/2];
+ for(i=FFT_ENC-NW/2,j=m_pitch/2-NW/2; i<FFT_ENC; i++,j++)
wshift[i].real = w[j];
codec2_fft(fft_fwd_cfg, wshift, W);
// TODO: we can either go for a faster FFT using fftr and some stack usage
// or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace
#if 1
-void dft_speech(codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[])
+void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[])
{
int i;
- for(i=0; i<FFT_ENC; i++) {
- Sw[i].real = 0.0;
- Sw[i].imag = 0.0;
- }
+ int m_pitch = c2const->m_pitch;
- /* Centre analysis window on time axis, we need to arrange input
- to FFT this way to make FFT phases correct */
+ for(i=0; i<FFT_ENC; i++) {
+ Sw[i].real = 0.0;
+ Sw[i].imag = 0.0;
+ }
- /* move 2nd half to start of FFT input vector */
+ /* Centre the analysis window on the time axis; we need to arrange the
+ input to the FFT this way to make the FFT phases correct */
- for(i=0; i<NW/2; i++)
- Sw[i].real = Sn[i+M_PITCH/2]*w[i+M_PITCH/2];
+ /* move 2nd half to start of FFT input vector */
- /* move 1st half to end of FFT input vector */
+ for(i=0; i<NW/2; i++)
+ Sw[i].real = Sn[i+m_pitch/2]*w[i+m_pitch/2];
- for(i=0; i<NW/2; i++)
- Sw[FFT_ENC-NW/2+i].real = Sn[i+M_PITCH/2-NW/2]*w[i+M_PITCH/2-NW/2];
+ /* move 1st half to end of FFT input vector */
+
+ for(i=0; i<NW/2; i++)
+ Sw[FFT_ENC-NW/2+i].real = Sn[i+m_pitch/2-NW/2]*w[i+m_pitch/2-NW/2];
- codec2_fft_inplace(fft_fwd_cfg, Sw);
+ codec2_fft_inplace(fft_fwd_cfg, Sw);
}
#else
void dft_speech(codec2_fftr_cfg fftr_fwd_cfg, COMP Sw[], float Sn[], float w[])
/* move 2nd half to start of FFT input vector */
for(i=0; i<NW/2; i++)
- sw[i] = Sn[i+M_PITCH/2]*w[i+M_PITCH/2];
+ sw[i] = Sn[i+m_pitch/2]*w[i+m_pitch/2];
/* move 1st half to end of FFT input vector */
for(i=0; i<NW/2; i++)
- sw[FFT_ENC-NW/2+i] = Sn[i+M_PITCH/2-NW/2]*w[i+M_PITCH/2-NW/2];
+ sw[FFT_ENC-NW/2+i] = Sn[i+m_pitch/2-NW/2]*w[i+m_pitch/2-NW/2];
codec2_fftr(fftr_fwd_cfg, sw, Sw);
}
\*---------------------------------------------------------------------------*/
-void two_stage_pitch_refinement(MODEL *model, COMP Sw[])
+void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[])
{
float pmin,pmax,pstep; /* pitch refinement minimum, maximum and step */
/* Limit range */
- if (model->Wo < TWO_PI/P_MAX)
- model->Wo = TWO_PI/P_MAX;
- if (model->Wo > TWO_PI/P_MIN)
- model->Wo = TWO_PI/P_MIN;
+ if (model->Wo < TWO_PI/c2const->p_max)
+ model->Wo = TWO_PI/c2const->p_max;
+ if (model->Wo > TWO_PI/c2const->p_min)
+ model->Wo = TWO_PI/c2const->p_min;
model->L = floorf(PI/model->Wo);
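/* L = floor(PI/Wo) is the number of harmonics that fit below Fs/2 */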
}
\*---------------------------------------------------------------------------*/
float est_voicing_mbe(
- MODEL *model,
- COMP Sw[],
- COMP W[]
- ) /* DFT of error */
+ C2CONST *c2const,
+ MODEL *model,
+ COMP Sw[],
+ COMP W[]
+ )
{
int l,al,bl,m; /* loop variables */
COMP Am; /* amplitude sample for this band */
These errors are much more common than speakers with a genuine 50 Hz
pitch, so we use just a small eratio threshold. */
- sixty = 60.0*TWO_PI/FS;
+ sixty = 60.0*TWO_PI/c2const->Fs;
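/* i.e. a 60 Hz fundamental expressed as a normalised angular frequency (rad/sample) */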
if ((eratio < -4.0) && (model->Wo <= sixty))
model->voiced = 0;
}
void make_analysis_window(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, float w[], COMP W[]);
float hpf(float x, float states[]);
-void dft_speech(codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]);
-void two_stage_pitch_refinement(MODEL *model, COMP Sw[]);
+void dft_speech(C2CONST *c2const, codec2_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]);
+void two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, COMP Sw[]);
void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase);
-float est_voicing_mbe(MODEL *model, COMP Sw[], COMP W[]);
+float est_voicing_mbe(C2CONST *c2const, MODEL *model, COMP Sw[], COMP W[]);
void make_synthesis_window(C2CONST *c2const, float Pn[]);
void synthesise(int n_samp, codec2_fftr_cfg fftr_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift);