FILE *fout;
short *buf;
unsigned char *bits;
- int nsam, nbit;
+ int nsam, nbit, nbyte;
if (argc != 4) {
printf("usage: c2dec 2500|1400 InputBitFile OutputRawSpeechFile\n");
fprintf(stderr, "Error in mode: %s. Must be 2500 or 1400\n", argv[1]);
exit(1);
}
- printf("mode: %d\n", mode);
+
if (strcmp(argv[2], "-") == 0) fin = stdin;
else if ( (fin = fopen(argv[2],"rb")) == NULL ) {
fprintf(stderr, "Error opening input bit file: %s: %s.\n",
nsam = codec2_samples_per_frame(codec2);
nbit = codec2_bits_per_frame(codec2);
buf = (short*)malloc(nsam*sizeof(short));
- bits = (unsigned char*)malloc(nbit*sizeof(char));
+ nbyte = (nbit + 7) / 8;
+ bits = (unsigned char*)malloc(nbyte*sizeof(char));
while(fread(bits, sizeof(char), nbit, fin) == nbit) {
codec2_decode(codec2, buf, bits);
FILE *fout;
short *buf;
unsigned char *bits;
- int nsam, nbit;
+ int nsam, nbit, nbyte;
if (argc != 4) {
printf("usage: c2enc 2500|1400 InputRawspeechFile OutputBitFile\n");
nsam = codec2_samples_per_frame(codec2);
nbit = codec2_bits_per_frame(codec2);
buf = (short*)malloc(nsam*sizeof(short));
- bits = (unsigned char*)malloc(nbit*sizeof(char));
+ nbyte = (nbit + 7) / 8;
+ bits = (unsigned char*)malloc(nbyte*sizeof(char));
while(fread(buf, sizeof(short), nsam, fin) == nsam) {
codec2_encode(codec2, bits, buf);
float Sn_[2*N]; /* synthesised speech */
int i; /* loop variable */
int frames;
- float prev_Wo, prev__Wo;
+ float prev_Wo, prev__Wo, uq_Wo, prev_uq_Wo;
float pitch;
int voiced1 = 0;
for(i=0; i<2*N; i++)
Sn_[i] = 0;
- prev_Wo = prev__Wo = TWO_PI/P_MAX;
+ prev_uq_Wo = prev_Wo = prev__Wo = TWO_PI/P_MAX;
prev_model.Wo = TWO_PI/P_MIN;
prev_model.L = floor(PI/prev_model.Wo);
/* Estimate pitch */
- nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
+ nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_uq_Wo);
model.Wo = TWO_PI/pitch;
-
+
/* estimate model parameters --------------------------------------*/
dft_speech(Sw, Sn, w);
two_stage_pitch_refinement(&model, Sw);
estimate_amplitudes(&model, Sw, W);
+ uq_Wo = model.Wo;
#ifdef DUMP
dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
#endif
/* determine voicing */
- snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew, prev_Wo);
+ snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew, prev_uq_Wo);
+ printf("snr %3.2f v: %d Wo: %f prev_Wo: %f\n", snr, model.voiced,
+ model.Wo, prev_uq_Wo);
#ifdef DUMP
dump_Sw_(Sw_);
dump_Ew(Ew);
if ((frames%2) == 0) {
printf("interp\n");
- printf("Wo: %1.5f L: %d e: %3.2f \n", model.Wo, model.L, e);
- for(i=0; i<LPC_ORD; i++)
- printf("lsp_indexes: %d lsps_: %2.3f prev_lsps_: %2.3f\n",
- lsp_indexes[i], lsps_[i], prev_lsps[i]);
- printf("ak: ");
- for(i=0; i<LPC_ORD; i++)
- printf("%2.3f ", ak[i]);
+ printf("Wo: %1.5f L: %d e: %3.2f v2: %d\n",
+ model.Wo, model.L, e, model.voiced);
+ //for(i=0; i<LPC_ORD; i++)
+ // printf("lsp_indexes: %d lsps_: %2.3f prev_lsps_: %2.3f\n",
+ // lsp_indexes[i], lsps_[i], prev_lsps[i]);
+ //printf("ak: ");
+ //for(i=0; i<LPC_ORD; i++)
+ // printf("%2.3f ", ak[i]);
+ //printf("\n");
+ printf("Am: ");
+ for(i=0; i<5; i++)
+ printf("%2.3f ", model.A[i]);
printf("\n");
/* decode interpolated frame */
interp_model.voiced = voiced1;
-
+ //printf("before Wo: %1.5f L: %d prev_e: %3.2f\n",
+ // prev_model.Wo, prev_model.L, prev_e);
+
#ifdef LOG_LIN_INTERP
interpolate(&interp_model, &prev_model, &model);
#else
prev_lsps, prev_e, lsps_, e, ak_interp);
apply_lpc_correction(&interp_model);
#endif
- printf("Wo: %1.5f L: %d prev_e: %3.2f\n",
- interp_model.Wo, interp_model.L, prev_e);
- printf("ak_interp: ");
- for(i=0; i<LPC_ORD; i++)
- printf("%2.3f ", ak_interp[i]);
+ printf("Wo: %1.5f L: %d prev_e: %3.2f v1: %d pv: %d\n",
+ interp_model.Wo, interp_model.L, prev_e, voiced1,
+ prev_model.voiced);
+ //printf("ak_interp: ");
+ //for(i=0; i<LPC_ORD; i++)
+ // printf("%2.3f ", ak_interp[i]);
+ //printf("\n");
+ printf("Am: ");
+ for(i=0; i<5; i++)
+ printf("%2.3f ", interp_model.A[i]);
printf("\n");
- //if (frames==40)
+ //if (frames == 6)
// exit(0);
if (phase0)
phase_synth_zero_order(&interp_model, ak_interp, ex_phase,
synth_one_frame(buf, &model, Sn_, Pn);
if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
}
+
prev__Wo = prev_Wo;
- prev_Wo = TWO_PI/pitch;
+ prev_Wo = model.Wo;
+ prev_uq_Wo = uq_Wo;
}
/* End Main Loop -----------------------------------------------------*/
pack(bits, &nbit, energy_index, E_BITS);
pack(bits, &nbit, voiced1, 1);
pack(bits, &nbit, voiced2, 1);
-
+ printf("v2: %d v1: %d\n", voiced2, voiced1);
assert(nbit == codec2_bits_per_frame(c2));
}
aks_to_M2(ak, LPC_ORD, &model, energy, &snr, 1);
apply_lpc_correction(&model);
- printf("Wo: %1.5f L: %d e: %3.2f \n", model.Wo, model.L, energy);
- for(i=0; i<LPC_ORD; i++)
- printf("lsp_indexes: %d lsp_: %2.3f prev_lsp_: %2.3f\n",
- lsp_indexes[i], lsps_[i], c2->prev_lsps_[i]);
- printf("ak: ");
- for(i=0; i<LPC_ORD; i++)
- printf("%2.3f ", ak[i]);
+ printf("Wo: %1.5f L: %d e: %3.2f v2: %d\n",
+ model.Wo, model.L, energy, voiced2 );
+ //for(i=0; i<LPC_ORD; i++)
+ // printf("lsp_indexes: %d lsp_: %2.3f prev_lsp_: %2.3f\n",
+ // lsp_indexes[i], lsps_[i], c2->prev_lsps_[i]);
+ //printf("ak: ");
+ //for(i=0; i<LPC_ORD; i++)
+ // printf("%2.3f ", ak[i]);
+ printf("Am: ");
+ for(i=0; i<5; i++)
+ printf("%2.3f ", model.A[i]);
printf("\n");
/* interpolate odd frame model parameters from adjacent frames */
interpolate_lsp(&model_interp, &c2->prev_model, &model,
c2->prev_lsps_, c2->prev_energy, lsps_, energy, ak_interp);
apply_lpc_correction(&model_interp);
- printf("Wo: %1.5f L: %d prev_e: %3.2f\n",
- model_interp.Wo, model_interp.L, c2->prev_energy );
- printf("ak_interp: ");
- for(i=0; i<LPC_ORD; i++)
- printf("%2.3f ", ak_interp[i]);
+ printf("Wo: %1.5f L: %d prev_e: %3.2f v1: %d pv: %d\n",
+ model_interp.Wo, model_interp.L, c2->prev_energy, voiced1,
+ c2->prev_model.voiced);
+ //printf("ak_interp: ");
+ //for(i=0; i<LPC_ORD; i++)
+ // printf("%2.3f ", ak_interp[i]);
+ //printf("\n");
+ printf("Am: ");
+ for(i=0; i<5; i++)
+ printf("%2.3f ", model_interp.A[i]);
printf("\n");
- //if (frames ==40)
+ //if (frames == 6)
// exit(0);
/* synthesise two 10ms frames */
COMP Sw[FFT_ENC];
COMP Sw_[FFT_ENC];
COMP Ew[FFT_ENC];
- float pitch;
+ float pitch, snr;
int i;
/* Read input speech */
/* Estimate pitch */
- nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
+ nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw, &c2->prev_Wo);
model->Wo = TWO_PI/pitch;
model->L = PI/model->Wo;
two_stage_pitch_refinement(model, Sw);
estimate_amplitudes(model, Sw, c2->W);
- est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo);
-
+ snr = est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo);
+ printf("snr %3.2f v: %d Wo: %f prev_Wo: %f\n",
+ snr, model->voiced, model->Wo, c2->prev_Wo);
c2->prev_Wo = model->Wo;
}
float lsps[LPC_ORD],e;
float snr;
+ /* trap corner case where V est is probably wrong */
+
+ if (interp->voiced && !prev->voiced && !next->voiced) {
+ interp->voiced = 0;
+ }
+
/* Wo depends on voicing of this and adjacent frames */
if (interp->voiced) {
}
interp->L = PI/interp->Wo;
+ //printf("interp: prev_v: %d next_v: %d prev_Wo: %f next_Wo: %f\n",
+ // prev->voiced, next->voiced, prev->Wo, next->Wo);
+ //printf("interp: Wo: %1.5f L: %d\n", interp->Wo, interp->L);
+
/* interpolate LSPs */
for(i=0; i<LPC_ORD; i++) {
/* post processing, helps clean up some voicing errors ------------------*/
/*
- Determine the ratio of low freancy to high frequency energy,
+ Determine the ratio of low frequency to high frequency energy,
voiced speech tends to be dominated by low frequency energy,
unvoiced by high frequency. This measure can be used to
determine if we have made any gross errors.
Nov 2010 - found that synthesis using time domain cos() functions
gives better results for synthesis frames greater than 10ms. Inverse
FFT synthesis using a 512 pt FFT works well for 10ms window. I think
- (but am not sure) that the problem is realted to the quantisation of
+ (but am not sure) that the problem is related to the quantisation of
the harmonic frequencies to the FFT bin size, e.g. there is a
8000/512 Hz step between FFT bins. For some reason this makes
the speech from longer frame > 10ms sound poor. The effect can also