MODEL model;
float Pn[2*N]; /* trapezoidal synthesis window */
float Sn_[2*N]; /* synthesised speech */
- int i; /* loop variable */
+ int i,m; /* loop variable */
int frames;
- float prev_Wo, prev__Wo, uq_Wo, prev_uq_Wo;
+ float prev_Wo, prev__Wo, prev_uq_Wo;
float pitch;
- int voiced1 = 0;
char out_file[MAX_STR];
char ampexp_arg[MAX_STR];
char phaseexp_arg[MAX_STR];
int lpc_model = 0, order = LPC_ORD;
int lsp = 0, lspd = 0, lspvq = 0;
int lspres = 0;
- int lspdt = 0, lspdt_mode = LSPDT_ALL;
- int dt = 0, lspjvm = 0, lspanssi = 0, lspjnd = 0, lspmel = 0;
+ int lspjvm = 0, lspjnd = 0, lspmel = 0;
+ #ifdef __EXPERIMENTAL__
+ int lspanssi = 0;
+ #endif
int prede = 0;
float pre_mem = 0.0, de_mem = 0.0;
float ak[order];
float ex_phase[MAX_AMP+1];
int postfilt;
- float bg_est;
int hand_voicing = 0, phaseexp = 0, ampexp = 0, hi = 0, simlpcpf = 0;
int lpcpf = 0;
FILE *fvoicing = 0;
- MODEL prev_model, interp_model;
- int decimate = 0;
+ MODEL prev_model;
+ int dec;
+ int decimate = 1;
float lsps[order];
- float prev_lsps[order], prev_lsps_[order];
- float lsps__prev[order];
- float lsps__prev2[order];
float e, prev_e;
- float ak_interp[order];
int lsp_indexes[order];
float lsps_[order];
float Woe_[2];
+ float lsps_dec[4][LPC_ORD], e_dec[4], weight, weight_inc, ak_dec[4][LPC_ORD+1];
+ MODEL model_dec[4], prev_model_dec;
+ float prev_lsps_dec[order], prev_e_dec;
+
void *nlp_states;
float hpf_states[2];
int scalar_quant_Wo_e = 0;
{ "lspd", no_argument, &lspd, 1 },
{ "lspvq", no_argument, &lspvq, 1 },
{ "lspres", no_argument, &lspres, 1 },
- #ifdef __EXPERIMENTAL__
- { "lspdt", no_argument, &lspdt, 1 },
- { "lspdt_mode", required_argument, NULL, 0 },
- #endif
{ "lspjvm", no_argument, &lspjvm, 1 },
#ifdef __EXPERIMENTAL__
{ "lspanssi", no_argument, &lspanssi, 1 },
{ "ampexp", required_argument, &exp, 1 },
{ "postfilter", no_argument, &postfilt, 1 },
{ "hand_voicing", required_argument, &hand_voicing, 1 },
- { "dec", no_argument, &decimate, 1 },
- { "dt", no_argument, &dt, 1 },
+ { "dec", required_argument, &dec, 1 },
{ "hi", no_argument, &hi, 1 },
{ "simlpcpf", no_argument, &simlpcpf, 1 },
{ "lpcpf", no_argument, &lpcpf, 1 },
//ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX;
ex_phase[i] = 0.0;
}
- for(i=0; i<order; i++) {
- lsps_[i] = prev_lsps[i] = prev_lsps_[i] = i*PI/(order+1);
- lsps__prev[i] = lsps__prev2[i] = i*PI/(order+1);
- }
e = prev_e = 1;
hpf_states[0] = hpf_states[1] = 0.0;
|| strcmp(long_options[option_index].name, "lspd") == 0
|| strcmp(long_options[option_index].name, "lspvq") == 0) {
assert(order == LPC_ORD);
- } else if(strcmp(long_options[option_index].name, "lspdt_mode") == 0) {
- if (strcmp(optarg,"all") == 0)
- lspdt_mode = LSPDT_ALL;
- else if (strcmp(optarg,"low") == 0)
- lspdt_mode = LSPDT_LOW;
- else if (strcmp(optarg,"high") == 0)
- lspdt_mode = LSPDT_HIGH;
- else {
- fprintf(stderr, "Error in lspdt_mode: %s\n", optarg);
+ } else if(strcmp(long_options[option_index].name, "dec") == 0) {
+
+ decimate = atoi(optarg);
+ if ((decimate != 2) && (decimate != 4)) {
+ fprintf(stderr, "Error in --dec, must be 2 or 4\n");
exit(1);
}
+
+ if (!phase0) {
+ fprintf(stderr, "needs --phase0 to resample phase when using --dec\n");
+ exit(1);
+ }
+ if (!lpc_model) {
+ fprintf(stderr, "needs --lpc [order] to resample amplitudes when using --dec\n");
+ exit(1);
+ }
+
} else if(strcmp(long_options[option_index].name, "hand_voicing") == 0) {
if ((fvoicing = fopen(optarg,"rt")) == NULL) {
fprintf(stderr, "Error opening voicing file: %s: %s.\n",
lsp = 1;
phase0 = 1;
postfilt = 1;
- decimate = 1;
+ decimate = 2;
lpcpf = 1;
} else if(strcmp(optarg,"1400") == 0) {
lpc_model = 1;
vector_quant_Wo_e = 1;
- lsp = 1; lspdt = 1;
+ lsp = 1;
phase0 = 1;
postfilt = 1;
- decimate = 1;
- dt = 1;
+ decimate = 4;
lpcpf = 1;
- } else if(strcmp(optarg,"1200") == 0) {
+ } else if(strcmp(optarg,"1300") == 0) {
lpc_model = 1;
scalar_quant_Wo_e = 1;
- lspjvm = 1; lspdt = 1;
+ lsp = 1;
phase0 = 1;
postfilt = 1;
- decimate = 1;
- dt = 1;
+ decimate = 4;
+ lpcpf = 1;
+ } else if(strcmp(optarg,"1200") == 0) {
+ lpc_model = 1;
+ scalar_quant_Wo_e = 1;
+ lspjvm = 1;
+ phase0 = 1;
+ postfilt = 1;
+ decimate = 4;
lpcpf = 1;
} else {
fprintf(stderr, "Error: invalid output rate (3200|2400|1400|1200) %s\n", optarg);
}
ex_phase[0] = 0;
- bg_est = 0.0;
Woe_[0] = Woe_[1] = 1.0;
/*
bpf_buf[i] = 0.0;
}
- /*----------------------------------------------------------------*\
+ for(i=0; i<LPC_ORD; i++) {
+ prev_lsps_dec[i] = i*PI/(LPC_ORD+1);
+ }
+ prev_e_dec = 1;
+ for(m=1; m<=MAX_AMP; m++)
+ prev_model_dec.A[m] = 0.0;
+ prev_model_dec.Wo = TWO_PI/P_MAX;
+ prev_model_dec.L = PI/prev_model_dec.Wo;
+ prev_model_dec.voiced = 0;
+
+ /*----------------------------------------------------------------*\
Main Loop
dft_speech(fft_fwd_cfg, Sw, Sn, w);
two_stage_pitch_refinement(&model, Sw);
estimate_amplitudes(&model, Sw, W, 1);
- uq_Wo = model.Wo;
#ifdef DUMP
dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
if (phase0) {
float Wn[M]; /* windowed speech samples */
float Rk[order+1]; /* autocorrelation coeffs */
+ COMP a[FFT_ENC];
#ifdef DUMP
dump_phase(&model.phi[0], model.L);
for(i=0; i<=MAX_AMP; i++)
model.phi[i] = 0;
+ /* Determine DFT of A(exp(jw)), which is needed for phase0 model when
+ LPC is not used, e.g. in decimate=1 (10ms) frames with no LPC */
+
+ for(i=0; i<FFT_ENC; i++) {
+ a[i].real = 0.0;
+ a[i].imag = 0.0;
+ }
+
+ for(i=0; i<=order; i++)
+ a[i].real = ak[i];
+ kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)a, (kiss_fft_cpx *)Aw);
+
if (hand_voicing) {
fscanf(fvoicing,"%d\n",&model.voiced);
}
if (lpc_model) {
e = speech_to_uq_lsps(lsps, ak, Sn, w, order);
+ for(i=0; i<LPC_ORD; i++)
+ lsps_[i] = lsps[i];
#ifdef DUMP
dump_ak(ak, order);
fprintf(fjvm, "%f\n", e);
#ifdef DUMP
- /* dump order is different if we are decimating */
- if (!decimate)
- dump_lsp(lsps);
- for(i=0; i<order; i++)
- prev_lsps[i] = lsps[i];
+ dump_lsp(lsps);
#endif
/* various LSP quantisation schemes */
*/
if (lspmel) {
- float f, f_, dmel;
+ float f, f_;
float mel[LPC_ORD];
int mel_indexes[LPC_ORD];
lsp_to_lpc(lsps_, ak, order);
}
-
- /* we need lsp__prev[] for lspdt and decimate. If no
- other LSP quantisation is used we use original LSPs as
- there is no quantised version available. TODO: this is
- mess, we should have structures and standard
- nomenclature for previous frames values, lsp_[]
- shouldn't be overwritten as we may want to dump it for
- analysis. Re-design some time.
- */
-
- if (!lsp && !lspd && !lspvq && !lspres && !lspjvm && !lspanssi && !lspjnd && !lspmel)
- for(i=0; i<LPC_ORD; i++)
- lsps_[i] = lsps[i];
-
- /* Odd frames are generated by quantising the difference
- between the previous frames LSPs and this frames */
-
-#ifdef __EXPERIMENTAL__
- if (lspdt && !decimate) {
- if (frames%2) {
- lspdt_quantise(lsps, lsps_, lsps__prev, lspdt_mode);
- bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0);
- lsp_to_lpc(lsps_, ak, LPC_ORD);
- }
- for(i=0; i<LPC_ORD; i++)
- lsps__prev[i] = lsps_[i];
- }
-#endif
-
- /*
- When decimation is enabled we only send LSPs to the
- decoder on odd frames. In the Delta-time LSPs case we
- encode every second odd frame (i.e. every 3rd frame out
- of 4) by quantising the difference between the 1st
- frames LSPs and the 3rd frames:
-
- 10ms, frame 1: discard (interpolate at decoder)
- 20ms, frame 2: send "full" LSP frame
- 30ms, frame 3: discard (interpolate at decoder)
- 40ms, frame 4: send LSPs differences between frame 4 and frame 2
- */
-
- if (lspdt && decimate) {
- /* print previous LSPs to make sure we are using the right set */
- if ((frames%4) == 0) {
- //printf(" lspdt ");
- //#define LSPDT
- #ifdef LSPDT
- lspdt_quantise(lsps, lsps_, lsps__prev2, lspdt_mode);
- #else
- for(i=0; i<LPC_ORD; i++)
- lsps_[i] = lsps__prev2[i];
- #endif
- bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0);
- lsp_to_lpc(lsps_, ak, LPC_ORD);
- }
-
- for(i=0; i<LPC_ORD; i++) {
- lsps__prev2[i] = lsps__prev[i];
- lsps__prev[i] = lsps_[i];
- }
- }
- #ifdef DUMP
- /* if using decimated (20ms) frames we dump interp
- LSPs below */
- if (!decimate)
- dump_lsp_(lsps_);
- #endif
if (scalar_quant_Wo_e) {
e = decode_energy(encode_energy(e, E_BITS), E_BITS);
-
- if (!decimate) {
- /* we send params every 10ms, delta-time every 20ms */
- if (dt && (frames % 2))
- model.Wo = decode_Wo_dt(encode_Wo_dt(model.Wo, prev_Wo),prev_Wo);
- else
- model.Wo = decode_Wo(encode_Wo(model.Wo, WO_BITS), WO_BITS);
- }
-
- if (decimate) {
- /* we send params every 20ms */
- if (dt && ((frames % 4) == 0)) {
- /* delta-time every 40ms */
- model.Wo = decode_Wo_dt(encode_Wo_dt(model.Wo, prev__Wo),prev__Wo);
- }
- else
- model.Wo = decode_Wo(encode_Wo(model.Wo, WO_BITS), WO_BITS);
- }
-
+ model.Wo = decode_Wo(encode_Wo(model.Wo, WO_BITS), WO_BITS);
model.L = PI/model.Wo; /* if we quantise Wo re-compute L */
}
/* JVM's experimental joint Wo & LPC energy quantiser */
- //printf("\nWo %f e %f\n", model.Wo, e);
quantise_WoE(&model, &e, Woe_);
- //printf("Wo %f e %f\n", model.Wo, e);
-
}
- aks_to_M2(fft_fwd_cfg, ak, order, &model, e, &snr, 1, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
- apply_lpc_correction(&model);
-
- #ifdef DUMP
- dump_ak_(ak, order);
- #endif
-
- /* note SNR on interpolated frames can't be measured properly
- by comparing Am as L has changed. We can dump interp lsps
- and compare them,
- */
- #ifdef DUMP
- dump_lpc_snr(snr);
- #endif
- sum_snr += snr;
- #ifdef DUMP
- dump_quantised_model(&model);
- #endif
}
/*------------------------------------------------------------*\
- Decimation to 20ms frame rate
+ Synthesise and optional decimation to 20 or 40ms frame rate
\*------------------------------------------------------------*/
- if (decimate) {
- float lsps_interp[order];
+ /*
+ if decimate == 2, we interpolate frame n from frame n-1 and n+1
+ if decimate == 4, we interpolate frames n, n+1, n+2, from frames n-1 and n+3
- if (!phase0) {
- printf("needs --phase0 to resample phase for interpolated Wo\n");
- exit(0);
- }
- if (!lpc_model) {
- printf("needs --lpc [order] to resample amplitudes\n");
- exit(0);
- }
+ This is meant to give identical results to the implementations of various modes
+ in codec2.c
+ */
- /*
- Each 20ms we synthesise two 10ms frames:
+ /* delay line to keep frame by frame voicing decisions */
- frame 1: discard except for voicing bit
- frame 2: interpolate frame 1 LSPs from frame 2 and frame 0
- synthesise frame 1 and frame 2 speech
- frame 3: discard except for voicing bit
- frame 4: interpolate frame 3 LSPs from frame 4 and frame 2
- synthesise frame 3 and frame 4 speech
- */
+ for(i=0; i<decimate-1; i++)
+ model_dec[i] = model_dec[i+1];
+ model_dec[decimate-1] = model;
- if ((frames%2) == 0) {
- //printf("frame: %d\n", frames);
+ if ((frames % decimate) == 0) {
+ for(i=0; i<order; i++)
+ lsps_dec[decimate-1][i] = lsps_[i];
+ e_dec[decimate-1] = e;
+ model_dec[decimate-1] = model;
- /* decode interpolated frame */
+ /* interpolate the model parameters */
- interp_model.voiced = voiced1;
-
- #ifdef FIX_ME
- /* NOTE: need to get this woking again */
- interpolate_lsp_ver2(lsps_interp, prev_lsps_, lsps_, 0.5)
- aks_to_M2(fft_fwd_cfg, ak, order, &model, e, &snr, 1, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
- interpolate_lsp(fft_fwd_cfg, &interp_model, &prev_model, &model,
- prev_lsps_, prev_e, lsps_, e, ak_interp, lsps_interp);
- #endif
- apply_lpc_correction(&interp_model);
-
- /* used to compare with c2enc/c2dec version
-
- printf(" Wo: %1.5f L: %d v1: %d prev_e: %f\n",
- interp_model.Wo, interp_model.L, interp_model.voiced, prev_e);
- printf(" lsps_interp: ");
- for(i=0; i<order; i++)
- printf("%5.3f ", lsps_interp[i]);
- printf("\n A..........: ");
- for(i=0; i<order; i++)
- printf("%5.3f ",interp_model.A[i]);
-
- printf("\n Wo: %1.5f L: %d e: %3.2f v2: %d\n",
- model.Wo, model.L, e, model.voiced);
- printf(" lsps_......: ");
- for(i=0; i<order; i++)
- printf("%5.3f ", lsps_[i]);
- printf("\n A..........: ");
- for(i=0; i<order; i++)
- printf("%5.3f ",model.A[i]);
- printf("\n");
- */
-
- #ifdef DUMP
- /* do dumping here so we get lsp dump file in correct order */
- dump_lsp(prev_lsps);
- dump_lsp(lsps_interp);
- dump_lsp(lsps);
- dump_lsp(lsps_);
- #endif
+ weight_inc = 1.0/decimate;
+ for(i=0, weight=weight_inc; i<decimate-1; i++, weight += weight_inc) {
+ //model_dec[i].voiced = model_dec[decimate-1].voiced;
+ interpolate_lsp_ver2(&lsps_dec[i][0], prev_lsps_dec, &lsps_dec[decimate-1][0], weight, order);
+ interp_Wo2(&model_dec[i], &prev_model_dec, &model_dec[decimate-1], weight);
+ e_dec[i] = interp_energy2(prev_e_dec, e_dec[decimate-1],weight);
+ }
- if (phase0)
- phase_synth_zero_order(fft_fwd_cfg, &interp_model, ex_phase, Aw);
- if (postfilt)
- postfilter(&interp_model, &bg_est);
- synth_one_frame(fft_inv_cfg, buf, &interp_model, Sn_, Pn, prede, &de_mem, gain);
- //printf(" buf[0] %d\n", buf[0]);
- if (fout != NULL)
- fwrite(buf,sizeof(short),N,fout);
-
- /* decode this frame */
-
- if (phase0)
- phase_synth_zero_order(fft_fwd_cfg, &model, ex_phase, Aw);
- if (postfilt)
- postfilter(&model, &bg_est);
- synth_one_frame(fft_inv_cfg, buf, &model, Sn_, Pn, prede, &de_mem, gain);
- //printf(" buf[0] %d\n", buf[0]);
- if (fout != NULL)
- fwrite(buf,sizeof(short),N,fout);
-
- /* update states for next time */
-
- prev_model = model;
- for(i=0; i<order; i++)
- prev_lsps_[i] = lsps_[i];
- prev_e = e;
- }
- else {
- voiced1 = model.voiced;
- }
- }
- else {
- /* no decimation - sythesise each 10ms frame immediately */
-
- if (phase0)
- phase_synth_zero_order(fft_fwd_cfg, &model, ex_phase, Aw);
+ /* then recover spectral amplitudes and synthesise */
+
+ for(i=0; i<decimate; i++) {
+ if (lpc_model) {
+ lsp_to_lpc(&lsps_dec[i][0], &ak_dec[i][0], order);
+ aks_to_M2(fft_fwd_cfg, &ak_dec[i][0], order, &model_dec[i], e_dec[i],
+ &snr, 0, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
+ apply_lpc_correction(&model_dec[i]);
+ #ifdef DUMP
+ dump_lsp_(&lsps_dec[i][0]);
+ dump_ak_(&ak_dec[i][0], order);
+ sum_snr += snr;
+ dump_quantised_model(&model_dec[i]);
+ #endif
+ }
- if (postfilt)
- postfilter(&model, &bg_est);
- synth_one_frame(fft_inv_cfg, buf, &model, Sn_, Pn, prede, &de_mem, gain);
- if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
- }
+ if (phase0)
+ phase_synth_zero_order(fft_fwd_cfg, &model_dec[i], ex_phase, Aw);
+ synth_one_frame(fft_inv_cfg, buf, &model_dec[i], Sn_, Pn, prede, &de_mem, gain);
+ if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
+ }
+ /*
+ for(i=0; i<decimate; i++) {
+ printf("%d Wo: %f L: %d v: %d\n", frames, model_dec[i].Wo, model_dec[i].L, model_dec[i].voiced);
+ }
+ if (frames == 4*50)
+ exit(0);
+ */
+ /* update memories for next frame ----------------------------*/
+
+ prev_model_dec = model_dec[decimate-1];
+ prev_e_dec = e_dec[decimate-1];
+ for(i=0; i<LPC_ORD; i++)
+ prev_lsps_dec[i] = lsps_dec[decimate-1][i];
+ }
- prev__Wo = prev_Wo;
- prev_Wo = model.Wo;
- prev_uq_Wo = uq_Wo;
- //if (frames == 8) {
- // exit(0);
- //}
}
/*----------------------------------------------------------------*\
option_parameters="";
} else if (strcmp("lpc", long_options[i].name) == 0) {
option_parameters = " <Order>";
- } else if (strcmp("lspdt_mode", long_options[i].name) == 0) {
- option_parameters = " <all|high|low>";
+ } else if (strcmp("dec", long_options[i].name) == 0) {
+ option_parameters = " <2|4>";
} else if (strcmp("hand_voicing", long_options[i].name) == 0) {
option_parameters = " <VoicingFile>";
} else if (strcmp("dump_pitch_e", long_options[i].name) == 0) {
option_parameters = " <Dump File>";
} else if (strcmp("rate", long_options[i].name) == 0) {
- option_parameters = " <3200|2400|1400|1200>";
+ option_parameters = " <3200|2400|1400|1300|1200>";
} else if (strcmp("dump", long_options[i].name) == 0) {
option_parameters = " <DumpFilePrefix>";
} else {
}
exit(1);
}
+
void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est);
void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits);
-void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]);
-void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits);
+void codec2_encode_650(struct CODEC2 *c2, unsigned char * bits, short speech[]);
+void codec2_decode_650(struct CODEC2 *c2, short speech[], const unsigned char * bits);
static void ear_protection(float in_out[], int n);
/*---------------------------------------------------------------------------*\
(mode == CODEC2_MODE_1400) ||
(mode == CODEC2_MODE_1300) ||
(mode == CODEC2_MODE_1200) ||
- (mode == CODEC2_MODE_450)
+ (mode == CODEC2_MODE_650)
);
c2->mode = mode;
for(i=0; i<M; i++)
return 52;
if (c2->mode == CODEC2_MODE_1200)
return 48;
- if (c2->mode == CODEC2_MODE_450)
- return 18;
+ if (c2->mode == CODEC2_MODE_650)
+ return 26;
return 0; /* shouldn't get here */
}
return 320;
if (c2->mode == CODEC2_MODE_1200)
return 320;
- if (c2->mode == CODEC2_MODE_450)
+ if (c2->mode == CODEC2_MODE_650)
return 320;
return 0; /* shouldnt get here */
(c2->mode == CODEC2_MODE_1400) ||
(c2->mode == CODEC2_MODE_1300) ||
(c2->mode == CODEC2_MODE_1200) ||
- (c2->mode == CODEC2_MODE_450)
+ (c2->mode == CODEC2_MODE_650)
);
if (c2->mode == CODEC2_MODE_3200)
codec2_encode_1300(c2, bits, speech);
if (c2->mode == CODEC2_MODE_1200)
codec2_encode_1200(c2, bits, speech);
- if (c2->mode == CODEC2_MODE_450)
- codec2_encode_450(c2, bits, speech);
+ if (c2->mode == CODEC2_MODE_650)
+ codec2_encode_650(c2, bits, speech);
}
void CODEC2_WIN32SUPPORT codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits)
(c2->mode == CODEC2_MODE_1400) ||
(c2->mode == CODEC2_MODE_1300) ||
(c2->mode == CODEC2_MODE_1200) ||
- (c2->mode == CODEC2_MODE_450)
+ (c2->mode == CODEC2_MODE_650)
);
if (c2->mode == CODEC2_MODE_3200)
codec2_decode_1300(c2, speech, bits, ber_est);
if (c2->mode == CODEC2_MODE_1200)
codec2_decode_1200(c2, speech, bits);
- if (c2->mode == CODEC2_MODE_450)
- codec2_decode_450(c2, speech, bits);
+ if (c2->mode == CODEC2_MODE_650)
+ codec2_decode_650(c2, speech, bits);
}
/* LSPs are sampled every 20ms so we interpolate the frame in
between, then recover spectral amplitudes */
- interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5);
+ interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD);
for(i=0; i<2; i++) {
lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
/* LSPs are sampled every 20ms so we interpolate the frame in
between, then recover spectral amplitudes */
- interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5);
+ interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD);
for(i=0; i<2; i++) {
lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
between, then recover spectral amplitudes */
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
- interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
}
for(i=0; i<4; i++) {
lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
between, then recover spectral amplitudes */
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
- interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
}
for(i=0; i<4; i++) {
lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
Decodes frames of 52 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
-
+static int frames;
void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est)
{
MODEL model[4];
PROFILE_VAR(recover_start);
assert(c2 != NULL);
-
+ frames+= 4;
/* only need to zero these out due to (unused) snr calculation */
for(i=0; i<4; i++)
PROFILE_SAMPLE(recover_start);
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
- interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
}
apply_lpc_correction(&model[i]);
synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
}
+ /*
+ for(i=0; i<4; i++) {
+ printf("%d Wo: %f L: %d v: %d\n", frames, model[i].Wo, model[i].L, model[i].voiced);
+ }
+ if (frames == 4*50)
+ exit(0);
+ */
PROFILE_SAMPLE_AND_LOG2(recover_start, " recover");
#ifdef DUMP
dump_lsp_(&lsps[3][0]);
between, then recover spectral amplitudes */
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
- interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
}
for(i=0; i<4; i++) {
lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
/*---------------------------------------------------------------------------*\
- FUNCTION....: codec2_encode_450
+ FUNCTION....: codec2_encode_650
AUTHOR......: David Rowe
- DATE CREATED: Oct 1 2014
+ DATE CREATED: April 2015
- Encodes 320 speech samples (40ms of speech) into 18 bits.
+ Encodes 320 speech samples (40ms of speech) into 26 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm four times:
frame 0: nothing
frame 1: nothing
frame 2: nothing
- frame 3: voicing bit, scalar Wo and E, 9 bit VQ of LSPs
+ frame 3: voicing bit, scalar Wo and E, 17 bit LSP MEL scalar
The bit allocation is:
Parameter frames 1-3 frame 4 Total
-----------------------------------------------------------
- Harmonic magnitudes (LSPs) 0 9 9
+ Harmonic magnitudes (LSPs) 0 17 17
Energy 0 3 3
log Wo 0 5 5
Voicing 0 1 1
- TOTAL 0 18 18
+ TOTAL 0 26 26
\*---------------------------------------------------------------------------*/
-void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[])
+void codec2_encode_650(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
MODEL model;
- float lsps[LPC_ORD];
- float lsps_[LPC_ORD];
- float ak[LPC_ORD+1];
- float e;
- int lsp_indexes[LPC_ORD];
- int Wo_index, e_index;
+ float lsps[LPC_ORD_LOW];
+ float mel[LPC_ORD_LOW];
+ float ak[LPC_ORD_LOW+1];
+ float e, f;
+ int indexes[LPC_ORD_LOW];
+ int Wo_index, e_index, i;
unsigned int nbit = 0;
assert(c2 != NULL);
Wo_index = encode_log_Wo(model.Wo, 5);
pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);
- e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD_LOW);
e_index = encode_energy(e, 3);
pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);
- encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD);
- pack(bits, &nbit, lsp_indexes[0], lsp_pred_vq_bits(0));
-
+ for(i=0; i<LPC_ORD_LOW; i++) {
+ f = (4000.0/PI)*lsps[i];
+ mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5);
+ }
+ encode_mels_scalar(indexes, mel, LPC_ORD_LOW);
+
+ for(i=0; i<LPC_ORD_LOW; i++) {
+ pack(bits, &nbit, indexes[i], mel_bits(i));
+ }
+
assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
- FUNCTION....: codec2_decode_450
+ FUNCTION....: codec2_decode_650
AUTHOR......: David Rowe
- DATE CREATED: Oct 1 2014
+ DATE CREATED: April 2015
- Decodes frames of 18 bits into 320 samples (40ms) of speech.
+ Decodes frames of 26 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
-void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits)
+void codec2_decode_650(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
MODEL model[4];
- int lsp_indexes[LPC_ORD];
- float lsps[4][LPC_ORD];
+ int indexes[LPC_ORD_LOW];
+ float mel[LPC_ORD_LOW];
+ float lsps[4][LPC_ORD_LOW];
int Wo_index, e_index;
float e[4];
- float snr;
- float ak[4][LPC_ORD+1];
+ float snr, f_;
+ float ak[4][LPC_ORD_LOW+1];
int i,j;
unsigned int nbit = 0;
float weight;
e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
e[3] = decode_energy(e_index, 3);
- lsp_indexes[0] = unpack(bits, &nbit, lsp_pred_vq_bits(0));
- decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD, 1);
- check_lsp_order(&lsps[3][0], LPC_ORD);
- bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
+ for(i=0; i<LPC_ORD_LOW; i++) {
+ indexes[i] = unpack(bits, &nbit, mel_bits(i));
+ }
+ decode_mels_scalar(mel, indexes, LPC_ORD_LOW);
+ for(i=0; i<LPC_ORD_LOW; i++) {
+ f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0);
+ lsps[3][i] = f_*(PI/4000.0);
+ //printf("lsps[3][%d] %f\n", i, lsps[3][i]);
+ }
+
+ check_lsp_order(&lsps[3][0], LPC_ORD_LOW);
+ bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 50.0, 100.0);
/* interpolate ------------------------------------------------*/
the 3 frames in between, then recover spectral amplitudes */
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
- interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
}
for(i=0; i<4; i++) {
- lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
- aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+ lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
+ aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
apply_lpc_correction(&model[i]);
synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
c2->prev_model_dec = model[3];
c2->prev_e_dec = e[3];
- for(i=0; i<LPC_ORD; i++)
+ for(i=0; i<LPC_ORD_LOW; i++)
c2->prev_lsps_dec[i] = lsps[3][i];
}