# Run steps to encode a speech sample
../unittest/tnlp ../raw/$1.raw ../unittest/$1_nlp.p
-../src/sinenc ../raw/$1.raw %1.mdl 300 ../unittest/$1_nlp.p
-../src/sinedec ../raw/$1.raw %1.mdl -o $1_uq.raw
-../src/sinedec ../raw/$1.raw %1.mdl --phase 0 -o $1_phase0.raw --postfilter
-../src/sinedec ../raw/$1.raw %1.mdl --lpc 10 -o $1_lpc10.raw
+../src/sinenc ../raw/$1.raw $1.mdl 300 ../unittest/$1_nlp.p
+../src/sinedec ../raw/$1.raw $1.mdl -o $1_uq.raw
+../src/sinedec ../raw/$1.raw $1.mdl --phase 0 -o $1_phase0.raw --postfilter
+../src/sinedec ../raw/$1.raw $1.mdl --lpc 10 -o $1_lpc10.raw
+../src/sinedec ../raw/$1.raw $1.mdl --phase 0 --lpc 10 -o $1_phase0_lpc10.raw --postfilter
+../src/sinedec ../raw/$1.raw $1.mdl --phase 0 --lpc 10 --lsp 37 -o $1_lsp.raw --postfilter
#
# Run menu with common sample file options, headphone version
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_phase0_lpc10.raw $1_lsp.raw ../raw/$1_speex_8k.raw $2 $3
+
#
# Run menu with common sample file options, headphone version
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw ../raw/$1_speex_8k.raw -d /dev/dsp1
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_phase0_lpc10.raw $1_lsp.raw ../raw/$1_speex_8k.raw $2 $3 -d /dev/dsp1
if (Lrand > model.L) Lrand = model.L;
}
- /* update excitation fundamental phase track */
+ /* update excitation fundamental phase track, this sets
+ the position of each pitch pulse during voiced speech */
ex_phase[0] += (*prev_Wo+model.Wo)*N/2.0;
ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
+
+ /* After much experimentation I found that a few percent of jitter
+ was effective in reducing "clicky" artifacts in hts1 and mmt1. The
+ peak level of the synthesised speech was reduced to levels closer
+ to the original speech as well. */
+
+ ex_phase[0] += 0.05*TWO_PI*(0.5 - (float)rand()/RAND_MAX);
+
*prev_Wo = model.Wo;
/* now modify this frames phase using zero phase model */
/* generate excitation */
if (m <= Lrand) {
- Ex[m].real = cos(ex_phase[0]*m);
+ Ex[m].real = cos(ex_phase[0]*m);
Ex[m].imag = sin(ex_phase[0]*m);
- /* following is an experiment in dispersing pulse energy over time,
- didn't really change sound at all, e.g. mmt1 still sounded
- "clicky"*/
+ /* following is an experiment in dispersing pulse energy over
+ time, didn't really change sound at all, e.g. mmt1 still
+ sounded "clicky". I think this is because this provides
+ just a small phase shift between adjacent harmonics.
+ However for voiced speech it is the high energy harmonics
+ that form pitch pulses, so we need a relatively high phase
+ shift between them to disperse pulse energy */
+
//Ex[m].real = cos(ex_phase[0]*m + model.Wo*m*m*0.3);
//Ex[m].imag = sin(ex_phase[0]*m + model.Wo*m*m*0.3);
(see octave/glottal.m) in an attempt io make mmt1 and hts1 a little
less "clicky", i.e. disperse the pusle energy away from the point
of onset. Result was no difference in speech quality, in fact
- no difference at all. Could be an implementation error I guess. */
+ no difference at all. Could be an implementation error I guess.
+ Once again - this model doesn't change phases much between adjacent
+ harmonics, so not much dispersion. */
//b = floor(m*model->Wo*FFT_DEC/TWO_PI + 0.5);
//Ex[m].real = cos(ex_phase[0]*m + glottal[b]);
//Ex[m].imag = sin(ex_phase[0]*m + glottal[b]);
#define MAX_ORDER 20
#define LPC_FLOOR 0.0002 /* autocorrelation floor */
-#define LSP_DELTA1 0.2 /* grid spacing for LSP root searches */
+#define LSP_DELTA1 0.05 /* grid spacing for LSP root searches */
/* Speex lag window */
E += ak[i]*R[i];
if (lsp_quantisation) {
- roots = lpc_to_lsp(&ak[1], order, lsp, 10, LSP_DELTA1, NULL);
-
+ roots = lpc_to_lsp(&ak[1], order, lsp, 5, LSP_DELTA1, NULL);
+ if (roots != order)
+ printf("LSP roots not found\n");
index = quantise(cb12, &lsp[0], LSP_12_K, LSP_12_M, &se);
lsp[0] = cb12[index*LSP_12_K+0];
lsp[1] = cb12[index*LSP_12_K+1];
int lpc_model, order;
int lsp, lsp_quantiser;
float ak[LPC_MAX_ORDER+1];
-
+
int dump;
int phase, phase_model;
int postfilt;
float bg_est;
+
if (argc < 3) {
printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n");
printf(" [--dump DumpFilePrefix]\n");
if (phase) {
phase_model = atoi(argv[phase+1]);
assert((phase_model == 0) || (phase_model == 1));
+ ex_phase = 0;
}
bg_est = 0.0;
dump_snr(snr);
if (phase_model == 0) {
/* just to make sure we are not cheating - kill all phases */
- //for(i=0; i<MAX_AMP; i++)
- // model.phi[i] = 0;
+ for(i=0; i<MAX_AMP; i++)
+ model.phi[i] = 0;
phase_synth_zero_order(snr, H, &prev_Wo, &ex_phase);
}