SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \
quantise.o lpc.o dump.o refine.o ../speex/lsp.o \
../speex/quant_lsp.o ../speex/bits.o ../speex/lsp_tables_nb.o \
- ../speex/high_lsp_tables.o phase.o postfilter.o
+ ../speex/high_lsp_tables.o phase.o postfilter.o interp.o
all: sinenc sinedec
--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: interp.c
+ AUTHOR......: David Rowe
+ DATE CREATED: 9/10/09
+
+ Interpolation of 20ms frames to 10ms frames.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2009 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "interp.h"
+#include <math.h>
+#include <string.h>
+
+/*---------------------------------------------------------------------------*\
+
+ interp()
+
+ Given two frames described by model parameters 20ms apart, determines the
+ model parameters of the 10ms frame between them. Note that phases are
+ not interpolated; they must be set externally to this function.
+
+\*---------------------------------------------------------------------------*/
+
+void interp(
+  MODEL *prev,      /* in:  previous frame's model params              */
+  MODEL *next,      /* in:  next frame's model params                  */
+  MODEL *synth,     /* out: interpolated Wo, L and A[1..L]; written
+                            only when *transition is set to 0         */
+  MODEL *a,         /* out: prev frame extended into this frame;
+                            written only when *transition is set to 1 */
+  MODEL *b,         /* out: next frame extended into this frame;
+                            written only when *transition is set to 1 */
+  int   *transition /* out: non-zero if this is a transition frame,
+                            this information is used for synthesis    */
+)
+{
+    int m;
+
+    if (fabs(next->Wo - prev->Wo) < 0.1*next->Wo) {
+
+        /* If the Wo of adjacent frames is within 10% we synthesise a
+           continuous track through this frame by linear interpolation
+           of the amplitudes and Wo.  This is typical of a strongly
+           voiced frame.
+        */
+
+        *transition = 0;
+
+        synth->Wo = (next->Wo + prev->Wo)/2.0;
+
+        /* use the smaller harmonic count so that only indices 1..L
+           common to both frames are averaged */
+        if (next->L > prev->L)
+            synth->L = prev->L;
+        else
+            synth->L = next->L;
+        for(m=1; m<=synth->L; m++) {
+            synth->A[m] = (prev->A[m] + next->A[m])/2.0;
+        }
+
+        /* synth->phi is not written here, the caller must set the
+           phases */
+    }
+    else {
+        /*
+           transition frame, adjacent frames have different Wo and L
+           so set up two sets of model parameters based on prev and
+           next.  We then synthesise both of them and add them
+           together in the time domain.
+
+           The transition case is typical of unvoiced speech or
+           background noise or a voiced/unvoiced transition.
+        */
+
+        *transition = 1;
+
+        /* a is prev extended forward into this frame, b is next
+           extended backward into this frame.  Note the adjustments to
+           phase to time-shift the model forward or backward N
+           samples. */
+
+        /* size the copies from the destination objects themselves
+           rather than from an unrelated object named "model" */
+        memcpy(a, prev, sizeof *a);
+        memcpy(b, next, sizeof *b);
+
+        /* each model is halved in amplitude as the two are added
+           together at synthesis (see above) */
+        for(m=1; m<=a->L; m++) {
+            a->A[m] /= 2.0;
+            a->phi[m] += a->Wo*m*N;
+        }
+        for(m=1; m<=b->L; m++) {
+            b->A[m] /= 2.0;
+            b->phi[m] -= b->Wo*m*N;
+        }
+    }
+
+}
+
--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: interp.h
+ AUTHOR......: David Rowe
+ DATE CREATED: 9/10/09
+
+ Interpolation of 20ms frames to 10ms frames.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2009 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __INTERP__
+#define __INTERP__
+
+#include "sine.h"
+
+void interp(MODEL *prev, MODEL *next, MODEL *synth, MODEL *a, MODEL *b,
+ int *transition); /* *transition == 0: synth is filled in; *transition == 1: a and b are filled in */
+
+#endif /* __INTERP__ */
#
# Run menu with common sample file options, headphone version
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_speex_8k.raw $2 $3
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_g729a.raw $2 $3
#
# Run menu with common sample file options, headphone version
-../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_speex_8k.raw $2 $3 -d /dev/dsp1
+../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_g729a.raw $2 $3 -d /dev/dsp1
int voiced
)
{
- int m;
+ int m,i;
float new_phi;
COMP Ex[MAX_AMP]; /* excitation samples */
COMP A_[MAX_AMP]; /* synthesised harmonic samples */
+ float maxA;
/*
Update excitation fundamental phase track, this sets the position
new_phi = atan2(A_[m].imag, A_[m].real+1E-12);
model.phi[m] = new_phi;
}
+
+ #ifdef CLICKY
+ /* Adding a random component to low energy harmonic phase seems to
+ improve low pitch speakers. Adding a small random component to
+ low energy harmonic amplitudes also helps low pitch speakers after
+ LPC modelling (see LPC modelling/amplitude quantisation code).
+ */
+
+ maxA = 0.0;
+ for(i=1; i<=model.L; i++) {
+ if (model.A[i] > maxA) {
+ maxA = model.A[i];
+ }
+ }
+ for(i=1; i<=model.L; i++) {
+ if (model.A[i] < 0.1*maxA) {
+ model.phi[i] += 0.2*TWO_PI*(float)rand()/RAND_MAX;
+ }
+ }
+ #endif
}
/*---------------------------------------------------------------------------*\
#define MAX_ORDER 20
#define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */
-#define MAX_CB 10 /* max number of codebooks */
+#define MAX_CB 20 /* max number of codebooks */
/* describes each codebook */
/* lsp_q describes entire quantiser made up of several codebooks */
+#ifdef OLDER
/* 10+10+6+6 = 32 bit LSP difference split VQ */
LSP_CB lsp_q[] = {
{2, 64, "../unittest/lspd910.txt"},
{0, 0, ""}
};
+#endif
+
+LSP_CB lsp_q[] = { /* entries look like {k, m, codebook file} - TODO confirm against LSP_CB */
+ {1, 16, "../unittest/lsp1.txt"},
+ {1, 16, "../unittest/lsp2.txt"},
+ {1, 16, "../unittest/lsp3.txt"},
+ {1, 16, "../unittest/lsp4.txt"},
+ {1, 16, "../unittest/lsp5.txt"},
+ {1, 16, "../unittest/lsp6.txt"},
+ {1, 16, "../unittest/lsp7.txt"},
+ {1, 8, "../unittest/lsp8.txt"},
+ {1, 8, "../unittest/lsp9.txt"},
+ {1, 4, "../unittest/lsp10.txt"},
+ {0, 0, ""} /* presumably the end-of-table sentinel - verify against the codebook loading loop */
+};
/* ptr to each codebook */
int i,j;
float snr;
float lsp[MAX_ORDER];
+ float lsp_hz[MAX_ORDER];
float lsp_[MAX_ORDER];
float lspd[MAX_ORDER];
int roots; /* number of LSP roots found */
float *cb;
float wt[MAX_ORDER];
+ float maxA, dB;
+
for(i=0; i<M; i++)
Wn[i] = Sn[i]*w[i];
autocorrelate(Wn,R,M,order);
if (roots != order)
printf("LSP roots not found\n");
+ for(i=0; i<order; i++)
+ lsp_hz[i] = (4000.0/PI)*lsp[i];
+
+ for(i=0; i<10; i++) {
+ k = lsp_q[i].k;
+ m = lsp_q[i].m;
+ cb = plsp_cb[i];
+ index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
+ lsp_hz[i] = cb[index*k];
+ }
+
+ /*
+ for(i=0; i<order; i++)
+ lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX);
+ */
+
+ for(i=0; i<order; i++)
+ lsp[i] = (PI/4000.0)*lsp_hz[i];
+
+ for(i=1; i<5; i++) {
+ if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
+ lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
+ }
+
+ /* as quantiser gaps increased, larger BW expansion was required
+ to prevent twinkly noises */
+ for(i=5; i<8; i++) {
+ if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
+ lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
+ }
+ for(i=8; i<order; i++) {
+ if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
+ lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
+ }
+
+ //#define OLD_VQ
+#ifdef OLD_VQ
lspd[0] = lsp[0];
for(i=1; i<order; i++)
lspd[i] = lsp[i] - lsp[i-1];
i++;
assert(i < MAX_CB);
}
-
+#else
+ l = 0;
+#endif
/* used during development: copy remaining LSPs from orig if we haven't
quantised all of them */
for(j=l; j<order; j++)
aks_to_M2(ak,order,model,E,&snr); /* {ak} -> {Am} LPC decode */
+ #ifdef CLICKY
+ /* Adding a random component to low energy harmonic phase seems to
+ improve low pitch speakers. Adding a small random component to
+ low energy harmonic amplitudes also helps low pitch speakers after
+ LPC modelling (see LPC modelling/amplitude quantisation code).
+ */
+
+ maxA = 0.0;
+ for(i=1; i<=model->L; i++) {
+ if (model->A[i] > maxA) {
+ maxA = model->A[i];
+ }
+ }
+ for(i=1; i<=model->L; i++) {
+ if (model->A[i] < 0.1*maxA) {
+ dB = 3.0 - 6.0*(float)rand()/RAND_MAX;
+ model->A[i] *= pow(10.0, dB/20.0);
+ }
+ }
+ #endif
+
return snr;
}
for(i=0; i<FFT_DEC; i++) {
Pw[i].real = 0.0;
- Pw[i].imag = 0.0;
- }
+ Pw[i].imag = 0.0; }
for(i=0; i<=order; i++)
Pw[i].real = ak[i];
#include "lpc.h"
#include "synth.h"
#include "postfilter.h"
+#include "interp.h"
/*---------------------------------------------------------------------------*\
dump_Sn(Sn);
dft_speech(); dump_Sw(Sw);
- //dump_model(&model);
+ dump_model(&model);
/* optional phase modelling - make sure this happens before LPC
modelling of {Am} as first order model fit doesn't work well
/* just to make sure we are not cheating - kill all phases */
for(i=0; i<MAX_AMP; i++)
model.phi[i] = 0;
- if (hand_snr)
+ if (hand_snr) {
fscanf(fsnr,"%f\n",&snr);
+ voiced = snr > 2.0;
+ }
phase_synth_zero_order(voiced, H, ex_phase, voiced);
}
if (postfilt)
postfilter(&model, voiced, &bg_est);
- dump_phase_(&model.phi[0]);
+ //dump_phase_(&model.phi[0]);
}
/* optional LPC model amplitudes */
dump_quantised_model(&model);
}
- #define DEC
+ //#define MAKE_CLICKY
+#ifdef MAKE_CLICKY
+ {
+ float maxA = 0.0;
+ float dB;
+ int max_m;
+
+ for(i=1; i<=model.L; i++) {
+ if (model.A[i] > maxA) {
+ maxA = model.A[i];
+ max_m = i;
+ }
+ }
+ for(i=1; i<=model.L; i++) {
+ if (model.A[i] > 0.1*maxA) {
+ model.A[i] = 0.0;
+ }
+ }
+
+ }
+#endif
+
+
+ //#define REDUCE_CLICKY
+#ifdef REDUCE_CLICKY
+ {
+ float maxA = 0.0;
+ float dB;
+ int max_m;
+
+ for(i=1; i<=model.L; i++) {
+ if (model.A[i] > maxA) {
+ maxA = model.A[i];
+ max_m = i;
+ }
+ }
+ for(i=1; i<=model.L; i++) {
+ if (model.A[i] < 0.1*maxA) {
+ model.phi[i] += 0.2*TWO_PI*(float)rand()/RAND_MAX;
+ dB = 3.0 - 6.0*(float)rand()/RAND_MAX;
+ model.A[i] *= pow(10.0, dB/20.0);
+ }
+ }
+
+ }
+#endif
+
+
+ //#define DEC
#ifdef DEC
/* Decimate to 20ms frame rate. In the code we only send
off frames to the receiver. To simulate this on odd
/* even frame so we need to synthesise the model parameters by
interpolating between adjacent frames */
- model_synth = model_2;
- voiced_synth = voiced && voiced_2;
if (fabs(model_1.Wo - model_3.Wo) < 0.1*model_1.Wo) {
/* If the Wo of adjacent frames is within 10% we synthesise a
continuous track through this frame by linear interpolation
of the amplitudes and Wo. This is typical of a strongly
voiced frame.
*/
+
transition = 0;
- /* continuous track through this frame */
- #define T
- #ifdef T
- model_synth.Wo = (model_1.Wo + model_3.Wo)/2.0;
- if (model_1.L > model_3.L)
- model_synth.L = model_3.L;
- else
- model_synth.L = model_1.L;
- #endif
- for(i=1; i<=model_synth.L; i++) {
- model_synth.A[i] = (model_3.A[i] + model_1.A[i])/2.0;
- /* cheat on phases for now, these were constructed using
- LPC model from actual speech for this frame - fix later */
- model_synth.phi[i] = model_2.phi[i];
- }
- vf++;
+ model_synth.Wo = (model_1.Wo + model_3.Wo)/2.0;
+ if (model_1.L > model_3.L)
+ model_synth.L = model_3.L;
+ else
+ model_synth.L = model_1.L;
+ for(i=1; i<=model_synth.L; i++) {
+ model_synth.A[i] = (model_3.A[i] + model_1.A[i])/2.0;
+ /* cheat on phases for now, these were constructed using
+ LPC model from actual speech for this frame - fix later */
+ model_synth.phi[i] = model_2.phi[i];
+ }
}
else {
/*
transition frame, adjacent frames have different Wo and
L so set up two sets of model parameters based on
previous and next frame. We then synthesise both of
- them and add them together in the time domain. Note
- the adjustments to phase to shift the timing of the
- model parameters forward or back N samples.
+ them and add them together in the time domain.
- This case is typical of unvoiced speech or background noise
- of a voiced to unvoiced transition.
+ This case is typical of unvoiced speech or background
+ noise or a voiced/unvoiced transition.
*/
transition = 1;
+ /* model_a is the previous frame extended forward into
+ this frame, model_b is the next frame extended backward
+ into this frame. Note the adjustments to phase to
+ time-shift the model forward or backward N samples. */
+
memcpy(&model_a, &model_3, sizeof(model));
memcpy(&model_b, &model_1, sizeof(model));
for(i=1; i<=model_a.L; i++) {
model_2 = model_1;
model_1 = model;
model = model_synth;
+#endif
+ //dump_quantised_model(&model);
+
+#define INTERP
+#ifdef INTERP
+ if (frames%2) {
+
+ /* odd frames use the original model parameters */
+
+ model_synth = model_2;
+ transition = 0;
+
+ }
+ else {
+ interp(&model_3, &model_1, &model_synth, &model_a, &model_b, &transition);
+ for(i=1; i<=model_synth.L; i++)
+ model_synth.phi[i] = model_2.phi[i];
+ }
+
+ model_3 = model_2;
+ model_2 = model_1;
+ model_1 = model;
+ model = model_synth;
+
#endif
/* Synthesise speech */
}
}
- //printf("gmin = %f\n", get_gmin());
- printf("vf = %d\n", vf);
if (fout != NULL)
fclose(fout);
partial unvoiced sound when using zero phase model was found to be
due mis-alignment of the LPC analysis window and accidental addition
of a random phase component. So we are sticking with synthesise_mixed()
- above for now.
+ above for now. I am leaving this function here for the moment as it
+ might be useful one day.
\*---------------------------------------------------------------------------*/