From: drowe67 Date: Sun, 18 Oct 2009 23:07:54 +0000 (+0000) Subject: interp function working with nimor artifacts, about to clean up X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=16f791e6d5c97dad096f725b81e43aa0b6c9bbf6;p=freetel-svn-tracking.git interp function working with nimor artifacts, about to clean up git-svn-id: https://svn.code.sf.net/p/freetel/code@76 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2/src/Makefile b/codec2/src/Makefile index f7a49d86..03036bc0 100644 --- a/codec2/src/Makefile +++ b/codec2/src/Makefile @@ -5,7 +5,7 @@ SINENC_OBJ = sinenc.o globals.o initenc.o four1.o refine.o spec.o dump.o SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \ quantise.o lpc.o dump.o refine.o ../speex/lsp.o \ ../speex/quant_lsp.o ../speex/bits.o ../speex/lsp_tables_nb.o \ - ../speex/high_lsp_tables.o phase.o postfilter.o + ../speex/high_lsp_tables.o phase.o postfilter.o interp.o all: sinenc sinedec diff --git a/codec2/src/interp.c b/codec2/src/interp.c new file mode 100644 index 00000000..6aad899c --- /dev/null +++ b/codec2/src/interp.c @@ -0,0 +1,104 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: interp.c + AUTHOR......: David Rowe + DATE CREATED: 9/10/09 + + Interpolation of 20ms frames to 10ms frames. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "interp.h" +#include /* NOTE(review): the header name is missing after this include in this copy of the patch, presumably lost in extraction; the fabs() call below suggests math.h - confirm against the repository */ + +/*---------------------------------------------------------------------------*\ + + interp() + + Given two frames described by model parameters 20ms apart, determines the + model parameters of the 10ms frame between them. Note that phases are + not interpolated, they must be set external to this function. + + If the Wo of prev and next differ by less than 10% of next->Wo then + *transition is set to 0 and only synth is written: synth->Wo is the + mean of the two Wo values, synth->L is the smaller of the two L values + and synth->A[1..L] is the mean of the two sets of amplitudes. a and b + are not touched. + + Otherwise *transition is set to 1 and only a and b are written: a is a + copy of prev and b is a copy of next, each with A[m] halved and phi[m] + shifted by Wo*m*N (added for a, subtracted for b). synth is not + touched. + +\*---------------------------------------------------------------------------*/ + +void interp( + MODEL *prev, /* model params of the previous frame (only read here) */ + MODEL *next, /* model params of the next frame (only read here) */ + MODEL *synth, /* out: interp model params for cont frame, written + only when *transition is set to 0 */ + MODEL *a, /* out: prev frame extended into this frame, written + only when *transition is set to 1 */ + MODEL *b, /* out: next frame extended into this frame, written + only when *transition is set to 1 */ + int *transition /* out: non-zero if this is a transition frame, this + information is used for synthesis */ +) +{ + int m; + + if (fabs(next->Wo - prev->Wo) < 0.1*next->Wo) { + + /* If the Wo of adjacent frames is within 10% we synthesise a + continuous track through this frame by linear interpolation + of the amplitudes and Wo. This is typical of a strongly + voiced frame. + + synth->L is set to the smaller of the two L values, so the + averaging loop only reads A[m] for m up to the L of both + frames. synth->phi is not written in this branch. + */ + + *transition = 0; + + synth->Wo = (next->Wo + prev->Wo)/2.0; + if (next->L > prev->L) + synth->L = prev->L; + else + synth->L = next->L; + for(m=1; m<=synth->L; m++) { + synth->A[m] = (prev->A[m] + next->A[m])/2.0; + } + } + else { + /* + transition frame, adjacent frames have different Wo and L + so set up two sets of model parameters based on prev and + next. We then synthesise both of them and add them + together in the time domain. + + The transition case is typical of unvoiced speech or + background noise or a voiced/unvoiced transition. + */ + + *transition = 1; + + /* a is prev extended forward into this frame, b is next + extended backward into this frame. 
Note the adjustments to + phase to time-shift the model forward or backward N + samples. The amplitudes of a and b are halved. + + NOTE(review): the copies below are sized with sizeof(model), + and model is not declared in this function; presumably it is + a MODEL object made visible through interp.h - confirm that + it has the same size as the objects a and b point to. */ + + memcpy(a, prev, sizeof(model)); + memcpy(b, next, sizeof(model)); + for(m=1; m<=a->L; m++) { + a->A[m] /= 2.0; + a->phi[m] += a->Wo*m*N; + } + for(m=1; m<=b->L; m++) { + b->A[m] /= 2.0; + b->phi[m] -= b->Wo*m*N; + } + } + +} + diff --git a/codec2/src/interp.h b/codec2/src/interp.h new file mode 100644 index 00000000..39a9175c --- /dev/null +++ b/codec2/src/interp.h @@ -0,0 +1,37 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: interp.h + AUTHOR......: David Rowe + DATE CREATED: 9/10/09 + + Interpolation of 20ms frames to 10ms frames. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/ + +#ifndef __INTERP__ +#define __INTERP__ + +#include "sine.h" + /* interp() derives the model params of the 10ms frame that lies + between prev and next, which are 20ms apart. When the Wo of prev + and next differ by less than 10% of next->Wo it fills synth and + sets *transition to 0; otherwise it fills a and b from prev and + next and sets *transition to 1. The phases of synth are not set + by interp(). + + NOTE(review): the guard macro above contains a double underscore, + a form of identifier that C reserves for the implementation - + consider renaming it if nothing else tests it. */ +void interp(MODEL *prev, MODEL *next, MODEL *synth, MODEL *a, MODEL *b, + int *transition); + +#endif diff --git a/codec2/src/listen.sh b/codec2/src/listen.sh index 338e0712..1a1676c5 100755 --- a/codec2/src/listen.sh +++ b/codec2/src/listen.sh @@ -4,7 +4,7 @@ # # Run menu with common sample file options, headphone version -../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_speex_8k.raw $2 $3 +../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_g729a.raw $2 $3 diff --git a/codec2/src/listen1.sh b/codec2/src/listen1.sh index cdf03ca8..e3af29ae 100755 --- a/codec2/src/listen1.sh +++ b/codec2/src/listen1.sh @@ -4,6 +4,6 @@ # # Run menu with common sample file options, headphone version -../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_speex_8k.raw $2 $3 -d /dev/dsp1 +../script/menu.sh ../raw/$1.raw $1_uq.raw $1_phase0.raw $1_lpc10.raw $1_lsp.raw $1_phase0_lpc10.raw $1_phase0_lsp.raw ../raw/$1_g729a.raw $2 $3 -d /dev/dsp1 diff --git a/codec2/src/phase.c b/codec2/src/phase.c index 8f695411..85105c55 100644 --- a/codec2/src/phase.c +++ b/codec2/src/phase.c @@ -305,10 +305,11 @@ void phase_synth_zero_order( int voiced ) { - int m; + int m,i; float new_phi; COMP Ex[MAX_AMP]; /* excitation samples */ COMP A_[MAX_AMP]; /* synthesised harmonic samples */ + float maxA; /* Update excitation fundamental phase track, this sets the position @@ -374,6 +375,26 @@ void phase_synth_zero_order( new_phi = atan2(A_[m].imag, A_[m].real+1E-12); model.phi[m] = new_phi; } + + #ifdef CLICKY + /* Adding a random component to low energy harmonic phase seems to + improve low pitch speakers. 
Adding a small random component to + low energy harmonic amplitudes also helps low pitch speakers after + LPC modelling (see LPC modelling/amplitude quantisation code). + */ + + maxA = 0.0; + for(i=1; i<=model.L; i++) { + if (model.A[i] > maxA) { + maxA = model.A[i]; + } + } + for(i=1; i<=model.L; i++) { + if (model.A[i] < 0.1*maxA) { + model.phi[i] += 0.2*TWO_PI*(float)rand()/RAND_MAX; + } + } + #endif } /*---------------------------------------------------------------------------*\ diff --git a/codec2/src/quantise.c b/codec2/src/quantise.c index b9de0a1d..1f1ea094 100644 --- a/codec2/src/quantise.c +++ b/codec2/src/quantise.c @@ -36,7 +36,7 @@ #define MAX_ORDER 20 #define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */ -#define MAX_CB 10 /* max number of codebooks */ +#define MAX_CB 20 /* max number of codebooks */ /* describes each codebook */ @@ -48,6 +48,7 @@ typedef struct { /* lsp_q describes entire quantiser made up of several codebooks */ +#ifdef OLDER /* 10+10+6+6 = 32 bit LSP difference split VQ */ LSP_CB lsp_q[] = { @@ -57,6 +58,21 @@ LSP_CB lsp_q[] = { {2, 64, "../unittest/lspd910.txt"}, {0, 0, ""} }; +#endif + +LSP_CB lsp_q[] = { + {1, 16, "../unittest/lsp1.txt"}, + {1, 16, "../unittest/lsp2.txt"}, + {1, 16, "../unittest/lsp3.txt"}, + {1, 16, "../unittest/lsp4.txt"}, + {1, 16, "../unittest/lsp5.txt"}, + {1, 16, "../unittest/lsp6.txt"}, + {1, 16, "../unittest/lsp7.txt"}, + {1, 8, "../unittest/lsp8.txt"}, + {1, 8, "../unittest/lsp9.txt"}, + {1, 4, "../unittest/lsp10.txt"}, + {0, 0, ""} +}; /* ptr to each codebook */ @@ -306,6 +322,7 @@ float lpc_model_amplitudes( int i,j; float snr; float lsp[MAX_ORDER]; + float lsp_hz[MAX_ORDER]; float lsp_[MAX_ORDER]; float lspd[MAX_ORDER]; int roots; /* number of LSP roots found */ @@ -315,6 +332,8 @@ float lpc_model_amplitudes( float *cb; float wt[MAX_ORDER]; + float maxA, dB; + for(i=0; i {Am} LPC decode */ + #ifdef CLICKY + /* Adding a random component to low energy harmonic phase seems to + improve low 
pitch speakers. Adding a small random component to + low energy harmonic amplitudes also helps low pitch speakers after + LPC modelling (see LPC modelling/amplitude quantisation code). + */ + + maxA = 0.0; + for(i=1; i<=model->L; i++) { + if (model->A[i] > maxA) { + maxA = model->A[i]; + } + } + for(i=1; i<=model->L; i++) { + if (model->A[i] < 0.1*maxA) { + dB = 3.0 - 6.0*(float)rand()/RAND_MAX; + model->A[i] *= pow(10.0, dB/20.0); + } + } + #endif + return snr; } @@ -416,8 +495,7 @@ void aks_to_M2( for(i=0; i 2.0; + } phase_synth_zero_order(voiced, H, ex_phase, voiced); } @@ -270,7 +273,7 @@ int main(int argc, char *argv[]) if (postfilt) postfilter(&model, voiced, &bg_est); - dump_phase_(&model.phi[0]); + //dump_phase_(&model.phi[0]); } /* optional LPC model amplitudes */ @@ -281,7 +284,55 @@ int main(int argc, char *argv[]) dump_quantised_model(&model); } - #define DEC + //#define MAKE_CLICKY +#ifdef MAKE_CLICKY + { + float maxA = 0.0; + float dB; + int max_m; + + for(i=1; i<=model.L; i++) { + if (model.A[i] > maxA) { + maxA = model.A[i]; + max_m = i; + } + } + for(i=1; i<=model.L; i++) { + if (model.A[i] > 0.1*maxA) { + model.A[i] = 0.0; + } + } + + } +#endif + + + //#define REDUCE_CLICKY +#ifdef REDUCE_CLICKY + { + float maxA = 0.0; + float dB; + int max_m; + + for(i=1; i<=model.L; i++) { + if (model.A[i] > maxA) { + maxA = model.A[i]; + max_m = i; + } + } + for(i=1; i<=model.L; i++) { + if (model.A[i] < 0.1*maxA) { + model.phi[i] += 0.2*TWO_PI*(float)rand()/RAND_MAX; + dB = 3.0 - 6.0*(float)rand()/RAND_MAX; + model.A[i] *= pow(10.0, dB/20.0); + } + } + + } +#endif + + + //#define DEC #ifdef DEC /* Decimate to 20ms frame rate. In the code we only send off frames to the receiver. 
To simulate this on odd @@ -330,48 +381,45 @@ int main(int argc, char *argv[]) /* even frame so we need to synthesise the model parameters by interpolating between adjacent frames */ - model_synth = model_2; - voiced_synth = voiced && voiced_2; if (fabs(model_1.Wo - model_3.Wo) < 0.1*model_1.Wo) { /* If the Wo of adjacent frames is within 10% we synthesise a continuous track through this frame by linear interpolation of the amplitudes and Wo. This is typical of a strongly voiced frame. */ + transition = 0; - /* continuous track through this frame */ - #define T - #ifdef T - model_synth.Wo = (model_1.Wo + model_3.Wo)/2.0; - if (model_1.L > model_3.L) - model_synth.L = model_3.L; - else - model_synth.L = model_1.L; - #endif - for(i=1; i<=model_synth.L; i++) { - model_synth.A[i] = (model_3.A[i] + model_1.A[i])/2.0; - /* cheat on phases for now, these were constructed using - LPC model from actual speech for this frame - fix later */ - model_synth.phi[i] = model_2.phi[i]; - } - vf++; + model_synth.Wo = (model_1.Wo + model_3.Wo)/2.0; + if (model_1.L > model_3.L) + model_synth.L = model_3.L; + else + model_synth.L = model_1.L; + for(i=1; i<=model_synth.L; i++) { + model_synth.A[i] = (model_3.A[i] + model_1.A[i])/2.0; + /* cheat on phases for now, these were constructed using + LPC model from actual speech for this frame - fix later */ + model_synth.phi[i] = model_2.phi[i]; + } } else { /* transition frame, adjacent frames have different Wo and L so set up two sets of model parameters based on previous and next frame. We then synthesise both of - them and add them together in the time domain. Note - the adjustments to phase to shift the timing of the - model parameters forward or back N samples. + them and add them together in the time domain. - This case is typical of unvoiced speech or background noise - of a voiced to unvoiced transition. + This case is typical of unvoiced speech or background + noise or a voiced/unvoiced transition. 
*/ transition = 1; + /* model_a is the previous frames extended forward into + this frame, model_b is the next frame extended backward + into this frame. Note the adjustments to phase to + time-shift the model forward or backward N samples. */ + memcpy(&model_a, &model_3, sizeof(model)); memcpy(&model_b, &model_1, sizeof(model)); for(i=1; i<=model_a.L; i++) { @@ -392,6 +440,30 @@ int main(int argc, char *argv[]) model_2 = model_1; model_1 = model; model = model_synth; +#endif + //dump_quantised_model(&model); + +#define INTERP +#ifdef INTERP + if (frames%2) { + + /* odd frames use the original model parameters */ + + model_synth = model_2; + transition = 0; + + } + else { + interp(&model_3, &model_1, &model_synth, &model_a, &model_b, &transition); + for(i=1; i<=model_synth.L; i++) + model_synth.phi[i] = model_2.phi[i]; + } + + model_3 = model_2; + model_2 = model_1; + model_1 = model; + model = model_synth; + #endif /* Synthesise speech */ @@ -420,8 +492,6 @@ int main(int argc, char *argv[]) } } - //printf("gmin = %f\n", get_gmin()); - printf("vf = %d\n", vf); if (fout != NULL) fclose(fout); diff --git a/codec2/src/synth.c b/codec2/src/synth.c index 105acbf1..04edd8bc 100644 --- a/codec2/src/synth.c +++ b/codec2/src/synth.c @@ -109,7 +109,8 @@ void synthesise_mixed( partial unvoiced sound when using zero phase model was found to be due mis-alignment of the LPC analysis window and accidental addition of a random phase component. So we are sticking with synthesise_mixed() - above for now. + above for now. I am leaving this function here for the moment as it + might be useful one day. \*---------------------------------------------------------------------------*/