From: drowe67 Date: Mon, 7 Sep 2009 04:16:59 +0000 (+0000) Subject: added zero and first order phase models, sounds pretty good thru loudpeaker X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=a42656660fe98c92daa7348ea566599ee2faf8a8;p=freetel-svn-tracking.git added zero and first order phase models, sounds pretty good thru loudpeaker git-svn-id: https://svn.code.sf.net/p/freetel/code@37 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2/src/Makefile b/codec2/src/Makefile index 0de20f0c..467adef8 100644 --- a/codec2/src/Makefile +++ b/codec2/src/Makefile @@ -5,7 +5,7 @@ SINENC_OBJ = sinenc.o globals.o initenc.o four1.o refine.o spec.o dump.o SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \ quantise.o lpc.o dump.o refine.o ../speex/lsp.o \ ../speex/quant_lsp.o ../speex/bits.o ../speex/lsp_tables_nb.o \ - ../speex/high_lsp_tables.c + ../speex/high_lsp_tables.o phase.o all: sinenc sinedec diff --git a/codec2/src/dump.c b/codec2/src/dump.c index bd65fb1c..1b8195ed 100644 --- a/codec2/src/dump.c +++ b/codec2/src/dump.c @@ -39,6 +39,9 @@ static FILE *fmodel = NULL; static FILE *fqmodel = NULL; static FILE *fpw = NULL; static FILE *flsp = NULL; +static FILE *fphase = NULL; +static FILE *fphase_ = NULL; + static char prefix[MAX_STR]; void dump_on(char p[]) { @@ -61,6 +64,10 @@ void dump_off(){ fclose(fpw); if (flsp != NULL) fclose(flsp); + if (fphase != NULL) + fclose(fphase); + if (fphase_ != NULL) + fclose(fphase_); } void dump_Sn(float Sn[]) { @@ -166,6 +173,44 @@ void dump_quantised_model(MODEL *model) { fprintf(fqmodel,"\n"); } +void dump_phase(float phase[]) { + int l; + char s[MAX_STR]; + + if (!dumpon) return; + + if (fphase == NULL) { + sprintf(s,"%s_phase.txt", prefix); + fphase = fopen(s, "wt"); + assert(fphase != NULL); + } + + for(l=1; l<=model.L; l++) + fprintf(fphase,"%f\t",phase[l]); + for(l=model.L+1; l +#include + +#define VTHRESH1 3.0 +#define VTHRESH2 3.0 + 
+/*---------------------------------------------------------------------------*\ + + aks_to_H() + + Samples the complex LPC synthesis filter spectrum at the harmonic + frequencies. + +\*---------------------------------------------------------------------------*/ + +void aks_to_H(model,aks,G,H, order) +MODEL *model; /* model parameters */ +float aks[]; /* LPC's */ +float G; /* energy term */ +COMP H[]; /* complex LPC spectral samples */ +int order; +{ + COMP Pw[FFT_DEC]; /* power spectrum */ + int i,m; /* loop variables */ + int am,bm; /* limits of current band */ + float r; /* no. rads/bin */ + float Em; /* energy in band */ + float Am; /* spectral amplitude sample */ + int b; /* centre bin of harmonic */ + float phi_; /* phase of LPC spectra */ + + r = TWO_PI/(FFT_DEC); + + /* Determine DFT of A(exp(jw)) ------------------------------------------*/ + + for(i=0; iL; m++) { + am = floor((m - 0.5)*model->Wo/r + 0.5); + bm = floor((m + 0.5)*model->Wo/r + 0.5); + b = floor(m*model->Wo/r + 0.5); + + Em = 0.0; + for(i=am; ireal = Am.real; + minAm->imag = Am.imag; + } + + } + + snr = 10.0*log10(sig/Emin); + + return snr; +} + +/*---------------------------------------------------------------------------*\ + + phase_synth_zero_order() + + Synthesises phases based on SNR and a rule based approach. No phase + parameters are required apart from the SNR (which can be reduec to a + 1 bit V/UV decision per frame). + + The phase of each harmonic is modelled as the phase of a LPC + synthesis filter excited by an impulse. Unlike the first order + model the position of the impulse is not transmitted, so we create + an excitation pulse train using a rule based approach. + + Consider a pulse train with a pulse starting time n=0, with pulses + repeated at a rate of Wo, the fundamental frequency. A pulse train + in the time domain is equivalent to a pulse train in the frequency + domain. 
We can make an excitation pulse train using a sum of + sinusoids: + + for(m=1; m<=L; m++) + ex[n] += cos(m*Wo*n) + + Note: the Octave script ../octave/phase.m is an example of this if you would + like to try making a pulse train. + + The phase of each excitation harmonic is: + + arg(E[m]) = m*Wo + + where E[m] are the complex excitation (freq domain) samples, and + arg(x) just returns the phase of a complex sample x. + + As we don't transmit the pulse position for this model, we need to + synthesise it. Now the excitation pulses occur at a rate of Wo. + This means the phase of the first harmonic advances by Wo*N radians + over a synthesis frame of N samples. For example if Wo is pi/20 + (200 Hz), then over a 10ms frame (N=80 samples), the phase of the + first harmonic would advance (pi/20)*80 = 4*pi or two complete + cycles. + + One complication is that two adjacent frames will have different + Wo, so we take the average of the two frames to track the + excitation phase of the fundamental (first harmonic): + + arg(E[1]) += ((Wo + prev_Wo)/2)*N; + + We then relate the phase of the m-th excitation harmonic to the + phase of the fundamental as: + + arg(E[m]) = m*arg(E[1]) + + This E[m] then gets passed through the LPC synthesis filter to + determine the final harmonic phase. + + NOTES: + + 1/ This synthesis model is effectively the same as simple LPC-10 + vocoders, and yet sounds much better. Why? + + 2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE + also from MIT) first described this zero phase model, I need to look + up the paper. + + 3/ Note that this approach could cause some discontinuities in + the phase at the edge of synthesis frames, as no attempt is made + to make sure that the phase tracks are continuous (the excitation + phases are continuous, but not the final phases after filtering + by the LPC spectra). Technically this is a bad thing. However + this may actually be a good thing, disturbing the phase tracks a + bit. 
More research needed, e.g. test a synthsis model that adds + a small delta-W to make phase tracks line up for voiced + harmonics. + +\*---------------------------------------------------------------------------*/ + +void phase_synth_zero_order( + float snr, /* SNR from first order model */ + COMP H[], /* LPC spectra samples */ + float *prev_Wo, /* last frames Wo (we will update this here) */ + float *ex_phase /* excitation phase of fundamental */ +) +{ + int Lrand; + int m; + float new_phi; + COMP Ex[MAX_AMP]; /* excitation samples */ + COMP A_[MAX_AMP]; /* synthesised harmonic samples */ + + /* + Bunch of mixed voicing thresholds tried but in the end a simple + voiced/unvoiced model worked best. With mixed voicing some + unvoiced speech had a "clicky" sound due to occasional high SNR + causing the first few harmonics to be modelled as voiced. I don't + really understand why simple one bit V/UV sounds so good - + everyone else seems to think mixed voicing models are required + for good quality speech. + + Note code below supports mixed voicing but with VTHRESH1 == VTHRESH2 + we get a simple V/UV model. 
+ */ + + Lrand = model.L; + if (snr < VTHRESH2) { + Lrand = floor(model.L*(snr-VTHRESH1)/(VTHRESH2-VTHRESH1)); + if (Lrand < 1) Lrand = 1; + if (Lrand > model.L) Lrand = model.L; + } + + /* update excitation fundamental phase track */ + + ex_phase[0] += (*prev_Wo+model.Wo)*N/2.0; + ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5); + *prev_Wo = model.Wo; + + /* now modify this frames phase using zero phase model */ + + for(m=1; m<=model.L; m++) { + + /* generate excitation */ + + if (m <= Lrand) { + Ex[m].real = cos(ex_phase[0]*m); + Ex[m].imag = sin(ex_phase[0]*m); + } + else { + /* we probably don't need to LPC filter phase in unvoiced case, + maybe test this theory some time */ + float phi = TWO_PI*(float)rand()/RAND_MAX; + Ex[m].real = cos(phi); + Ex[m].imag = sin(phi); + } + + /* filter using LPC filter */ + + A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag; + A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag; + + /* modify sinusoidal phase */ + + new_phi = atan2(A_[m].imag, A_[m].real+1E-12); + model.phi[m] = new_phi; + + /* little bit of randomness to phase - possibly makes females + sound slightly better, need to do some more research. May not + be needed */ + + model.phi[m] += (m*model.Wo)*rand()/RAND_MAX; + } +} + +/*---------------------------------------------------------------------------*\ + + phase_synth_first_order() + + Synthesises phases based on SNR and the first oreder phase model + parameters. 
+ +\*---------------------------------------------------------------------------*/ + +void phase_synth_first_order( + float snr, /* SNR from first order model */ + COMP H[], /* LPC spectra samples */ + int i_min, /* best pulse position */ + COMP minAm /* best complex gain */ +) +{ + int Lrand; + int m; + float new_phi; + COMP Ex[MAX_AMP]; /* excitation samples */ + COMP A_[MAX_AMP]; /* synthesised harmonic samples */ + COMP Tm; + + /* see notes in zero phase function above to V/UV model */ + + Lrand = model.L; + if (snr < VTHRESH2) { + Lrand = floor(model.L*(snr-VTHRESH1)/(VTHRESH2-VTHRESH1)); + if (Lrand < 1) Lrand = 1; + if (Lrand > model.L) Lrand = model.L; + } + + /* now modify sinusoidal model phase using phase model */ + + for(m=1; m<=model.L; m++) { + + /* generate excitation */ + + if (m <= Lrand) { + Ex[m].real = cos(model.Wo*m*i_min); + Ex[m].imag = sin(-model.Wo*m*i_min); + } + else { + float phi = TWO_PI*(float)rand()/RAND_MAX; + Ex[m].real = cos(phi); + Ex[m].imag = sin(phi); + } + + /* filter using LPC filter */ + + A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag; + A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag; + + /* scale by complex gain (could have done this earlier at Ex[] + stage) */ + + Tm.real = A_[m].real*minAm.real - A_[m].imag*minAm.imag; + Tm.imag = A_[m].imag*minAm.real + A_[m].real*minAm.imag; + + /* modify sinusoidal phase */ + + new_phi = atan2(Tm.imag, Tm.real+1E-12); + model.phi[m] = new_phi; + } +} + diff --git a/codec2/src/phase.h b/codec2/src/phase.h new file mode 100644 index 00000000..e692f938 --- /dev/null +++ b/codec2/src/phase.h @@ -0,0 +1,38 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: phase.h + AUTHOR......: David Rowe + DATE CREATED: 1/2/09 + + Functions for modelling phase. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __PHASE__ +#define __PHASE__ + +#define PHASE_LPC_ORD 10 /* LPC order for phase modelling; sizes Rk[] and aks[] in sinedec.c */ + +float phase_model_first_order(float aks[], COMP H[], int *i_min, COMP *min_Am); /* returns an SNR; NOTE(review): presumably reports the best pulse position and complex gain via i_min/min_Am - confirm against phase.c */ +void phase_synth_zero_order(float snr, COMP H[], float *prev_Wo, float *ex_phase); /* writes model.phi[]; updates *prev_Wo and ex_phase[0] */ +void phase_synth_first_order(float snr, COMP H[], int i_min, COMP min_Am); /* writes model.phi[] from H[], pulse position i_min and complex gain min_Am */ + +#endif diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c index 37c131a6..a01a8e8d 100644 --- a/codec2/src/sinedec.c +++ b/codec2/src/sinedec.c @@ -26,10 +26,13 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#include #include #include "sine.h" #include "quantise.h" #include "dump.h" +#include "phase.h" +#include "lpc.h" /*---------------------------------------------------------------------------*\ @@ -78,6 +81,9 @@ int main(int argc, char *argv[]) int lpc_model, order; int lsp, lsp_quantiser; int dump; + + int phase, phase_model; + float prev_Wo, ex_phase; if (argc < 3) { printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n"); @@ -143,6 +149,13 @@ int main(int argc, char *argv[]) if (lsp) lsp_quantiser = atoi(argv[lsp+1]); + /* phase_model 0: zero phase + phase_model 1: 1st order polynomial */ + phase = switch_present("--phase",argc,argv); + if (phase) + phase_model = atoi(argv[phase+1]); + assert((phase_model == 0) || (phase_model == 1)); + /* Initialise ------------------------------------------------------------*/ init_decoder(); @@ -170,14 +183,44 @@ int main(int argc, char *argv[]) dump_model(&model); + /* optional phase modelling */ + + if (phase) { + float Wn[AW_ENC]; /* windowed speech samples */ + float Rk[PHASE_LPC_ORD+1]; /* autocorrelation coeffs */ + float aks[PHASE_LPC_ORD+1]; + COMP H[MAX_AMP]; /* LPC freq domain samples */ + int i_min; + COMP min_Am; + + dump_phase(&model.phi[0]); + + /* Determine LPC model using time domain LPC. A little + further down the development track optionally LPCs from lpc + modelling/LSP quant for phase modelling */ + + for(i=0; i