From: drowe67 Date: Fri, 11 Sep 2009 09:43:02 +0000 (+0000) Subject: progressing zero phase model with a continuous phase synth algorithm, still a work... X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=8f23fb78c343eac61faf6587da3e1ff42e7e63ce;p=freetel-svn-tracking.git progressing zero phase model with a continuous phase synth algorithm, still a work in progress git-svn-id: https://svn.code.sf.net/p/freetel/code@56 01035d8c-6547-0410-b346-abe4f91aad63 --- diff --git a/codec2/README.txt b/codec2/README.txt index 3dbdb084..2c75b568 100644 --- a/codec2/README.txt +++ b/codec2/README.txt @@ -19,6 +19,9 @@ link:/blog/?p=128[blog post]. Status ------ +Still in experimental/development stage - no 2400 bit/s codec +available yet. Progress to date: + 1. Unquantised encoder (sinenc) and decoder (sinedec) running under Linux/gcc, pitch estimator untested. The decoder (sinedec) is a test-bed for various modelling and quantisation options - these are @@ -31,6 +34,9 @@ Status 3. Phase model developed that uses 0 bits for phase and 1 bit/frame for voiced/unvoiced decision. +4. Non-Linear Pitch (NLP) pitch estimator working OK, could use a pitch + tracker to improve a few problem frames. 
+ [[source]] The Source Code --------------- @@ -69,16 +75,23 @@ Development Roadmap ------------------- [X] Milestone 0 - Project kick off - [X] Milestone 1 - Baseline unquantised codec running under Linux/gcc - [ ] Milestone 3 - Prototype 2400 bit/s codec + [X] Milestone 1 - Alpha 2400 bit/s codec [X] Spectral amplitudes modelled and quantised [X] Phase and voicing model developed - [ ] Pitch estimator integrated into encoder + [ ] Pitch estimator [ ] Frame rate/quantisation schemes for 2400 bit/s developed - [ ] Refactor to develop a encoder/decoder functions + [ ] Refactor to develop separate encoder/decoder functions [ ] Test phone call over LAN - [ ] Fixed point port - [ ] codec2-on-a-chip embedded DSP/CPU port + [ ] Release 0.1 for Alpha Testing + [ ] Milestone 2 - Beta codec for digital radio + [ ] Gather samples from the community with different speakers, + input filtering, and background noise conditions that break + codec. + [ ] Further develop algorithm based on samples above + [ ] Design FEC scheme + [ ] Test over digital radio links + [ ] Milestone 3 - Fixed point port + [ ] Milestone 4 - codec2-on-a-chip embedded DSP/CPU port [[howitworks]] How it Works @@ -161,7 +174,7 @@ The tough bits of this project are: Can I help? ----------- -Maybe, check out the latest version of the +Maybe; check out the latest version of the http://freetel.svn.sourceforge.net/viewvc/freetel/codec2/TODO.txt?view=log[TODO] list and the development roadmap above and see if there is anything that interests you. diff --git a/codec2/TODO.txt b/codec2/TODO.txt index 99058ee9..66e62e49 100644 --- a/codec2/TODO.txt +++ b/codec2/TODO.txt @@ -2,13 +2,23 @@ TODO for codec2 --------------- [ ] Important Open Issues - [ ] Why zero phase model doesn'y work for mmt1 - [ ] Pitch errors on mmt1 + [ ] Why zero phase model doesn't work for mmt1 + [ ] residual noise on zero phase model + + "navy" on hts1a, "frog" on morig + + perhaps due to mis-alignment of phases at frame boundaries? 
+ + or possibly pitch errors + + need a way to research this + [ ] Pitch errors on mmt1, morig + we may need a tracker + + suggest manually step through each file, check for + pitch errors, design tracker and check again + + or develop manual pitch tracks and check estimator + with tracker against these. [ ] removal of LPC modelling errors for males - + first few haromic energies get raised + + first few harmonic energies (e.g. mmt1, hts1a) get raised + [ ] good quality LSP quantisation of {Am} [ ] conversion to 20ms frames - + without significnat distortion + + without significant distortion [ ] Planned tasks and half formed ideas for codec2..... [X] Convert files from DOS to Unix diff --git a/codec2/raw/mmt1_speex_8k.raw b/codec2/raw/mmt1_speex_8k.raw new file mode 100644 index 00000000..769a49cd Binary files /dev/null and b/codec2/raw/mmt1_speex_8k.raw differ diff --git a/codec2/raw/morig_speex_8k.raw b/codec2/raw/morig_speex_8k.raw new file mode 100644 index 00000000..ab667a1b Binary files /dev/null and b/codec2/raw/morig_speex_8k.raw differ diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c index 2a3310cc..bd4bef58 100644 --- a/codec2/src/sinedec.c +++ b/codec2/src/sinedec.c @@ -33,6 +33,7 @@ #include "dump.h" #include "phase.h" #include "lpc.h" +#include "synth.h" /*---------------------------------------------------------------------------*\ @@ -86,6 +87,8 @@ int main(int argc, char *argv[]) int phase, phase_model; float prev_Wo, ex_phase; + float phi_prev[MAX_AMP]; + float Wo_prev; if (argc < 3) { printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n"); @@ -148,6 +151,7 @@ int main(int argc, char *argv[]) dump_on(argv[dump+1]); lsp = switch_present("--lsp",argc,argv); + lsp_quantiser = 0; if (lsp) lsp_quantiser = atoi(argv[lsp+1]); @@ -235,7 +239,10 @@ int main(int argc, char *argv[]) if (fout != NULL) { - synthesise_mixed(Pn,&model,Sn_); + if (phase) + synthesise_continuous_phase(Pn, &model, Sn_, (snr>2.0), &Wo_prev, phi_prev); + 
else + synthesise_mixed(Pn,&model,Sn_); /* Save output speech to disk */ diff --git a/codec2/src/synth.c b/codec2/src/synth.c index de7df7b8..e9affb45 100644 --- a/codec2/src/synth.c +++ b/codec2/src/synth.c @@ -29,10 +29,11 @@ #include "sine.h" -void synthesise_mixed(Pn,model,Sn_) -float Pn[]; /* time domain Parzen window */ -MODEL *model; /* ptr to model parameters for this frame */ -float Sn_[]; /* time domain synthesised signal */ +void synthesise_mixed( + float Pn[], /* time domain Parzen window */ + MODEL *model, /* ptr to model parameters for this frame */ + float Sn_[] /* time domain synthesised signal */ +) { int i,l,j,b; /* loop variables */ COMP Sw_[FFT_DEC]; /* DFT of synthesised signal */ @@ -72,3 +73,106 @@ float Sn_[]; /* time domain synthesised signal */ Sn_[i] = Sw_[j].real*Pn[i]; } +/*---------------------------------------------------------------------------*\ + + synthesise_continuous_phase() + + This version adjusts the frequency of each harmonic slightly to + ensure a continuous phase track from the previous frame. Used with + the zero phase model, when original phases are not available. + + At sample n=0 of this frame, we assume the phase of harmonic m is + set to phi_prev[m]. We want the final phase at sample N to be + phi[m]. This means the phase track must start at phi_prev[m], + rotate several times based on mWo, then end up at phase phi[m]. + + To ensure that the phase track arrives at phi[m] by sample N we add + a small frequency offset by slightly shifting the frequency of each + harmonic. + + The continuous phase track model is only useful for voiced speech. + In fact, for unvoiced speech we desire a rough, discontinuous phase + track. So in unvoiced frames or in cases where the fundamental + frequency varies by more than 20%, we don't add the small frequency + offset. 
+ +\*---------------------------------------------------------------------------*/ + +void synthesise_continuous_phase( + float Pn[], /* time domain Parzen window */ + MODEL *model, /* ptr to model parameters for this frame */ + float Sn_[], /* time domain synthesised signal */ + int voiced, /* non-zero if frame is voiced */ + float *Wo_prev, /* previous frame's Wo */ + float phi_prev[] /* previous frame's phases */ +) +{ + int i,l,j; /* loop variables */ + COMP Sw_[FFT_DEC]; /* DFT of synthesised signal */ + int b[MAX_AMP]; /* DFT bin of each harmonic */ + float delta_w; /* frequency offset required */ + + /* Update memories */ + + for(i=0; iWo) > 0.2*model->Wo)) { + //printf("disc voiced = %d\n", voiced); + //printf("%f %f\n", fabs(*Wo_prev - model->Wo), 0.2*model->Wo); + /* discontinuous phase tracks: no phase adjustment of frequency + as we want discontinuous phase tracks */ + + for(l=1; l<=model->L; l++) + b[l] = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5); + } + else { + /* continuous phase tracks: determine frequency of each harmonic + to ensure smooth phase track at the centre of next synthesis + frame */ + + for(l=1; l<=model->L; l++) { + //printf("Wo_prev = %f Wo = %f\n", *Wo_prev, model->Wo); + delta_w = (model->phi[l] - l*N*(*Wo_prev + model->Wo)/2.0 - phi_prev[l]); + delta_w -= TWO_PI*floor(delta_w/TWO_PI + 0.5); + delta_w /= N; + b[l] = floor((l*model->Wo+delta_w)*FFT_DEC/TWO_PI + 0.5); + //printf("delta_w = %f b[%d] = %d\n", delta_w,l,b[l]); + } + } + + /* update memories for phase tracking */ + + *Wo_prev = model->Wo; + for(l=1; l<=model->L; l++) + phi_prev[l] = model->phi[l]; + + /* Now set up frequency domain synthesised speech */ + + for(l=1; l<=model->L; l++) { + Sw_[b[l]].real = model->A[l]*cos(model->phi[l]); + Sw_[b[l]].imag = model->A[l]*sin(model->phi[l]); + Sw_[FFT_DEC-b[l]].real = Sw_[b[l]].real; + Sw_[FFT_DEC-b[l]].imag = -Sw_[b[l]].imag; + } + + /* Perform inverse DFT */ + + four1(&Sw_[-1].imag,FFT_DEC,1); + + /* Overlap add to previous samples 
*/ + + for(i=0; i +#include +#include +#include +#include "sine.h" +#include "dump.h" +#include "synth.h" + +int frames; + +/*---------------------------------------------------------------------------*\ + + switch_present() + + Searches the command line arguments for a "switch". If the switch is + found, returns the command line argument where it was found, else returns + NULL. + +\*---------------------------------------------------------------------------*/ + +int switch_present(sw,argc,argv) + char sw[]; /* switch in string form */ + int argc; /* number of command line arguments */ + char *argv[]; /* array of command line arguments in string form */ +{ + int i; /* loop variable */ + + for(i=1; i