From: drowe67 <drowe67@01035d8c-6547-0410-b346-abe4f91aad63>
Date: Mon, 7 Sep 2009 04:16:59 +0000 (+0000)
Subject: added zero and first order phase models, sounds pretty good thru loudpeaker
X-Git-Url: http://git.whiteaudio.com/gitweb/?a=commitdiff_plain;h=a42656660fe98c92daa7348ea566599ee2faf8a8;p=freetel-svn-tracking.git

added zero and first order phase models, sounds pretty good thru loudpeaker

git-svn-id: https://svn.code.sf.net/p/freetel/code@37 01035d8c-6547-0410-b346-abe4f91aad63
---

diff --git a/codec2/src/Makefile b/codec2/src/Makefile
index 0de20f0c..467adef8 100644
--- a/codec2/src/Makefile
+++ b/codec2/src/Makefile
@@ -5,7 +5,7 @@ SINENC_OBJ  = sinenc.o globals.o initenc.o four1.o refine.o spec.o dump.o
 SINEDEC_OBJ = sinedec.o globals.o initenc.o initdec.o four1.o synth.o \
               quantise.o lpc.o dump.o refine.o ../speex/lsp.o  \
               ../speex/quant_lsp.o ../speex/bits.o ../speex/lsp_tables_nb.o \
-              ../speex/high_lsp_tables.c
+              ../speex/high_lsp_tables.o phase.o
 
 all: sinenc sinedec
 
diff --git a/codec2/src/dump.c b/codec2/src/dump.c
index bd65fb1c..1b8195ed 100644
--- a/codec2/src/dump.c
+++ b/codec2/src/dump.c
@@ -39,6 +39,9 @@ static FILE *fmodel = NULL;
 static FILE *fqmodel = NULL;
 static FILE *fpw = NULL;
 static FILE *flsp = NULL;
+static FILE *fphase = NULL;
+static FILE *fphase_ = NULL;
+
 static char  prefix[MAX_STR];
 
 void dump_on(char p[]) {
@@ -61,6 +64,10 @@ void dump_off(){
 	fclose(fpw);
     if (flsp != NULL)
 	fclose(flsp);
+    if (fphase != NULL)
+	fclose(fphase);
+    if (fphase_ != NULL)
+	fclose(fphase_);
 }
 
 void dump_Sn(float Sn[]) {
@@ -166,6 +173,44 @@ void dump_quantised_model(MODEL *model) {
     fprintf(fqmodel,"\n");    
 }
 
+void dump_phase(float phase[]) {
+    int l;
+    char s[MAX_STR];
+
+    if (!dumpon) return;
+
+    if (fphase == NULL) {
+	sprintf(s,"%s_phase.txt", prefix);
+	fphase = fopen(s, "wt");
+	assert(fphase != NULL);
+    }
+
+    for(l=1; l<=model.L; l++)
+	fprintf(fphase,"%f\t",phase[l]);
+    for(l=model.L+1; l<MAX_AMP; l++)
+	fprintf(fphase,"%f\t",0.0);
+    fprintf(fphase,"\n");    
+}
+
+void dump_phase_(float phase_[]) {
+    int l;
+    char s[MAX_STR];
+
+    if (!dumpon) return;
+
+    if (fphase_ == NULL) {
+	sprintf(s,"%s_phase_.txt", prefix);
+	fphase_ = fopen(s, "wt");
+	assert(fphase_ != NULL);
+    }
+
+    for(l=1; l<=model.L; l++)
+	fprintf(fphase_,"%f\t",phase_[l]);
+    for(l=model.L+1; l<MAX_AMP; l++)
+	fprintf(fphase_,"%f\t",0.0);
+    fprintf(fphase_,"\n");    
+}
+
 void dump_Pw(COMP Pw[]) {
     int i;
     char s[MAX_STR];
diff --git a/codec2/src/dump.h b/codec2/src/dump.h
index a53a07c4..db55a0a4 100644
--- a/codec2/src/dump.h
+++ b/codec2/src/dump.h
@@ -38,5 +38,7 @@ void dump_model(MODEL *m);
 void dump_quantised_model(MODEL *m);
 void dump_Pw(COMP Pw[]);
 void dump_lsp(float lsp[]);
+void dump_phase(float phase[]);
+void dump_phase_(float phase[]);
 
 #endif
diff --git a/codec2/src/phase.c b/codec2/src/phase.c
new file mode 100644
index 00000000..a88dd2ff
--- /dev/null
+++ b/codec2/src/phase.c
@@ -0,0 +1,425 @@
+/*---------------------------------------------------------------------------*\
+                                                                             
+  FILE........: phase.c                                           
+  AUTHOR......: David Rowe                                             
+  DATE CREATED: 1/2/09                                                 
+                                                                             
+  Functions for modelling and synthesising phase.
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "sine.h"
+#include "phase.h"
+#include "lpc.h"
+#include <assert.h>
+#include <string.h>
+
+#define VTHRESH1 3.0
+#define VTHRESH2 3.0
+
+/*---------------------------------------------------------------------------*\
+
+  aks_to_H()
+
+  Samples the complex LPC synthesis filter spectrum at the harmonic
+  frequencies.
+
+\*---------------------------------------------------------------------------*/
+
+void aks_to_H(model,aks,G,H, order)
+MODEL *model;	/* model parameters */
+float  aks[];	/* LPC's */
+float  G;	/* energy term */
+COMP   H[];	/* complex LPC spectral samples */
+int    order;
+{
+  COMP  Pw[FFT_DEC];	/* power spectrum */
+  int   i,m;		/* loop variables */
+  int   am,bm;		/* limits of current band */
+  float r;		/* no. rads/bin */
+  float Em;		/* energy in band */
+  float Am;		/* spectral amplitude sample */
+  int   b;		/* centre bin of harmonic */
+  float phi_;		/* phase of LPC spectra */
+
+  r = TWO_PI/(FFT_DEC);
+
+  /* Determine DFT of A(exp(jw)) ------------------------------------------*/
+
+  for(i=0; i<FFT_DEC; i++) {
+    Pw[i].real = 0.0;
+    Pw[i].imag = 0.0;
+  }
+
+  for(i=0; i<=order; i++)
+    Pw[i].real = aks[i];
+  four1(&Pw[-1].imag,FFT_DEC,-1);
+
+  /* Sample magnitude and phase at harmonics */
+
+  for(m=1; m<=model->L; m++) {
+    am = floor((m - 0.5)*model->Wo/r + 0.5);
+    bm = floor((m + 0.5)*model->Wo/r + 0.5);
+    b = floor(m*model->Wo/r + 0.5);
+
+    Em = 0.0;
+    for(i=am; i<bm; i++)
+      Em += G/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
+    Am = sqrt(fabs(Em/(bm-am)));
+
+    phi_ = -atan2(Pw[b].imag,Pw[b].real);
+    H[m].real = Am*cos(phi_);
+    H[m].imag = Am*sin(phi_);
+  }
+}
+
+/*---------------------------------------------------------------------------*\
+
+   phase_model_first_order()
+
+   Models the current frames phase samples {phi[m]} as a LPC synthesis
+   filter excited by an impulse at time i_min with a complex gain min_Am.
+   The phase of the complex gain min_Am is a constant phase term, and the
+   impulse position i_min creates a phase term that varies linearly
+   with frequency.  We are therefore modelling the excitation phase as
+   a 1st order polynomial function of mWo:
+
+     Ex[m] = -m*Wo*i_min + arg(min_Am);
+
+   and the harmonic phase is given by:
+  
+     phi[m] = -m*Wo*i_min + arg(min_Am) + arg[H[m]];
+
+   where H[m] is the LPC spectra sampled at the mWo harmonic frequencies.
+
+   This actually works out to be a pretty good model for voiced
+   speech.  The fit of the model (snr) is therefore used as a voicing
+   measure.  If the snr is less than a threshold, the frame is
+   declared unvoiced all all the phases are randomised.
+
+   Reference:  
+     [1] http://www.itr.unisa.edu.au/~steven/thesis/dgr.pdf Chapter 6
+
+   NOTE: min_Am is a dumb name for the complex gain as {A} or A[m] is
+   commonly used for the spectral magnitudes.  TODO: change name to match
+   thesis term G(mWo) (which of course clashes with LPC gain! AAAAAAHHH!).
+
+\*---------------------------------------------------------------------------*/
+
+float phase_model_first_order(
+  float aks[],                  /* LPC coeffs for this frame      */
+  COMP  H[],		        /* LPC filter freq doamin samples */
+  int  *i_min,                  /* pulse position for min error   */ 
+  COMP *minAm                   /* complex gain for min error     */
+) 
+{
+  float G;			/* LPC gain */
+  int i,m;
+
+  float E,Emin;			/* current and minimum error so far */
+  int P;			/* current pitch period */
+  COMP A[MAX_AMP];		/* harmonic samples */
+  COMP Ex[MAX_AMP];		/* excitation samples */
+  COMP A_[MAX_AMP];		/* synthesised harmonic samples */
+  COMP Am;    			/* complex gain */
+  COMP Em;  			/* error for m-th band */
+  float den;     		/* energy of synthesised */
+  float snr;		        /* snr of each excitation source */
+
+  /* Construct target vector */
+
+  sig = 0.0;
+  for(m=1; m<=model.L; m++) {
+    A[m].real = model.A[m]*cos(model.phi[m]);
+    A[m].imag = model.A[m]*sin(model.phi[m]);
+    sig += model.A[m]*model.A[m];
+  }
+
+  /* Sample LPC model at harmonics */
+
+  G = 1.0;
+  aks_to_H(&model,aks,G,H,PHASE_LPC_ORD);
+
+  /* Now attempt to fit impulse, by trying positions 0..P-1 */
+
+  Emin = 1E32;
+  P = floor(TWO_PI/model.Wo + 0.5);
+  for(i=0; i<P; i++) {
+
+    /* determine complex gain */
+
+    Am.real = 0.0;
+    Am.imag = 0.0;
+    den = 0.0;
+    for(m=1; m<=model.L; m++) {
+      Ex[m].real = cos(model.Wo*m*i);
+      Ex[m].imag = sin(-model.Wo*m*i);
+      A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag;
+      A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag;
+      Am.real += A[m].real*A_[m].real + A[m].imag*A_[m].imag;
+      Am.imag += A[m].imag*A_[m].real - A[m].real*A_[m].imag;
+      den += A_[m].real*A_[m].real + A_[m].imag*A_[m].imag;
+    }
+
+    Am.real /= den;
+    Am.imag /= den;
+
+    /* determine error */
+
+    E = 0.0;
+    for(m=1; m<=model.L; m++) {
+
+      Em.real = A_[m].real*Am.real - A_[m].imag*Am.imag;
+      Em.imag = A_[m].imag*Am.real + A_[m].real*Am.imag;
+	  
+      E += pow(A[m].real-Em.real,2.0) + pow(A[m].imag-Em.imag,2.0);
+    }
+
+    if (E < Emin) {
+      Emin = E;
+      *i_min = i;
+      minAm->real = Am.real;
+      minAm->imag = Am.imag;
+    }
+
+  }
+
+  snr = 10.0*log10(sig/Emin);
+
+  return snr;
+}
+
+/*---------------------------------------------------------------------------*\
+
+   phase_synth_zero_order()
+
+   Synthesises phases based on SNR and a rule based approach.  No phase 
+   parameters are required apart from the SNR (which can be reduec to a
+   1 bit V/UV decision per frame).
+
+   The phase of each harmonic is modelled as the phase of a LPC
+   synthesis filter excited by an impulse.  Unlike the first order
+   model the position of the impulse is not transmitted, so we create
+   an excitation pulse train using a rule based approach.  
+
+   Consider a pulse train with a pulse starting time n=0, with pulses
+   repeated at a rate of Wo, the fundamental frequency.  A pulse train
+   in the time domain is equivalent to a pulse train in the frequency
+   domain.  We can make an excitation pulse train using a sum of
+   sinsusoids:
+
+     for(m=1; m<=L; m++)
+       ex[n] = cos(m*Wo*n)
+
+   Note: the Octave script ../octave/phase.m is an example of this if you would
+   like to try making a pulse train.
+
+   The phase of each excitation harmonic is:
+
+     arg(E[m]) = mWo
+
+   where E[m] are the complex excitation (freq domain) samples,
+   arg(x), just returns the phase of a complex sample x.
+
+   As we don't transmit the pulse position for this model, we need to
+   synthesise it.  Now the excitation pulses occur at a rate of Wo.
+   This means the phase of the first harmonic advances by N samples
+   over a synthesis frame of N samples.  For example if Wo is pi/20
+   (200 Hz), then over a 10ms frame (N=80 samples), the phase of the
+   first harmonic would advance (pi/20)*80 = 4*pi or two complete
+   cycles.
+
+   One complication is that two adjacent frames will have different
+   Wo, so we take the average of the two frames to track the
+   excitation phase of the fundamental (first harmonic):
+
+     arg[E[1]] = ((Wo + prev_Wo)/2)*N;
+
+   We then relate the phase of the m-th excitation harmonic to the
+   phase of the fundamental as:
+
+     arg(E[m]) = marg(E[1])
+
+   This E[m] then gets passed through the LPC synthesis filter to
+   determine the final harmonic phase.
+     
+   NOTES:
+
+     1/ This synthsis model is effectvely the same as simple LPC-10
+     vocoders, and yet sounds much better.  Why?
+
+     2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
+     also from MIT) first described this zero phase model, I need to look
+     up the paper.
+
+     3/ Note that this approach could cause some discontinuities in
+     the phase at the edge of synthesis frames, as no attempt is made
+     to make sure that the phase tracks are continuous (the excitation
+     phases are continuous, but not teh final phases after filtering
+     by the LPC spectra).  Technically this is a bad thing.  However
+     this may actually be a good thing, disturbing the phase tracks a
+     bit.  More research needed, e.g. test a synthsis model that adds
+     a small delta-W to make phase tracks line up for voiced
+     harmonics.
+
+\*---------------------------------------------------------------------------*/
+
+void phase_synth_zero_order(
+  float  snr,     /* SNR from first order model                */
+  COMP   H[],     /* LPC spectra samples                       */
+  float *prev_Wo, /* last frames Wo (we will update this here) */
+  float *ex_phase /* excitation phase of fundamental           */
+)
+{
+  int   Lrand;
+  int   m;
+  float new_phi;
+  COMP  Ex[MAX_AMP];		/* excitation samples */
+  COMP  A_[MAX_AMP];		/* synthesised harmonic samples */
+
+  /* 
+     Bunch of mixed voicing thresholds tried but in the end a simple
+     voiced/unvoiced model worked best.  With mixed voicing some
+     unvoiced speech had a "clicky" sound due to occasional high SNR
+     causing the first few harmonics to be modelled as voiced. I don't
+     really understand why simple one bit V/UV sounds so good -
+     everyone else seems to think mixed voicing models are required
+     for good quality speech.
+
+     Note code below supports mixed voicing but with VTHRESH1 == VTHRESH2
+     we get a simple V/UV model.
+  */
+
+  Lrand = model.L;
+  if (snr < VTHRESH2) {
+    Lrand = floor(model.L*(snr-VTHRESH1)/(VTHRESH2-VTHRESH1));
+    if (Lrand < 1) Lrand = 1;
+    if (Lrand > model.L) Lrand = model.L;
+  }
+
+  /* update excitation fundamental phase track */
+
+  ex_phase[0] += (*prev_Wo+model.Wo)*N/2.0;
+  ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
+  *prev_Wo = model.Wo;
+
+  /* now modify this frames phase using zero phase model */
+
+  for(m=1; m<=model.L; m++) {
+
+    /* generate excitation */
+
+    if (m <= Lrand) {
+        Ex[m].real = cos(ex_phase[0]*m);
+	Ex[m].imag = sin(ex_phase[0]*m);
+    }
+    else {
+	/* we probably don't need to LPC filter phase in unvoiced case,
+	   maybe test this theory some time */
+	float phi = TWO_PI*(float)rand()/RAND_MAX;
+	Ex[m].real = cos(phi);
+	Ex[m].imag = sin(phi);
+    }
+
+    /* filter using LPC filter */
+
+    A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag;
+    A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag;
+
+    /* modify sinusoidal phase */
+
+    new_phi = atan2(A_[m].imag, A_[m].real+1E-12);
+    model.phi[m] = new_phi;
+
+    /* little bit of randomness to phase - possibly makes females
+       sound slightly better, need to do some more research.  May not
+       be needed */
+
+    model.phi[m] += (m*model.Wo)*rand()/RAND_MAX;
+  }
+}
+
+/*---------------------------------------------------------------------------*\
+
+   phase_synth_first_order()
+
+   Synthesises phases based on SNR and the first oreder phase model
+   parameters.
+
+\*---------------------------------------------------------------------------*/
+
+void phase_synth_first_order(
+  float snr,     /* SNR from first order model */
+  COMP  H[],     /* LPC spectra samples        */
+  int   i_min,   /* best pulse position        */
+  COMP  minAm    /* best complex gain          */
+)
+{
+  int   Lrand;
+  int   m;
+  float new_phi;
+  COMP  Ex[MAX_AMP];		/* excitation samples */
+  COMP  A_[MAX_AMP];		/* synthesised harmonic samples */
+  COMP  Tm;  			
+
+  /* see notes in zero phase function above to V/UV model */
+
+  Lrand = model.L;
+  if (snr < VTHRESH2) {
+    Lrand = floor(model.L*(snr-VTHRESH1)/(VTHRESH2-VTHRESH1));
+    if (Lrand < 1) Lrand = 1;
+    if (Lrand > model.L) Lrand = model.L;
+  }
+
+  /* now modify sinusoidal model phase using phase model */
+
+  for(m=1; m<=model.L; m++) {
+
+    /* generate excitation */
+
+    if (m <= Lrand) {
+	Ex[m].real = cos(model.Wo*m*i_min);
+	Ex[m].imag = sin(-model.Wo*m*i_min);
+    }
+    else {
+      float phi = TWO_PI*(float)rand()/RAND_MAX;
+      Ex[m].real = cos(phi);
+      Ex[m].imag = sin(phi);
+    }
+
+    /* filter using LPC filter */
+
+    A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag;
+    A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag;
+
+    /* scale by complex gain (could have done this earlier at Ex[]
+       stage) */
+
+    Tm.real = A_[m].real*minAm.real - A_[m].imag*minAm.imag;
+    Tm.imag = A_[m].imag*minAm.real + A_[m].real*minAm.imag;
+
+    /* modify sinusoidal phase */
+
+    new_phi = atan2(Tm.imag, Tm.real+1E-12);
+    model.phi[m] = new_phi;
+  }
+}
+
diff --git a/codec2/src/phase.h b/codec2/src/phase.h
new file mode 100644
index 00000000..e692f938
--- /dev/null
+++ b/codec2/src/phase.h
@@ -0,0 +1,38 @@
+/*---------------------------------------------------------------------------*\
+                                                                             
+  FILE........: phase.h                                          
+  AUTHOR......: David Rowe                                             
+  DATE CREATED: 1/2/09                                                 
+                                                                             
+  Functions for modelling phase.
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License version 2, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __PHASE__
+#define __PHASE__
+
+#define PHASE_LPC_ORD 10
+
+float phase_model_first_order(float aks[], COMP H[], int *i_min, COMP *min_Am);
+void phase_synth_zero_order(float snr, COMP H[], float *prev_Wo, float *ex_phase);
+void phase_synth_first_order(float snr, COMP H[], int i_min, COMP min_Am);
+
+#endif
diff --git a/codec2/src/sinedec.c b/codec2/src/sinedec.c
index 37c131a6..a01a8e8d 100644
--- a/codec2/src/sinedec.c
+++ b/codec2/src/sinedec.c
@@ -26,10 +26,13 @@
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 
+#include <assert.h>
 #include <string.h>
 #include "sine.h"
 #include "quantise.h"
 #include "dump.h"
+#include "phase.h"
+#include "lpc.h"
 
 /*---------------------------------------------------------------------------*\
                                                                              
@@ -78,6 +81,9 @@ int main(int argc, char *argv[])
   int lpc_model, order;
   int lsp, lsp_quantiser;
   int dump;
+  
+  int phase, phase_model;
+  float prev_Wo, ex_phase;
 
   if (argc < 3) {
     printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n");
@@ -143,6 +149,13 @@ int main(int argc, char *argv[])
   if (lsp) 
       lsp_quantiser = atoi(argv[lsp+1]);
 
+  /* phase_model 0: zero phase
+     phase_model 1: 1st order polynomial */
+  phase = switch_present("--phase",argc,argv);
+  if (phase)
+      phase_model = atoi(argv[phase+1]);
+  assert((phase_model == 0) || (phase_model == 1));
+
   /* Initialise ------------------------------------------------------------*/
 
   init_decoder();
@@ -170,14 +183,44 @@ int main(int argc, char *argv[])
 
     dump_model(&model);
 
+    /* optional phase modelling */
+
+    if (phase) {
+	float Wn[AW_ENC];		/* windowed speech samples */
+	float Rk[PHASE_LPC_ORD+1];	/* autocorrelation coeffs  */
+	float aks[PHASE_LPC_ORD+1];
+        COMP  H[MAX_AMP];               /* LPC freq domain samples */
+	int   i_min;
+	COMP  min_Am;
+	
+	dump_phase(&model.phi[0]);
+
+	/* Determine LPC model using time domain LPC.  A little
+	   further down the development track optionally LPCs from lpc
+	   modelling/LSP quant for phase modelling */
+
+	for(i=0; i<AW_ENC; i++)
+	    Wn[i] = Sn[i]*w[i];
+	autocorrelate(Wn,Rk,AW_ENC,PHASE_LPC_ORD);
+	levinson_durbin(Rk,aks,PHASE_LPC_ORD);
+
+	snr = phase_model_first_order(aks, H, &i_min, &min_Am);
+	if (phase_model == 0)
+	    phase_synth_zero_order(snr, H, &prev_Wo, &ex_phase);
+	if (phase_model == 1)
+	    phase_synth_first_order(snr, H, i_min, min_Am);
+	
+        dump_phase_(&model.phi[0]);
+    }
+
     /* optional LPC model amplitudes */
 
     if (lpc_model) {
 	snr = lpc_model_amplitudes(Sn, &model, order, lsp_quantiser);
 	sum_snr += snr;
+        dump_quantised_model(&model);
     }
 
-    dump_quantised_model(&model);
 
     /* Synthesise speech */