Status
------
+Still in experimental/development stage - no 2400 bit/s codec
+available yet. Progress to date:
+
1. Unquantised encoder (sinenc) and decoder (sinedec) running under
Linux/gcc, pitch estimator untested. The decoder (sinedec) is a
test-bed for various modelling and quantisation options - these are
3. Phase model developed that uses 0 bits for phase and 1 bit/frame
for voiced/unvoiced decision.
+4. Non-Linear Pitch (NLP) pitch estimator working OK, could use a pitch
+ tracker to improve a few problem frames.
+
[[source]]
The Source Code
---------------
-------------------
[X] Milestone 0 - Project kick off
- [X] Milestone 1 - Baseline unquantised codec running under Linux/gcc
- [ ] Milestone 3 - Prototype 2400 bit/s codec
+ [X] Milestone 1 - Alpha 2400 bits/s codec
[X] Spectral amplitudes modelled and quantised
[X] Phase and voicing model developed
- [ ] Pitch estimator integrated into encoder
+ [ ] Pitch estimator
[ ] Frame rate/quantisation schemes for 2400 bit/s developed
- [ ] Refactor to develop a encoder/decoder functions
+ [ ] Refactor to develop separate encoder/decoder functions
[ ] Test phone call over LAN
- [ ] Fixed point port
- [ ] codec2-on-a-chip embedded DSP/CPU port
+ [ ] Release 0.1 for Alpha Testing
+ [ ] Milestone 2 - Beta codec for digital radio
+ [ ] Gather samples from the community with different speakers,
+ input filtering, and background noise conditions that break
+ codec.
+ [ ] Further develop algorithm based on samples above
+ [ ] Design FEC scheme
+ [ ] Test over digital radio links
+ [ ] Milestone 3 - Fixed point port
+ [ ] Milestone 4 - codec2-on-a-chip embedded DSP/CPU port
[[howitworks]]
How it Works
Can I help?
-----------
-Maybe, check out the latest version of the
+Maybe; check out the latest version of the
http://freetel.svn.sourceforge.net/viewvc/freetel/codec2/TODO.txt?view=log[TODO]
list and the development roadmap above and see if there is anything
that interests you.
---------------
[ ] Important Open Issues
- [ ] Why zero phase model doesn'y work for mmt1
- [ ] Pitch errors on mmt1
+ [ ] Why zero phase model doesn't work for mmt1
+ [ ] residual noise on zero phase model
+ + "navy" on hts1a, "frog" on morig
+ + perhaps due to mis-alignment of phases at frame boundaries?
+ + or possibly pitch errors
+ + need a way to research this
+ [ ] Pitch errors on mmt1, morig
+ we may need a tracker
+ + suggest manually step through each file, check for
+ pitch errors, design tracker and check again
+ + or develop manual pitch tracks and check estimator
+ with tracker against these.
[ ] removal of LPC modelling errors for males
- + first few haromic energies get raised
+ + first few harmonic energies (e.g. mmt1, hts1a) get raised
+ [ ] good quality LSP quantisation of {Am}
[ ] conversion to 20ms frames
- + without significnat distortion
+ + without significant distortion
[ ] Planned tasks and half formed ideas for codec2.....
[X] Convert files from DOS to Unix
#include "dump.h"
#include "phase.h"
#include "lpc.h"
+#include "synth.h"
/*---------------------------------------------------------------------------*\
int phase, phase_model;
float prev_Wo, ex_phase;
+ float phi_prev[MAX_AMP];
+ float Wo_prev;
if (argc < 3) {
printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n");
dump_on(argv[dump+1]);
lsp = switch_present("--lsp",argc,argv);
+ lsp_quantiser = 0;
if (lsp)
lsp_quantiser = atoi(argv[lsp+1]);
if (fout != NULL) {
- synthesise_mixed(Pn,&model,Sn_);
+ if (phase)
+ synthesise_continuous_phase(Pn, &model, Sn_, (snr>2.0), &Wo_prev, phi_prev);
+ else
+ synthesise_mixed(Pn,&model,Sn_);
/* Save output speech to disk */
#include "sine.h"
-void synthesise_mixed(Pn,model,Sn_)
-float Pn[]; /* time domain Parzen window */
-MODEL *model; /* ptr to model parameters for this frame */
-float Sn_[]; /* time domain synthesised signal */
+void synthesise_mixed(
+ float Pn[], /* time domain Parzen window */
+ MODEL *model, /* ptr to model parameters for this frame */
+ float Sn_[] /* time domain synthesised signal */
+)
{
int i,l,j,b; /* loop variables */
COMP Sw_[FFT_DEC]; /* DFT of synthesised signal */
Sn_[i] = Sw_[j].real*Pn[i];
}
+/*---------------------------------------------------------------------------*\
+
+ synthesise_continuous_phase()
+
+ Synthesises one frame of speech in the frequency domain from the
+ sinusoidal model parameters and overlap-adds it into Sn_.
+
+ This version adjusts the frequency of each harmonic slightly to
+ ensure a continuous phase track from the previous frame. Used with
+ the zero phase model, when original phases are not available.
+
+ At sample n=0 of this frame, we assume the phase of harmonic m is
+ set to phi_prev[m]. We want the final phase at sample N to be
+ phi[m]. This means the phase track must start at phi_prev[m],
+ rotate several times based on mWo, then end up at phase phi[m].
+
+ To ensure that the phase track arrives at phi[m] by sample N we add
+ a small frequency offset by slightly shifting the frequency of each
+ harmonic. In this implementation the offset only influences which
+ DFT bin the harmonic is placed in (the bin nearest to
+ l*Wo + delta_w).
+
+ The continuous phase track model is only useful for voiced speech.
+ In fact, for unvoiced speech we desire a rough, discontinuous phase
+ track. So in unvoiced frames or in cases where the fundamental
+ frequency varies by more than 20%, we don't add the small frequency
+ offset.
+
+ Parameters:
+
+ Pn[] - synthesis window, read at indexes 0..2N-1
+ model - read only: L, Wo, A[1..L] and phi[1..L] are used
+ Sn_[] - 2N sample output buffer; on entry holds the previous
+ call's output, on exit the first N samples are complete
+ voiced - non-zero selects the continuous phase track (subject
+ to the 20% Wo check)
+ Wo_prev - in/out: Wo of the previous frame, updated to model->Wo
+ phi_prev[] - in/out: phases of the previous frame, entries 1..L
+ are updated to model->phi[]
+
+ NOTE(review): b[] holds MAX_AMP entries and is indexed 1..L, so this
+ presumably relies on model->L < MAX_AMP - confirm against sine.h.
+
+\*---------------------------------------------------------------------------*/
+
+void synthesise_continuous_phase(
+ float Pn[], /* time domain Parzen window */
+ MODEL *model, /* ptr to model parameters for this frame */
+ float Sn_[], /* time domain synthesised signal */
+ int voiced, /* non-zero if frame is voiced */
+ float *Wo_prev, /* previous frame's Wo (updated on return) */
+ float phi_prev[] /* previous frame's phases (updated on return) */
+)
+{
+ int i,l,j; /* loop variables */
+ COMP Sw_[FFT_DEC]; /* DFT of synthesised signal */
+ int b[MAX_AMP]; /* DFT bin of each harmonic */
+ float delta_w; /* frequency offset required */
+
+ /* Update memories: move the tail of the previous output
+ (samples N..2N-2) down to the start of Sn_ so that this frame
+ can be overlap-added onto it, and clear sample N-1 */
+
+ for(i=0; i<N-1; i++) {
+ Sn_[i] = Sn_[i+N];
+ }
+ Sn_[N-1] = 0.0;
+
+ for(i=0; i<FFT_DEC; i++) {
+ Sw_[i].real = 0.0;
+ Sw_[i].imag = 0.0;
+ }
+
+ if (!voiced || (fabs(*Wo_prev - model->Wo) > 0.2*model->Wo)) {
+ //printf("disc voiced = %d\n", voiced);
+ //printf("%f %f\n", fabs(*Wo_prev - model->Wo), 0.2*model->Wo);
+ /* discontinuous phase tracks: no phase adjustment of frequency
+ as we want discontinuous phase tracks. Each harmonic simply
+ goes in the DFT bin nearest to l*Wo */
+
+ for(l=1; l<=model->L; l++)
+ b[l] = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
+ }
+ else {
+ /* continuous phase tracks: determine frequency of each harmonic
+ to ensure smooth phase track at the centre of next synthesis
+ frame. delta_w starts as the phase error left after advancing
+ phi_prev[l] over N samples at the average of the previous and
+ current harmonic frequency; it is then wrapped into
+ [-PI, PI) and spread over the N samples of the frame */
+
+ for(l=1; l<=model->L; l++) {
+ //printf("Wo_prev = %f Wo = %f\n", *Wo_prev, model->Wo);
+ delta_w = (model->phi[l] - l*N*(*Wo_prev + model->Wo)/2.0 - phi_prev[l]);
+ delta_w -= TWO_PI*floor(delta_w/TWO_PI + 0.5);
+ delta_w /= N;
+ b[l] = floor((l*model->Wo+delta_w)*FFT_DEC/TWO_PI + 0.5);
+ //printf("delta_w = %f b[%d] = %d\n", delta_w,l,b[l]);
+ }
+ }
+
+ /* update memories for phase tracking */
+
+ *Wo_prev = model->Wo;
+ for(l=1; l<=model->L; l++)
+ phi_prev[l] = model->phi[l];
+
+ /* Now set up frequency domain synthesised speech: each harmonic is
+ written to bin b[l] and its complex conjugate to the mirror bin
+ FFT_DEC-b[l]. NOTE(review): bins are assigned, not accumulated,
+ so if two harmonics round to the same bin the later one
+ replaces the earlier one - confirm this is intended */
+
+ for(l=1; l<=model->L; l++) {
+ Sw_[b[l]].real = model->A[l]*cos(model->phi[l]);
+ Sw_[b[l]].imag = model->A[l]*sin(model->phi[l]);
+ Sw_[FFT_DEC-b[l]].real = Sw_[b[l]].real;
+ Sw_[FFT_DEC-b[l]].imag = -Sw_[b[l]].imag;
+ }
+
+ /* Perform inverse DFT. NOTE(review): &Sw_[-1].imag presumably
+ supplies the 1-based float array that four1() expects; it forms
+ a pointer before the start of Sw_, which is not strictly
+ portable C - confirm against four1.c */
+
+ four1(&Sw_[-1].imag,FFT_DEC,1);
+
+ /* Overlap add to previous samples: the last N-1 samples of the
+ inverse DFT output are windowed and added to the first N-1
+ samples of Sn_; the first N+1 samples of the inverse DFT output
+ are windowed and stored in Sn_[N-1..2N-1] */
+
+ for(i=0; i<N-1; i++) {
+ Sn_[i] += Sw_[FFT_DEC-N+1+i].real*Pn[i];
+ }
+ for(i=N-1,j=0; i<2*N; i++,j++)
+ Sn_[i] = Sw_[j].real*Pn[i];
+}
+
--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: synth.h
+ AUTHOR......: David Rowe
+ DATE CREATED: 11/9/09
+
+ Functions for synthesising a speech signal in the frequency domain from
+ the sinusoidal model parameters.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2009 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __SYNTH__
+#define __SYNTH__
+
+#include "sine.h"
+
+void synthesise_mixed(float Pn[], MODEL *model, float Sn_[]);
+void synthesise_continuous_phase(float Pn[], MODEL *model, float Sn_[],
+ int voiced, float *Wo_prev, float phi_prev[]);
+#endif
CFLAGS = -I. -I../src -I../speex -Wall -g -DFLOATING_POINT -DVAR_ARRAYS
-all: genres lsptest genlsp extract vqtrain tnlp
+all: genres lsptest genlsp extract vqtrain tnlp tcontphase
genres: genres.o ../src/lpc.o
gcc $(CFLAGS) -o genres genres.o ../src/lpc.o -lm
TNLP_OBJ = tnlp.o ../src/nlp.o ../src/four1.o ../src/initenc.o ../src/dump.o \
../src/globals.o ../src/refine.o
+# Objects for the tcontphase test program (all prebuilt via the %.o rule)
+TCONTPHASE_OBJ = tcontphase.o ../src/globals.o ../src/dump.o ../src/synth.o \
+ ../src/four1.o ../src/initdec.o
+
lsptest: $(LSP_TEST_OBJ)
gcc $(CFLAGS) -o lsptest $(LSP_TEST_OBJ) -lm
tnlp: $(TNLP_OBJ)
gcc $(CFLAGS) -o tnlp $(TNLP_OBJ) -lm
+tcontphase: $(TCONTPHASE_OBJ)
+ gcc $(CFLAGS) -o tcontphase $(TCONTPHASE_OBJ) -lm
+
%.o : %.c
$(CC) -c $(CFLAGS) $< -o $@
--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: tcontphase.c
+ AUTHOR......: David Rowe
+ DATE CREATED: 11/9/09
+
+ Test program for developing continuous phase track synthesis algorithms.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2009 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#define N 80 /* frame size */
+#define F 160 /* frames to synthesise */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "sine.h"
+#include "dump.h"
+#include "synth.h"
+
+int frames;
+
+/*---------------------------------------------------------------------------*\
+
+ switch_present()
+
+ Searches the command line arguments for a "switch". If the switch is
+ found, returns the index into argv[] where it was found, else returns
+ 0. The search starts at argv[1], so 0 is never a valid match and a
+ non-zero result can be used directly as a "switch present" flag.
+
+ Only exact string matches count; if the switch appears more than once
+ the index of the first occurrence is returned.
+
+\*---------------------------------------------------------------------------*/
+
+int switch_present(sw,argc,argv)
+ char sw[]; /* switch in string form */
+ int argc; /* number of command line arguments */
+ char *argv[]; /* array of command line arguments in string form */
+{
+ int i; /* loop variable */
+
+ for(i=1; i<argc; i++)
+ if (!strcmp(sw,argv[i]))
+ return(i);
+
+ return 0;
+}
+
+/*---------------------------------------------------------------------------*\
+
+ MAIN
+
+ Test driver for synthesise_continuous_phase(). Usage:
+
+ tcontphase OutputRawSpeechFile [--dump DumpFilePrefix]
+
+ Sets up a model containing a single harmonic (L = 1, A[1] = 1000,
+ phi[1] = 0, Wo = PI*(50/4000)), synthesises F frames with the voiced
+ flag set, and writes N samples per frame to the output file as raw
+ shorts.
+
+ Returns 0 on success. Exits with status 0 after printing the usage
+ message if no output file is given, and with status 1 if the output
+ file cannot be opened.
+
+ NOTE(review): model, Pn and Sn_ are not declared in this file;
+ presumably they are globals made visible via sine.h - confirm.
+
+\*---------------------------------------------------------------------------*/
+
+int main(argc,argv)
+int argc;
+char *argv[];
+{
+ FILE *fout; /* output raw speech file */
+ short buf[N]; /* one frame of output samples */
+ int i,j; /* loop variables */
+ int dump; /* argv index of "--dump", 0 if not given */
+ float phi_prev[MAX_AMP]; /* phase memory for the synthesiser */
+ float Wo_prev; /* Wo memory for the synthesiser */
+
+ if (argc < 2) {
+ printf("\nusage: tcontphase OutputRawSpeechFile\n");
+ exit(0);
+ }
+
+ /* Output file: raw samples, opened in binary mode */
+
+ if ((fout = fopen(argv[1],"wb")) == NULL) {
+ printf("Error opening output speech file: %s\n",argv[1]);
+ exit(1);
+ }
+
+ dump = switch_present("--dump",argc,argv);
+ if (dump)
+ dump_on(argv[dump+1]); /* NOTE(review): no check that dump+1 < argc, so a trailing "--dump" passes argv[argc] (NULL) */
+
+ init_decoder();
+
+ for(i=0; i<MAX_AMP; i++)
+ phi_prev[i] = 0.0;
+ Wo_prev = 0.0; /* with Wo_prev == 0 the first frame fails the 20% Wo check in the synthesiser and takes the discontinuous branch */
+
+ model.L = 1;
+ model.A[1] = 1000;
+ model.Wo = PI*(50.0/4000.0); /* presumably 50 Hz at an 8 kHz sample rate - TODO confirm */
+ model.phi[1] = 0;
+
+ frames = 0;
+ for(j=0; j<F; j++) {
+ frames++;
+
+ synthesise_continuous_phase(Pn, &model, Sn_, 1, &Wo_prev, phi_prev);
+ for(i=0; i<N; i++)
+ buf[i] = Sn_[i]; /* NOTE(review): converted to short without clipping or rounding */
+ fwrite(buf,sizeof(short),N,fout); /* NOTE(review): return value is not checked */
+ }
+
+ fclose(fout);
+ if (dump) dump_off();
+
+ return 0;
+}
+
+
init_encoder();
make_window(NW);
- /* align with current version of sinenc.c, fix this later */
-
frames = 0;
while(fread(buf,sizeof(short),N,fin)) {
frames++;
nlp(Sn,N,M,PITCH_MIN,PITCH_MAX,&pitch,Sw);
fprintf(fout,"%f\n",pitch);
-
- printf("frame: %d pitch: %f\n",frames,pitch);
}
fclose(fin);