From 05e762b7b77c680e729a18f597eedb0f4bf069bf Mon Sep 17 00:00:00 2001 From: drowe67 Date: Sat, 21 Aug 2010 07:01:40 +0000 Subject: [PATCH] part way through coding up codec2.c codec functions, need to revisit interpolation for 10ms voicing git-svn-id: https://svn.code.sf.net/p/freetel/code@180 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2/src/c2sim.c | 15 +- codec2/src/codec2.c | 260 ++++++++++++++++++++++++++++++ codec2/src/codec2.h | 41 +++++ codec2/src/defines.h | 28 ++-- codec2/src/initdec.c | 56 ------- codec2/src/sinedec.c | 367 ------------------------------------------- codec2/src/sinenc.c | 177 --------------------- 7 files changed, 322 insertions(+), 622 deletions(-) create mode 100644 codec2/src/codec2.c create mode 100644 codec2/src/codec2.h delete mode 100644 codec2/src/initdec.c delete mode 100644 codec2/src/sinedec.c delete mode 100644 codec2/src/sinenc.c diff --git a/codec2/src/c2sim.c b/codec2/src/c2sim.c index 04407bd6..31addfea 100644 --- a/codec2/src/c2sim.c +++ b/codec2/src/c2sim.c @@ -79,7 +79,7 @@ int main(int argc, char *argv[]) FILE *fin; /* input speech file */ short buf[N]; /* input/output buffer */ float Sn[M]; /* float input speech samples */ - COMP Sw[FFT_ENC]; /* DFT of w[] */ + COMP Sw[FFT_ENC]; /* DFT of Sn[] */ float w[M]; /* time domain hamming window */ COMP W[FFT_ENC]; /* DFT of w[] */ MODEL model; @@ -222,8 +222,7 @@ int main(int argc, char *argv[]) for(i=0; i +#include +#include +#include +#include + +#include "defines.h" +#include "sine.h" +#include "nlp.h" +#include "dump.h" +#include "lpc.h" +#include "quantise.h" +#include "phase.h" +#include "postfilter.h" +#include "interp.h" + +typedef struct { + float Sn[M]; /* input speech */ + float w[M]; /* time domain hamming window */ + COMP W[FFT_ENC]; /* DFT of w[] */ + float Pn[2*N]; /* trapezoidal synthesis window */ + float Sn_[2*N]; /* synthesised speech */ + float prev_Wo; /* previous frame's pitch estimate */ + float ex_phase; /* excitation model phase track */ + float bg_est; /* background noise estimate for post filter */ + MODEL *prev_model; /* model parameters from 20ms ago */ +} CODEC2; + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: codec2_create + AUTHOR......: David Rowe + DATE CREATED: 21/8/2010 + + Create and initialise an instance of the codec. + +\*---------------------------------------------------------------------------*/ + +void *codec2_create() +{ + CODEC2 *c2; + int i,l; + + c2 = (CODEC2*)malloc(sizeof(CODEC2)); + + for(i=0; iSn[i] = 1.0; + for(i=0; i<2*N; i++) + c2->Sn_[i] = 0; + make_analysis_window(c2->w,c2->W); + make_synthesis_window(c2->Pn); + quantise_init(); + c2->prev_Wo = 0.0; + c2->bg_est = 0.0; + c2->ex_phase = 0.0; + + for(l=1; l<=MAX_AMP; l++) + c2->prev_model.A[l] = 0.0; + c2->prev_model.Wo = = TWO_PI/P_MAX; + + return (void*)c2; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: codec2_create + AUTHOR......: David Rowe + DATE CREATED: 21/8/2010 + + Destroy an instance of the codec. + +\*---------------------------------------------------------------------------*/ + +void codec2_destroy(void *codec2_state) +{ + assert(codec2_state != NULL); + free(codec2_state); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: codec2_encode + AUTHOR......: David Rowe + DATE CREATED: 21/8/2010 + + Encodes 160 speech samples (20ms of speech) into 51 bits. + + The bits[] array is not packed, each bit is stored in the LSB of + each byte in the bits[] array. + + The codec2 algorithm actually operates internally on 10ms (80 + sample) frames, so we run the encoding algorithm twice. On the + first frame we just send the voicing bit. One the second frame we + send all model parameters. + + The bit allocation is: + + Parameter bits/frame + -------------------------------------- + Harmonic magnitudes (LSPs) 36 + Low frequency LPC correction 1 + Energy 5 + Pitch (fundamental frequnecy) 7 + Voicing (10ms update) 2 + TOTAL 51 + +\*---------------------------------------------------------------------------*/ + +void codec2_encode(void *codec2_state, char bits[], short speech[]) +{ + CODEC2 *c2; + COMP Sw[FFT_ENC]; + COMP Sw_[FFT_ENC]; + MODEL model; + float pitch; + int voiced1, voiced2; + int i, nbits; + + assert(codec2_state != NULL); + c2 = (CODEC2*)codec2_state; + + /* First Frame - just send voicing ----------------------------------*/ + + /* Read input speech */ + + for(i=0; iSn[i] = c2->Sn[i+N]; + for(i=0; iSn[i+M-N] = speech[i]; + dft_speech(Sw, c2->Sn, c2->w); + + /* Estimate pitch */ + + nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo); + prev_Wo = TWO_PI/pitch; + model.Wo = TWO_PI/pitch; + + /* estimate model parameters */ + + dft_speech(Sw, c2->Sn, c2->w); + two_stage_pitch_refinement(&model, Sw); + estimate_amplitudes(&model, Sw, c2->W); + est_voicing_mbe(&model, Sw, c2->W, (FS/TWO_PI)*model.Wo, Sw_, &voiced1); + + /* Second Frame - send all parameters --------------------------------*/ + + /* Read input speech */ + + for(i=0; iSn[i] = c2->Sn[i+N]; + for(i=0; iSn[i+M-N] = speech[i+N]; + dft_speech(Sw, c2->Sn, c2->w); + + /* Estimate pitch */ + + nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo); + prev_Wo = TWO_PI/pitch; + model.Wo = TWO_PI/pitch; + + /* estimate model parameters */ + + dft_speech(Sw, c2->Sn, c2->w); + two_stage_pitch_refinement(&model, Sw); + estimate_amplitudes(&model, Sw, c2->W); + est_voicing_mbe(&model, Sw, c2->W, (FS/TWO_PI)*model.Wo, Sw_, &voiced2); + + /* quantise */ + + nbits = 0; + encode_pitch(bits, &nbits, model.Wo); + encode_voicing(bits, &nbits, voiced1, voiced2); + encode_amplitudes(bits, &nbits, c2->Sn, c2->w); + assert(nbits == CODEC2_BITS_PER_FRAME); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: codec2_decode + AUTHOR......: David Rowe + DATE CREATED: 21/8/2010 + + Decodes frames of 51 bits into 160 samples (20ms) of speech. + +\*---------------------------------------------------------------------------*/ + +void codec2_decode(float Sn_[], char bits[]) +{ + CODEC2 *c2; + MODEL model; + float ak[LPC_ORD+1]; + int voiced1, voiced2; + int i, nbits, transition; + MODEL model_fwd, model_back, model_interp; + + assert(codec2_state != NULL); + c2 = (CODEC2*)codec2_state; + + nbits = 0; + model.Wo = decode_pitch(bits, &nbits); + decode_voicing(&voiced1, &voiced2, bits, &nbits); + decode_amplitudes(&model, ak, bits, &nbits); + assert(nbits == CODEC2_BITS_PER_FRAME); + + /* First synthesis frame - interpolated from adjacent frames */ + + interp(c2->prev_model, &model, &model_interp, &model_fwd, &model_back, &transition); + phase_synth_zero_order(&model, ak, voiced1, &c2->ex_phase); + + postfilter(&model, voiced, &c2->bg_est); + if (transition) { + synthesise(Sn_,&model_a,Pn,1); + synthesise(Sn_,&model_b,Pn,0); + } + else { + synthesise(Sn_,&model,Pn,1); + } + + /* Save output speech to disk */ + + for(i=0; i 32767.0) + buf[i] = 32767; + else if (Sn_[i] < -32767.0) + buf[i] = -32767; + else + buf[i] = Sn_[i]; + } + +} + diff --git a/codec2/src/codec2.h b/codec2/src/codec2.h new file mode 100644 index 00000000..bca92f52 --- /dev/null +++ b/codec2/src/codec2.h @@ -0,0 +1,41 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: codec2.h + AUTHOR......: David Rowe + DATE CREATED: 21/8/2010 + + Codec2 fully quantised encoder and decoder functions. If you want use + codec2, these are the functions you need to call. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2010 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __CODEC2__ +#define __CODEC2__ + +#define CODEC2_SAMPLES_PER_FRAME 160 +#define CODEC2_BITS_PER_FRAME 51 + +void *codec2_create(); +void codec2_destroy(void *codec2_state); +void codec2_encode(void *codec2_state, char bits[], short speech[]); +void codec2_decode(float Sn_[], char bits[]); + +#endif diff --git a/codec2/src/defines.h b/codec2/src/defines.h index c963ba4a..393bcd0d 100644 --- a/codec2/src/defines.h +++ b/codec2/src/defines.h @@ -37,20 +37,20 @@ /* General defines */ -#define N 80 /* number of samples per frame */ -#define MAX_AMP 80 /* maximum number of harmonics */ -#define PI 3.141592654 /* mathematical constant */ -#define TWO_PI 6.283185307 /* mathematical constant */ -#define FS 8000 /* sample rate in Hz */ -#define MAX_STR 256 /* maximum string size */ - -#define NW 279 /* analysis window size */ -#define FFT_ENC 512 /* size of FFT used for encoder */ -#define FFT_DEC 512 /* size of FFT used in decoder */ -#define TW 40 /* Trapezoidal synthesis window overlap */ -#define V_THRESH 4.0 /* voicing threshold in dB */ -#define LPC_MAX 20 /* maximum LPC order */ -#define PHASE_LPC 10 /* maximum LPC order */ +#define N 80 /* number of samples per frame */ +#define MAX_AMP 80 /* maximum number of harmonics */ +#define PI 3.141592654 /* mathematical constant */ +#define TWO_PI 6.283185307 /* mathematical constant */ +#define FS 8000 /* sample rate in Hz */ +#define MAX_STR 256 /* maximum string size */ + +#define NW 279 /* analysis window size */ +#define FFT_ENC 512 /* size of FFT used for encoder */ +#define FFT_DEC 512 /* size of FFT used in decoder */ +#define TW 40 /* Trapezoidal synthesis window overlap */ +#define V_THRESH 4.0 /* voicing threshold in dB */ +#define LPC_MAX 20 /* maximum LPC order */ +#define LPC_ORD 10 /* phase modelling LPC order */ /* Pitch estimation defines */ diff --git a/codec2/src/initdec.c b/codec2/src/initdec.c deleted file mode 100644 index 067dfe79..00000000 --- a/codec2/src/initdec.c +++ /dev/null @@ -1,56 +0,0 @@ -/*---------------------------------------------------------------------------*\ - - FILE........: initdec.c - AUTHOR......: David Rowe - DATE CREATED: 11/5/94 - - Initialises sinusoidal speech decoder globals. - -\*---------------------------------------------------------------------------*/ - -/* - Copyright (C) 2009 David Rowe - - All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2, as - published by the Free Software Foundation. This program is - distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include "sine.h" /* sinusoidal header file */ - -void init_decoder() { - int i; - float win; - - /* Generate Parzen window in time domain */ - - win = 0.0; - for(i=0; i -#include -#include -#include -#include - -#include "defines.h" -#include "sine.h" -#include "dump.h" -#include "lpc.h" -#include "quantise.h" -#include "phase.h" -#include "postfilter.h" -#include "interp.h" - -/*---------------------------------------------------------------------------*\ - - switch_present() - - Searches the command line arguments for a "switch". If the switch is - found, returns the command line argument where it ws found, else returns - NULL. - -\*---------------------------------------------------------------------------*/ - -int switch_present(sw,argc,argv) -register char sw[]; /* switch in string form */ -register int argc; /* number of command line arguments */ -register char *argv[]; /* array of command line arguments in string form */ -{ - register int i; /* loop variable */ - - for(i=1; i 20)) { - printf("Error in lpc order: %d\n", order); - exit(1); - } - } - - dump = switch_present("--dump",argc,argv); - if (dump) - dump_on(argv[dump+1]); - - lsp = switch_present("--lsp",argc,argv); - lsp_quantiser = 0; - - phase0 = switch_present("--phase0",argc,argv); - if (phase0) { - ex_phase[0] = 0; - } - - hand_snr = switch_present("--hand_snr",argc,argv); - if (hand_snr) { - fsnr = fopen(argv[hand_snr+1],"rt"); - assert(fsnr != NULL); - } - - bg_est = 0.0; - postfilt = switch_present("--postfilter",argc,argv); - - decimate = switch_present("--dec",argc,argv); - transition = 0; - - /* Initialise ------------------------------------------------------------*/ - - make_analysis_window(w,W); - make_synthesis_window(Pn); - quantise_init(); - - /* Main loop ------------------------------------------------------------*/ - - frames = 0; - sum_snr = 0; - while(fread(&model,sizeof(model),1,fmodel)) { - frames++; - - /* Read input speech */ - - fread(buf,sizeof(short),N,fin); - for(i=0; i 2.0; - } - phase_synth_zero_order(&model, ak_phase, voiced, ex_phase); - - if (postfilt) - postfilter(&model, voiced, &bg_est); - } - - /* optional LPC model amplitudes */ - - if (lpc_model) { - snr = lpc_model_amplitudes(Sn, w, &model, order, lsp, ak); - sum_snr += snr; - dump_quantised_model(&model); - } - - /* option decimation to 20ms rate, which enables interpolation - routine to synthesise in between frame */ - - if (decimate) { - if (frames%2) { - - /* odd frames use the original model parameters */ - - model_synth = model_2; - transition = 0; - } - else { - interp(&model_3, &model_1, &model_synth, &model_a, &model_b, - &transition); - - /* phase need to be supplied outside of this routine, e.g. via - a phase model */ - - for(i=1; i<=model_synth.L; i++) - model_synth.phi[i] = model_2.phi[i]; - } - - model_3 = model_2; - model_2 = model_1; - model_1 = model; - model = model_synth; - } - - /* - Simulate Wo quantisation noise - model.Wo += 2.0*(PI/8000)*(1.0 - 2.0*(float)rand()/RAND_MAX); - if (model.Wo > TWO_PI/20.0) model.Wo = TWO_PI/20.0; - if (model.Wo < TWO_PI/160.0) model.Wo = TWO_PI/160.0; - model.L = floor(PI/model.Wo); - */ - - /* Synthesise speech */ - - if (fout != NULL) { - - if (transition) { - synthesise(Sn_,&model_a,Pn,1); - synthesise(Sn_,&model_b,Pn,0); - } - else { - synthesise(Sn_,&model,Pn,1); - } - - /* Save output speech to disk */ - - for(i=0; i 32767.0) - buf[i] = 32767; - else if (Sn_[i] < -32767.0) - buf[i] = -32767; - else - buf[i] = Sn_[i]; - } - fwrite(buf,sizeof(short),N,fout); - } - } - - if (fout != NULL) - fclose(fout); - - if (lpc_model) - printf("SNR av = %5.2f dB\n", sum_snr/frames); - - if (dump) - dump_off(); - - if (hand_snr) - fclose(fsnr); - - return 0; -} - diff --git a/codec2/src/sinenc.c b/codec2/src/sinenc.c deleted file mode 100644 index 5f75be3c..00000000 --- a/codec2/src/sinenc.c +++ /dev/null @@ -1,177 +0,0 @@ -/*---------------------------------------------------------------------------*\ - - FILE........: sinenc.c - AUTHOR......: David Rowe - DATE CREATED: 20/2/95 - - Sinusoidal speech encoder program using external (Matlab) pitch estimator. - -\*---------------------------------------------------------------------------*/ - -/* - Copyright (C) 2009 David Rowe - - All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2, as - published by the Free Software Foundation. This program is - distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include -#include -#include "defines.h" -#include "dump.h" -#include "sine.h" - -/*---------------------------------------------------------------------------*\ - - switch_present() - - Searches the command line arguments for a "switch". If the switch is - found, returns the command line argument where it ws found, else returns - NULL. - -\*---------------------------------------------------------------------------*/ - -int switch_present(sw,argc,argv) - char sw[]; /* switch in string form */ - int argc; /* number of command line arguments */ - char *argv[]; /* array of command line arguments in string form */ -{ - int i; /* loop variable */ - - for(i=1; i 2) - fprintf(fref,"%f\n",model.Wo); - fwrite(&model,sizeof(model),1,fmodel); - printf("frame: %d\r",frames); - } - - /* close files and exit */ - - if (fref != NULL) fclose(fref); - fclose(fin); - fclose(fmodel); - - if (dump) - dump_off(); - - return 0; -} - -- 2.25.1