FILE *fin; /* input speech file */
short buf[N]; /* input/output buffer */
float Sn[M]; /* float input speech samples */
- COMP Sw[FFT_ENC]; /* DFT of w[] */
+ COMP Sw[FFT_ENC]; /* DFT of Sn[] */
float w[M]; /* time domain hamming window */
COMP W[FFT_ENC]; /* DFT of w[] */
MODEL model;
for(i=0; i<N; i++)
Sn[i+M-N] = buf[i];
dump_Sn(Sn);
- dft_speech(Sw, Sn, w); dump_Sw(Sw);
-
+
/* Estimate pitch */
nlp(Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
/* estimate model parameters */
- dft_speech(Sw, Sn, w);
+ dft_speech(Sw, Sn, w); dump_Sw(Sw);
two_stage_pitch_refinement(&model, Sw);
estimate_amplitudes(&model, Sw, W);
dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
for(i=0; i<M; i++)
Wn[i] = Sn[i]*w[i];
- autocorrelate(Wn,Rk,M,PHASE_LPC);
- levinson_durbin(Rk,ak_phase,PHASE_LPC);
+ autocorrelate(Wn,Rk,M,LPC_ORD);
+ levinson_durbin(Rk,ak_phase,LPC_ORD);
if (lpc_model)
- assert(order == PHASE_LPC);
+ assert(order == LPC_ORD);
- dump_ak(ak_phase, PHASE_LPC);
+ dump_ak(ak_phase, LPC_ORD);
/* determine voicing */
--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: codec2.c
+ AUTHOR......: David Rowe
+ DATE CREATED: 21/8/2010
+
+ Codec2 fully quantised encoder and decoder functions. If you want to use
+ codec2, these are the functions you need to call.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2010 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "defines.h"
+#include "sine.h"
+#include "nlp.h"
+#include "dump.h"
+#include "lpc.h"
+#include "quantise.h"
+#include "phase.h"
+#include "postfilter.h"
+#include "interp.h"
+
+/* Per-instance codec state, created by codec2_create() and handed around as an opaque void pointer */
+typedef struct {
+ float Sn[M]; /* input speech: buffer of the M most recent samples */
+ float w[M]; /* time domain hamming window */
+ COMP W[FFT_ENC]; /* DFT of w[] */
+ float Pn[2*N]; /* trapezoidal synthesis window */
+ float Sn_[2*N]; /* synthesised speech */
+ float prev_Wo; /* previous frame's pitch estimate, passed to nlp() */
+ float ex_phase; /* excitation model phase track */
+ float bg_est; /* background noise estimate for post filter */
+ MODEL *prev_model; /* model parameters from 20ms ago; a pointer, so it must reference valid MODEL storage before use */
+} CODEC2;
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_create
+  AUTHOR......: David Rowe
+  DATE CREATED: 21/8/2010
+
+  Create and initialise an instance of the codec.
+
+  Returns an opaque handle to the codec state, or NULL if memory could
+  not be allocated.  The handle owns two allocations (the CODEC2 state
+  and the previous-frame MODEL); release both with codec2_destroy().
+
+\*---------------------------------------------------------------------------*/
+
+void *codec2_create(void)
+{
+    CODEC2 *c2;
+    int     i;
+
+    c2 = malloc(sizeof *c2);
+    if (c2 == NULL)
+        return NULL;
+
+    /* prev_model is held by pointer in CODEC2, so it needs storage of
+       its own.  calloc() clears the whole MODEL, which takes the place
+       of an explicit loop zeroing the harmonic amplitudes A[] (this
+       relies on all-bits-zero being 0.0, true for IEEE-754 floats). */
+
+    c2->prev_model = calloc(1, sizeof *c2->prev_model);
+    if (c2->prev_model == NULL) {
+        free(c2);
+        return NULL;
+    }
+    c2->prev_model->Wo = TWO_PI/P_MAX;
+
+    /* Start the input sample memory at a non-zero value so the first
+       frames are not zero energy (avoids divide by zero at start up) */
+
+    for(i=0; i<M; i++)
+        c2->Sn[i] = 1.0;
+    for(i=0; i<2*N; i++)
+        c2->Sn_[i] = 0;
+
+    make_analysis_window(c2->w,c2->W);
+    make_synthesis_window(c2->Pn);
+    quantise_init();
+
+    c2->prev_Wo = 0.0;
+    c2->bg_est = 0.0;
+    c2->ex_phase = 0.0;
+
+    return (void*)c2;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_destroy
+  AUTHOR......: David Rowe
+  DATE CREATED: 21/8/2010
+
+  Destroy an instance of the codec.
+
+  codec2_state must be a non-NULL handle returned by codec2_create();
+  it must not be used again after this call.
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_destroy(void *codec2_state)
+{
+    CODEC2 *c2;
+
+    assert(codec2_state != NULL);
+    c2 = (CODEC2*)codec2_state;
+
+    /* free the previous-frame model before the state that points at it */
+
+    free(c2->prev_model);
+    free(c2);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: analyse_one_frame
+  AUTHOR......: David Rowe
+  DATE CREATED: 21/8/2010
+
+  Helper for codec2_encode().  Shifts N new speech samples into the
+  analysis buffer c2->Sn[], then estimates the pitch, the harmonic
+  amplitudes and the voicing decision for that 10ms frame.
+
+  c2      - codec state; Sn[] and prev_Wo are updated
+  model   - receives the model parameters estimated for this frame
+  speech  - N new input samples
+  voiced  - receives the voicing decision from est_voicing_mbe()
+
+\*---------------------------------------------------------------------------*/
+
+static void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[], int *voiced)
+{
+    COMP  Sw[FFT_ENC];
+    COMP  Sw_[FFT_ENC];
+    float pitch;
+    int   i;
+
+    /* Read input speech */
+
+    for(i=0; i<M-N; i++)
+        c2->Sn[i] = c2->Sn[i+N];
+    for(i=0; i<N; i++)
+        c2->Sn[i+M-N] = speech[i];
+    dft_speech(Sw, c2->Sn, c2->w);
+
+    /* Estimate pitch.  The estimate is stored back in the codec state
+       (not a stray local) so the next call to nlp() receives it. */
+
+    nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
+    c2->prev_Wo = TWO_PI/pitch;
+    model->Wo = TWO_PI/pitch;
+
+    /* estimate model parameters */
+
+    /* NOTE(review): Sn[] has not changed since the dft_speech() call
+       above, so this second transform looks redundant unless nlp()
+       modifies Sw - kept to preserve existing behaviour, confirm. */
+
+    dft_speech(Sw, c2->Sn, c2->w);
+    two_stage_pitch_refinement(model, Sw);
+    estimate_amplitudes(model, Sw, c2->W);
+    est_voicing_mbe(model, Sw, c2->W, (FS/TWO_PI)*model->Wo, Sw_, voiced);
+}
+
+/*---------------------------------------------------------------------------*\
+
+  FUNCTION....: codec2_encode
+  AUTHOR......: David Rowe
+  DATE CREATED: 21/8/2010
+
+  Encodes 160 speech samples (20ms of speech) into 51 bits.
+
+  The bits[] array is not packed, each bit is stored in the LSB of
+  each byte in the bits[] array.
+
+  The codec2 algorithm actually operates internally on 10ms (80
+  sample) frames, so we run the encoding algorithm twice.  On the
+  first frame we just send the voicing bit.  On the second frame we
+  send all model parameters.
+
+  The bit allocation is:
+
+    Parameter                      bits/frame
+    --------------------------------------
+    Harmonic magnitudes (LSPs)     36
+    Low frequency LPC correction    1
+    Energy                          5
+    Pitch (fundamental frequency)   7
+    Voicing (10ms update)           2
+    TOTAL                          51
+
+  codec2_state - non-NULL handle from codec2_create()
+  bits         - output, one bit per element
+  speech       - input, 2*N samples are read
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_encode(void *codec2_state, char bits[], short speech[])
+{
+    CODEC2 *c2;
+    MODEL   model;
+    int     voiced1, voiced2;
+    int     nbits;
+
+    assert(codec2_state != NULL);
+    c2 = (CODEC2*)codec2_state;
+
+    /* First Frame - just send voicing ----------------------------------*/
+
+    analyse_one_frame(c2, &model, speech, &voiced1);
+
+    /* Second Frame - send all parameters --------------------------------*/
+
+    analyse_one_frame(c2, &model, &speech[N], &voiced2);
+
+    /* quantise */
+
+    nbits = 0;
+    encode_pitch(bits, &nbits, model.Wo);
+    encode_voicing(bits, &nbits, voiced1, voiced2);
+    encode_amplitudes(bits, &nbits, c2->Sn, c2->w);
+    assert(nbits == CODEC2_BITS_PER_FRAME);
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_decode
+ AUTHOR......: David Rowe
+ DATE CREATED: 21/8/2010
+
+ Decodes frames of 51 bits into 160 samples (20ms) of speech.
+
+ Sn_ - synthesised speech, written by synthesise()
+ bits - input bits, read by the decode_ functions (CODEC2_BITS_PER_FRAME of them)
+
+ NOTE(review): as written this function looks unfinished and is not
+ expected to compile on its own:
+ - codec2_state is asserted and cast below, but it is neither a
+ parameter nor a local of this function.
+ - voiced, model_a, model_b, Pn and buf are used but not declared
+ here; presumably voiced1/voiced2, model_fwd/model_back, c2->Pn
+ and a caller supplied output buffer were intended - confirm.
+ - only one synthesis pass and N clipped samples are visible, not
+ the two 10ms frames the description above implies.
+ - c2->prev_model is read by interp() but never updated here.
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_decode(float Sn_[], char bits[])
+{
+ CODEC2 *c2;
+ MODEL model;
+ float ak[LPC_ORD+1];
+ int voiced1, voiced2;
+ int i, nbits, transition;
+ MODEL model_fwd, model_back, model_interp;
+
+ assert(codec2_state != NULL); /* NOTE(review): codec2_state is not declared in this function */
+ c2 = (CODEC2*)codec2_state;
+ /* Unpack the frame; nbits counts the bits consumed so far */
+ nbits = 0;
+ model.Wo = decode_pitch(bits, &nbits);
+ decode_voicing(&voiced1, &voiced2, bits, &nbits);
+ decode_amplitudes(&model, ak, bits, &nbits);
+ assert(nbits == CODEC2_BITS_PER_FRAME);
+
+ /* First synthesis frame - interpolated from adjacent frames */
+
+ interp(c2->prev_model, &model, &model_interp, &model_fwd, &model_back, &transition);
+ phase_synth_zero_order(&model, ak, voiced1, &c2->ex_phase);
+
+ postfilter(&model, voiced, &c2->bg_est); /* NOTE(review): 'voiced' is not declared here */
+ if (transition) {
+ synthesise(Sn_,&model_a,Pn,1); /* NOTE(review): model_a, model_b and Pn are not declared here */
+ synthesise(Sn_,&model_b,Pn,0);
+ }
+ else {
+ synthesise(Sn_,&model,Pn,1);
+ }
+
+ /* Clip the first N synthesised samples to +/-32767 while copying them to buf[]; nothing is written to disk here */
+
+ for(i=0; i<N; i++) {
+ if (Sn_[i] > 32767.0)
+ buf[i] = 32767;
+ else if (Sn_[i] < -32767.0)
+ buf[i] = -32767;
+ else
+ buf[i] = Sn_[i];
+ }
+
+}
+
--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: codec2.h
+ AUTHOR......: David Rowe
+ DATE CREATED: 21/8/2010
+
+ Codec2 fully quantised encoder and decoder functions. If you want to use
+ codec2, these are the functions you need to call.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2010 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __CODEC2__
+#define __CODEC2__
+
+#define CODEC2_SAMPLES_PER_FRAME 160 /* speech samples per codec frame */
+#define CODEC2_BITS_PER_FRAME     51 /* encoded bits per codec frame */
+
+/* Create a codec instance; the returned handle is the codec2_state
+   argument of the functions below.  Declared with (void) so that a
+   call with stray arguments is diagnosed by the compiler. */
+void *codec2_create(void);
+
+/* Destroy an instance created by codec2_create() */
+void codec2_destroy(void *codec2_state);
+
+/* Encode CODEC2_SAMPLES_PER_FRAME samples from speech[] into
+   CODEC2_BITS_PER_FRAME bits, stored one bit per element of bits[] */
+void codec2_encode(void *codec2_state, char bits[], short speech[]);
+
+/* Decode CODEC2_BITS_PER_FRAME bits from bits[] into speech in Sn_[].
+   NOTE(review): unlike the other calls this takes no codec2_state
+   handle - confirm that is intended. */
+void codec2_decode(float Sn_[], char bits[]);
+
+#endif
/* General defines */
-#define N 80 /* number of samples per frame */
-#define MAX_AMP 80 /* maximum number of harmonics */
-#define PI 3.141592654 /* mathematical constant */
-#define TWO_PI 6.283185307 /* mathematical constant */
-#define FS 8000 /* sample rate in Hz */
-#define MAX_STR 256 /* maximum string size */
-
-#define NW 279 /* analysis window size */
-#define FFT_ENC 512 /* size of FFT used for encoder */
-#define FFT_DEC 512 /* size of FFT used in decoder */
-#define TW 40 /* Trapezoidal synthesis window overlap */
-#define V_THRESH 4.0 /* voicing threshold in dB */
-#define LPC_MAX 20 /* maximum LPC order */
-#define PHASE_LPC 10 /* maximum LPC order */
+#define N 80 /* number of samples per frame (10ms at FS = 8000 Hz) */
+#define MAX_AMP 80 /* maximum number of harmonics */
+#define PI 3.141592654 /* mathematical constant pi */
+#define TWO_PI 6.283185307 /* mathematical constant 2*pi */
+#define FS 8000 /* sample rate in Hz */
+#define MAX_STR 256 /* maximum string size */
+
+#define NW 279 /* analysis window size */
+#define FFT_ENC 512 /* size of FFT used for encoder */
+#define FFT_DEC 512 /* size of FFT used in decoder */
+#define TW 40 /* Trapezoidal synthesis window overlap */
+#define V_THRESH 4.0 /* voicing threshold in dB */
+#define LPC_MAX 20 /* maximum LPC order */
+#define LPC_ORD 10 /* LPC order used for phase modelling */
/* Pitch estimation defines */
+++ /dev/null
-/*---------------------------------------------------------------------------*\
-
- FILE........: initdec.c
- AUTHOR......: David Rowe
- DATE CREATED: 11/5/94
-
- Initialises sinusoidal speech decoder globals.
-
-\*---------------------------------------------------------------------------*/
-
-/*
- Copyright (C) 2009 David Rowe
-
- All rights reserved.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2, as
- published by the Free Software Foundation. This program is
- distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-#include "sine.h" /* sinusoidal header file */
-
-void init_decoder() {
- int i;
- float win;
-
- /* Generate Parzen window in time domain */
-
- win = 0.0;
- for(i=0; i<N/2-TW; i++)
- Pn[i] = 0.0;
- win = 0.0;
- for(i=N/2-TW; i<N/2+TW; win+=1.0/(2*TW), i++ )
- Pn[i] = win;
- for(i=N/2+TW; i<3*N/2-TW; i++)
- Pn[i] = 1.0;
- win = 1.0;
- for(i=3*N/2-TW; i<3*N/2+TW; win-=1.0/(2*TW), i++)
- Pn[i] = win;
- for(i=3*N/2+TW; i<2*N; i++)
- Pn[i] = 0.0;
-
- /* Init output buffer */
-
- for(i=0; i<AW_DEC; i++)
- Sn_[i] = 0.0;
-
-}
+++ /dev/null
-/*---------------------------------------------------------------------------*\
-
- FILE........: sinedec.c
- AUTHOR......: David Rowe
- DATE CREATED: 20/2/95
-
- Decoder program for sinudoidal codec.
-
-\*---------------------------------------------------------------------------*/
-
-/*
- Copyright (C) 2009 David Rowe
-
- All rights reserved.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2, as
- published by the Free Software Foundation. This program is
- distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-
-#include "defines.h"
-#include "sine.h"
-#include "dump.h"
-#include "lpc.h"
-#include "quantise.h"
-#include "phase.h"
-#include "postfilter.h"
-#include "interp.h"
-
-/*---------------------------------------------------------------------------*\
-
- switch_present()
-
- Searches the command line arguments for a "switch". If the switch is
- found, returns the command line argument where it ws found, else returns
- NULL.
-
-\*---------------------------------------------------------------------------*/
-
-int switch_present(sw,argc,argv)
-register char sw[]; /* switch in string form */
-register int argc; /* number of command line arguments */
-register char *argv[]; /* array of command line arguments in string form */
-{
- register int i; /* loop variable */
-
- for(i=1; i<argc; i++)
- if (!strcmp(sw,argv[i]))
- return(i);
-
- return 0;
-}
-
-/*---------------------------------------------------------------------------*\
-
- MAIN
-
-\*---------------------------------------------------------------------------*/
-
-int main(int argc, char *argv[])
-{
- FILE *fmodel; /* file of model parameters from encoder */
- FILE *fout; /* output speech file */
- FILE *fin; /* input speech file */
- short buf[N]; /* input/output buffer */
- float Sn[M]; /* float input speech samples */
- COMP Sw[FFT_ENC]; /* DFT of w[] */
- float w[M]; /* time domain hamming window */
- COMP W[FFT_ENC]; /* DFT of w[] */
- MODEL model;
- float Pn[2*N]; /* trapezoidal synthesis window */
- float Sn_[2*N]; /* synthesised speech */
- int i; /* loop variable */
- int frames;
- int length; /* number of frames so far */
-
- char out_file[MAX_STR];
- int arg;
- float snr;
- float sum_snr;
-
- int lpc_model, order;
- int lsp, lsp_quantiser;
- float ak[LPC_MAX+1];
-
- int dump;
-
- int phase0;
- float ex_phase[MAX_AMP+1];
- int voiced, voiced_1, voiced_2;
-
- int postfilt;
- float bg_est;
-
- int hand_snr;
- FILE *fsnr;
-
- MODEL model_1, model_2, model_3, model_synth, model_a, model_b;
- int transition, decimate;
-
- for(i=0; i<2*N; i++)
- Sn_[i] = 0;
-
- voiced_1 = voiced_2 = 0;
- model_1.Wo = TWO_PI/P_MIN;
- model_1.L = floor(PI/model_1.Wo);
- for(i=1; i<=model_1.L; i++) {
- model_1.A[i] = 0.0;
- model_1.phi[i] = 0.0;
- }
- for(i=1; i<=MAX_AMP; i++) {
- ex_phase[i] = 0.0;
- }
- model_synth = model_3 = model_2 = model_1;
-
- if (argc < 3) {
- printf("usage: sinedec InputFile ModelFile [-o OutputFile] [-o lpc Order]\n");
- printf(" [--dump DumpFilePrefix]\n");
- exit(0);
- }
-
- /* Interpret command line arguments -------------------------------------*/
-
- /* Input file */
-
- if ((fin = fopen(argv[1],"rb")) == NULL) {
- printf("Error opening input speech file: %s\n",argv[1]);
- exit(1);
- }
-
- /* Model parameter file */
-
- if ((fmodel = fopen(argv[2],"rb")) == NULL) {
- printf("Error opening model file: %s\n",argv[2]);
- exit(1);
- }
-
- /* Output file */
-
- if ((arg = switch_present("-o",argc,argv))) {
- if ((fout = fopen(argv[arg+1],"wb")) == NULL) {
- printf("Error opening output speech file: %s\n",argv[arg+1]);
- exit(1);
- }
- strcpy(out_file,argv[arg+1]);
- }
- else
- fout = NULL;
-
- /* Length (no. of frames) */
-
- if ((length = switch_present("-l",argc,argv))) {
- length = atoi(argv[length+1]);
- if (length < 0) {
- printf("Error in length: %d\n",length);
- exit(1);
- }
- }
- else
- length = 32000;
-
- lpc_model = 0;
- if ((arg = switch_present("--lpc",argc,argv))) {
- lpc_model = 1;
- order = atoi(argv[arg+1]);
- if ((order < 4) || (order > 20)) {
- printf("Error in lpc order: %d\n", order);
- exit(1);
- }
- }
-
- dump = switch_present("--dump",argc,argv);
- if (dump)
- dump_on(argv[dump+1]);
-
- lsp = switch_present("--lsp",argc,argv);
- lsp_quantiser = 0;
-
- phase0 = switch_present("--phase0",argc,argv);
- if (phase0) {
- ex_phase[0] = 0;
- }
-
- hand_snr = switch_present("--hand_snr",argc,argv);
- if (hand_snr) {
- fsnr = fopen(argv[hand_snr+1],"rt");
- assert(fsnr != NULL);
- }
-
- bg_est = 0.0;
- postfilt = switch_present("--postfilter",argc,argv);
-
- decimate = switch_present("--dec",argc,argv);
- transition = 0;
-
- /* Initialise ------------------------------------------------------------*/
-
- make_analysis_window(w,W);
- make_synthesis_window(Pn);
- quantise_init();
-
- /* Main loop ------------------------------------------------------------*/
-
- frames = 0;
- sum_snr = 0;
- while(fread(&model,sizeof(model),1,fmodel)) {
- frames++;
-
- /* Read input speech */
-
- fread(buf,sizeof(short),N,fin);
- for(i=0; i<M-N; i++)
- Sn[i] = Sn[i+N];
- for(i=0; i<N; i++)
- Sn[i+M-N] = buf[i];
- dump_Sn(Sn);
- dft_speech(Sw, Sn, w); dump_Sw(Sw);
-
- dump_model(&model);
-
- /* optional zero-phase modelling */
-
- if (phase0) {
- float Wn[M]; /* windowed speech samples */
- float ak_phase[PHASE_LPC+1]; /* autocorrelation coeffs */
- float Rk[PHASE_LPC+1]; /* autocorrelation coeffs */
- COMP Sw_[FFT_ENC];
-
- dump_phase(&model.phi[0], model.L);
-
- /* Determine LPCs for phase modelling. Note that we may also
- find the LPCs as part of the {Am} modelling, this can
- probably be combined in the final codec. However during
- development some subtle bugs were found when combining LPC
- and phase models so for the purpose of development it's
- easier to find LPCs indepenently for phase modelling
- here. */
-
- for(i=0; i<M; i++)
- Wn[i] = Sn[i]*w[i];
- autocorrelate(Wn,Rk,M,PHASE_LPC);
- levinson_durbin(Rk,ak_phase,PHASE_LPC);
-
- if (lpc_model)
- assert(order == PHASE_LPC);
-
- dump_ak(ak_phase, PHASE_LPC);
-
- /* determine voicing */
-
- snr = est_voicing_mbe(&model, Sw, W, (FS/TWO_PI)*model.Wo, Sw_, &voiced);
- dump_Sw_(Sw_);
- dump_snr(snr);
-
- /* just to make sure we are not cheating - kill all phases */
-
- for(i=0; i<MAX_AMP; i++)
- model.phi[i] = 0;
- if (hand_snr) {
- fscanf(fsnr,"%f\n",&snr);
- voiced = snr > 2.0;
- }
- phase_synth_zero_order(&model, ak_phase, voiced, ex_phase);
-
- if (postfilt)
- postfilter(&model, voiced, &bg_est);
- }
-
- /* optional LPC model amplitudes */
-
- if (lpc_model) {
- snr = lpc_model_amplitudes(Sn, w, &model, order, lsp, ak);
- sum_snr += snr;
- dump_quantised_model(&model);
- }
-
- /* option decimation to 20ms rate, which enables interpolation
- routine to synthesise in between frame */
-
- if (decimate) {
- if (frames%2) {
-
- /* odd frames use the original model parameters */
-
- model_synth = model_2;
- transition = 0;
- }
- else {
- interp(&model_3, &model_1, &model_synth, &model_a, &model_b,
- &transition);
-
- /* phase need to be supplied outside of this routine, e.g. via
- a phase model */
-
- for(i=1; i<=model_synth.L; i++)
- model_synth.phi[i] = model_2.phi[i];
- }
-
- model_3 = model_2;
- model_2 = model_1;
- model_1 = model;
- model = model_synth;
- }
-
- /*
- Simulate Wo quantisation noise
- model.Wo += 2.0*(PI/8000)*(1.0 - 2.0*(float)rand()/RAND_MAX);
- if (model.Wo > TWO_PI/20.0) model.Wo = TWO_PI/20.0;
- if (model.Wo < TWO_PI/160.0) model.Wo = TWO_PI/160.0;
- model.L = floor(PI/model.Wo);
- */
-
- /* Synthesise speech */
-
- if (fout != NULL) {
-
- if (transition) {
- synthesise(Sn_,&model_a,Pn,1);
- synthesise(Sn_,&model_b,Pn,0);
- }
- else {
- synthesise(Sn_,&model,Pn,1);
- }
-
- /* Save output speech to disk */
-
- for(i=0; i<N; i++) {
- if (Sn_[i] > 32767.0)
- buf[i] = 32767;
- else if (Sn_[i] < -32767.0)
- buf[i] = -32767;
- else
- buf[i] = Sn_[i];
- }
- fwrite(buf,sizeof(short),N,fout);
- }
- }
-
- if (fout != NULL)
- fclose(fout);
-
- if (lpc_model)
- printf("SNR av = %5.2f dB\n", sum_snr/frames);
-
- if (dump)
- dump_off();
-
- if (hand_snr)
- fclose(fsnr);
-
- return 0;
-}
-
+++ /dev/null
-/*---------------------------------------------------------------------------*\
-
- FILE........: sinenc.c
- AUTHOR......: David Rowe
- DATE CREATED: 20/2/95
-
- Sinusoidal speech encoder program using external (Matlab) pitch estimator.
-
-\*---------------------------------------------------------------------------*/
-
-/*
- Copyright (C) 2009 David Rowe
-
- All rights reserved.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2, as
- published by the Free Software Foundation. This program is
- distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "defines.h"
-#include "dump.h"
-#include "sine.h"
-
-/*---------------------------------------------------------------------------*\
-
- switch_present()
-
- Searches the command line arguments for a "switch". If the switch is
- found, returns the command line argument where it ws found, else returns
- NULL.
-
-\*---------------------------------------------------------------------------*/
-
-int switch_present(sw,argc,argv)
- char sw[]; /* switch in string form */
- int argc; /* number of command line arguments */
- char *argv[]; /* array of command line arguments in string form */
-{
- int i; /* loop variable */
-
- for(i=1; i<argc; i++)
- if (!strcmp(sw,argv[i]))
- return(i);
-
- return 0;
-}
-
-/*---------------------------------------------------------------------------*\
-
- MAIN
-
-\*---------------------------------------------------------------------------*/
-
-int main(int argc, char *argv[])
-{
- FILE *fin; /* input speech sample file */
- FILE *fmodel; /* output file of model parameters */
- FILE *fp; /* input text file containing pitch estimates */
- short buf[N]; /* input speech sample buffer */
- float Sn[M]; /* float input speech samples */
- COMP Sw[FFT_ENC]; /* DFT of Sn[] */
- float w[M]; /* time domain hamming window */
- COMP W[FFT_ENC]; /* DFT of w[] */
- MODEL model;
- int length; /* number of frames to process */
- float pitch; /* current pitch estimate from external pitch file */
- int i; /* loop variable */
- FILE *fref; /* optional output file with refined pitch estimate */
- int arg;
- int dump;
- int frames;
-
- if (argc < 5) {
- printf("usage: sinenc InputFile ModelFile Frames PitchFile\n");
- exit(1);
- }
-
- /* Interpret command line arguments -------------------------------------*/
-
- if ((fin = fopen(argv[1],"rb")) == NULL) {
- printf("Error opening input file: %s\n",argv[1]);
- exit(1);
- }
-
- if ((fmodel = fopen(argv[2],"wb")) == NULL) {
- printf("Error opening output model file: %s\n",argv[2]);
- exit(1);
- }
-
- length = atoi(argv[3]);
-
- if ((fp = fopen(argv[4],"rt")) == NULL) {
- printf("Error opening input pitch file: %s\n",argv[4]);
- exit(1);
- }
-
- dump = switch_present("--dump",argc,argv);
- if (dump)
- dump_on(argv[dump+1]);
-
- if ((arg = switch_present("--ref",argc,argv))) {
- if ((fref = fopen(argv[arg+1],"wt")) == NULL) {
- printf("Error opening output pitch refinement file: %s\n",argv[5]);
- exit(1);
- }
- }
- else
- fref = NULL;
-
- /* Initialise sample buffer memories to stop divide by zero errors
- and zero energy frames at the start of simulation */
-
- for(i=0; i<M; i++)
- Sn[i] = 1.0;
-
- make_analysis_window(w, W);
-
- /* Main loop ------------------------------------------------------------*/
-
- frames = 0;
- while((fread(buf,sizeof(short),N,fin) == N) && (frames != length)) {
- frames++;
-
- /* Update input speech buffers */
-
- for(i=0; i<M-N; i++)
- Sn[i] = Sn[i+N];
- for(i=0; i<N; i++)
- Sn[i+M-N] = buf[i];
-
- /* Estimate pitch */
-
- fscanf(fp,"%f\n",&pitch);
-
- /* construct analysis window */
-
- model.Wo = TWO_PI/pitch;
-
- /* estimate and model parameters */
-
- dft_speech(Sw, Sn, w);
- two_stage_pitch_refinement(&model, Sw);
- estimate_amplitudes(&model, Sw, W);
- dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
-
- /* save model parameters */
-
- if (fref != NULL && frames > 2)
- fprintf(fref,"%f\n",model.Wo);
- fwrite(&model,sizeof(model),1,fmodel);
- printf("frame: %d\r",frames);
- }
-
- /* close files and exit */
-
- if (fref != NULL) fclose(fref);
- fclose(fin);
- fclose(fmodel);
-
- if (dump)
- dump_off();
-
- return 0;
-}
-