int main(int argc, char *argv[])
{
- C2CONST c2const = c2const_create(8000);
- int n_samp = c2const.n_samp;
- int m_pitch = c2const.m_pitch;
- FILE *fout = NULL; /* output speech file */
- FILE *fin; /* input speech file */
- short buf[N_SAMP]; /* input/output buffer */
- float buf_float[N_SAMP];
- float Sn[m_pitch]; /* float input speech samples */
- float Sn_pre[m_pitch]; /* pre-emphasised input speech samples */
- COMP Sw[FFT_ENC]; /* DFT of Sn[] */
- codec2_fft_cfg fft_fwd_cfg;
- codec2_fftr_cfg fftr_fwd_cfg;
- codec2_fftr_cfg fftr_inv_cfg;
- float w[m_pitch]; /* time domain hamming window */
- COMP W[FFT_ENC]; /* DFT of w[] */
- MODEL model;
- float Pn[2*N_SAMP]; /* trapezoidal synthesis window */
- float Sn_[2*N_SAMP]; /* synthesised speech */
- int i,m; /* loop variable */
- int frames;
- float prev_Wo, prev__Wo, prev_uq_Wo;
- float pitch;
- char out_file[MAX_STR];
- char ampexp_arg[MAX_STR];
- char phaseexp_arg[MAX_STR];
- float snr;
- float sum_snr;
+ int Fs = 8000;
+ int set_fs;
+
int orderi;
int lpc_model = 0, order = LPC_ORD;
int lsp = 0, lspd = 0, lspvq = 0;
int lspanssi = 0,
#endif
int prede = 0;
- float pre_mem = 0.0, de_mem = 0.0;
- float ak[order];
- // COMP Sw_[FFT_ENC];
- // COMP Ew[FFT_ENC];
-
- int phase0 = 0;
- float ex_phase[MAX_AMP+1];
-
int postfilt;
- float bg_est = 0.0;
-
int hand_voicing = 0, phaseexp = 0, ampexp = 0, hi = 0, simlpcpf = 0, lspmelread = 0;
int lpcpf = 0;
FILE *fvoicing = 0;
FILE *flspmel = 0;
-
- MODEL prev_model;
int dec;
int decimate = 1;
- float lsps[order];
- float e, prev_e;
- int lsp_indexes[order];
- float lsps_[order];
- float Woe_[2];
-
- float lsps_dec[4][LPC_ORD], e_dec[4], weight, weight_inc, ak_dec[4][LPC_ORD];
- MODEL model_dec[4], prev_model_dec;
- float prev_lsps_dec[order], prev_e_dec;
-
- void *nlp_states;
- float hpf_states[2];
+ int amread, Woread;
+ int awread;
+ int hmread;
+ int phase0 = 0;
int scalar_quant_Wo_e = 0;
int scalar_quant_Wo_e_low = 0;
int vector_quant_Wo_e = 0;
int dump_pitch_e = 0;
- FILE *fjvm = NULL;
- #ifdef DUMP
- int dump;
- #endif
- #if 0
- struct PEXP *pexp = NULL;
- struct AEXP *aexp = NULL;
- #endif
float gain = 1.0;
int bpf_en = 0;
int bpfb_en = 0;
- float bpf_buf[BPF_N+N_SAMP];
- float lspmelvq_mse = 0.0;
- int amread, Woread;
FILE *fam = NULL, *fWo = NULL;
- int awread;
FILE *faw = NULL;
- int hmread;
FILE *fhm = NULL;
+ FILE *fjvm = NULL;
+ #ifdef DUMP
+ int dump;
+ #endif
+ char ampexp_arg[MAX_STR];
+ char phaseexp_arg[MAX_STR];
+ char out_file[MAX_STR];
+ FILE *fout = NULL; /* output speech file */
char* opt_string = "ho:";
struct option long_options[] = {
+ { "Fs", required_argument, &set_fs, 1 },
{ "lpc", required_argument, &lpc_model, 1 },
{ "lspjnd", no_argument, &lspjnd, 1 },
{ "lspmel", no_argument, &lspmel, 1 },
{ NULL, no_argument, NULL, 0 }
};
int num_opts=sizeof(long_options)/sizeof(struct option);
- COMP Aw[FFT_ENC];
- COMP H[MAX_AMP];
-
- for(i=0; i<m_pitch; i++) {
- Sn[i] = 1.0;
- Sn_pre[i] = 1.0;
- }
- for(i=0; i<2*N_SAMP; i++)
- Sn_[i] = 0;
-
- prev_uq_Wo = prev_Wo = prev__Wo = c2const.Wo_min;
-
- prev_model.Wo = c2const.Wo_max;
- prev_model.L = floor(PI/prev_model.Wo);
- for(i=1; i<=prev_model.L; i++) {
- prev_model.A[i] = 0.0;
- prev_model.phi[i] = 0.0;
- }
- for(i=1; i<=MAX_AMP; i++) {
- //ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX;
- ex_phase[i] = 0.0;
- }
- e = prev_e = 1;
- hpf_states[0] = hpf_states[1] = 0.0;
-
- nlp_states = nlp_create(m_pitch);
-
- if (argc < 2) {
- print_help(long_options, num_opts, argv);
- }
/*----------------------------------------------------------------*\
break;
switch (opt) {
case 0:
- if(strcmp(long_options[option_index].name, "lpc") == 0) {
+ if(strcmp(long_options[option_index].name, "Fs") == 0) {
+ Fs= atoi(optarg);
+ if((Fs != 8000) && (Fs != 16000)) {
+ fprintf(stderr, "Error Fs must be 8000 or 16000\n");
+ exit(1);
+ }
+ } else if(strcmp(long_options[option_index].name, "lpc") == 0) {
orderi = atoi(optarg);
if((orderi < 4) || (orderi > order)) {
fprintf(stderr, "Error in LPC order (4 to %d): %s\n", order, optarg);
/* Input file */
+ FILE *fin; /* input speech file */
if (strcmp(argv[optind], "-") == 0) fin = stdin;
else if ((fin = fopen(argv[optind],"rb")) == NULL) {
fprintf(stderr, "Error opening input speech file: %s: %s.\n",
exit(1);
}
+ C2CONST c2const = c2const_create(Fs);
+ int n_samp = c2const.n_samp;
+ int m_pitch = c2const.m_pitch;
+
+ short buf[N_SAMP]; /* input/output buffer */
+ float buf_float[N_SAMP];
+ float Sn[m_pitch]; /* float input speech samples */
+ float Sn_pre[m_pitch]; /* pre-emphasised input speech samples */
+ COMP Sw[FFT_ENC]; /* DFT of Sn[] */
+ codec2_fft_cfg fft_fwd_cfg;
+ codec2_fftr_cfg fftr_fwd_cfg;
+ codec2_fftr_cfg fftr_inv_cfg;
+ float w[m_pitch]; /* time domain hamming window */
+ COMP W[FFT_ENC]; /* DFT of w[] */
+ MODEL model;
+ float Pn[2*N_SAMP]; /* trapezoidal synthesis window */
+ float Sn_[2*N_SAMP]; /* synthesised speech */
+ int i,m; /* loop variable */
+ int frames;
+ float prev_f0;
+ float pitch;
+ float snr;
+ float sum_snr;
+
+ float pre_mem = 0.0, de_mem = 0.0;
+ float ak[order];
+ // COMP Sw_[FFT_ENC];
+ // COMP Ew[FFT_ENC];
+
+ float ex_phase[MAX_AMP+1];
+
+ float bg_est = 0.0;
+
+
+ MODEL prev_model;
+ float lsps[order];
+ float e, prev_e;
+ int lsp_indexes[order];
+ float lsps_[order];
+ float Woe_[2];
+
+ float lsps_dec[4][LPC_ORD], e_dec[4], weight, weight_inc, ak_dec[4][LPC_ORD];
+ MODEL model_dec[4], prev_model_dec;
+ float prev_lsps_dec[order], prev_e_dec;
+
+ void *nlp_states;
+ float hpf_states[2];
+ #if 0
+ struct PEXP *pexp = NULL;
+ struct AEXP *aexp = NULL;
+ #endif
+ float bpf_buf[BPF_N+N_SAMP];
+ float lspmelvq_mse = 0.0;
+
+ COMP Aw[FFT_ENC];
+ COMP H[MAX_AMP];
+
+
+ for(i=0; i<m_pitch; i++) {
+ Sn[i] = 1.0;
+ Sn_pre[i] = 1.0;
+ }
+ for(i=0; i<2*N_SAMP; i++)
+ Sn_[i] = 0;
+
+ prev_f0 = 1/P_MAX_S;
+
+ prev_model.Wo = c2const.Wo_max;
+ prev_model.L = floor(PI/prev_model.Wo);
+ for(i=1; i<=prev_model.L; i++) {
+ prev_model.A[i] = 0.0;
+ prev_model.phi[i] = 0.0;
+ }
+ for(i=1; i<=MAX_AMP; i++) {
+ //ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX;
+ ex_phase[i] = 0.0;
+ }
+ e = prev_e = 1;
+ hpf_states[0] = hpf_states[1] = 0.0;
+
+ nlp_states = nlp_create(&c2const);
+
+ if (argc < 2) {
+ print_help(long_options, num_opts, argv);
+ }
+
+
ex_phase[0] = 0;
Woe_[0] = Woe_[1] = 1.0;
\*------------------------------------------------------------*/
- nlp(nlp_states,Sn,N_SAMP,c2const.p_min,c2const.p_max,&pitch,Sw,W,&prev_uq_Wo);
+ nlp(nlp_states, Sn, N_SAMP, &pitch, Sw, W, &prev_f0);
model.Wo = TWO_PI/pitch;
dft_speech(&c2const, fft_fwd_cfg, Sw, Sn, w);
make_synthesis_window(&c2->c2const, c2->Pn);
c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL);
quantise_init();
- c2->prev_Wo_enc = 0.0;
+ c2->prev_f0_enc = 1/P_MAX_S;
c2->bg_est = 0.0;
c2->ex_phase = 0.0;
}
c2->prev_e_dec = 1;
- c2->nlp = nlp_create(m_pitch);
+ c2->nlp = nlp_create(&c2->c2const);
if (c2->nlp == NULL) {
return NULL;
}
/* Estimate pitch */
- nlp(c2->nlp,c2->Sn,n_samp,c2->c2const.p_min,c2->c2const.p_max,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
+ nlp(c2->nlp, c2->Sn, n_samp, &pitch, Sw, c2->W, &c2->prev_f0_enc);
PROFILE_SAMPLE_AND_LOG(model_start, nlp_start, " nlp");
model->Wo = TWO_PI/pitch;
estimate_amplitudes(model, Sw, c2->W, 0);
PROFILE_SAMPLE_AND_LOG(estamps, two_stage, " est_amps");
est_voicing_mbe(&c2->c2const, model, Sw, c2->W);
- c2->prev_Wo_enc = model->Wo;
PROFILE_SAMPLE_AND_LOG2(estamps, " est_voicing");
#ifdef DUMP
dump_model(model);
float *Sn_; /* [2*n_samp] synthesised output speech */
float ex_phase; /* excitation model phase track */
float bg_est; /* background noise estimate for post filter */
- float prev_Wo_enc; /* previous frame's pitch estimate */
+ float prev_f0_enc; /* previous frame's f0 estimate */
MODEL prev_model_dec; /* previous frame's model parameters */
float prev_lsps_dec[LPC_ORD]; /* previous frame's LSPs */
float prev_e_dec; /* previous frame's LPC energy */
#define PI 3.141592654 /* mathematical constant */
#endif
#define TWO_PI 6.283185307 /* mathematical constant */
-#define MAX_STR 256 /* maximum string size */
+#define MAX_STR 2048 /* maximum string size */
#define FFT_ENC 512 /* size of FFT used for encoder */
#define FFT_DEC 512 /* size of FFT used in decoder */
#include "codec2_fft.h"
#undef PROFILE
#include "machdep.h"
+#include "os.h"
#include <assert.h>
#include <math.h>
\*---------------------------------------------------------------------------*/
-#define PMAX_M 600 /* maximum NLP analysis window size */
+#define PMAX_M 320 /* maximum NLP analysis window size */
#define COEFF 0.95 /* notch filter parameter */
#define PE_FFT_SIZE 512 /* DFT size for pitch estimation */
#define DEC 5 /* decimation factor */
#define NLP_NTAP 48 /* Decimation LPF order */
#undef POST_PROCESS_MBE /* choose post processor */
-//#undef DUMP
+/* 8 to 16 kHz sample rate conversion */
+
+#define FDMDV_OS 2 /* oversampling rate */
+#define FDMDV_OS_TAPS_16K 48 /* number of OS filter taps at 16kHz */
+#define FDMDV_OS_TAPS_8K (FDMDV_OS_TAPS_16K/FDMDV_OS) /* number of OS filter taps at 8kHz */
/*---------------------------------------------------------------------------*\
};
typedef struct {
+ int Fs; /* sample rate in Hz */
int m;
float w[PMAX_M/DEC]; /* DFT window */
float sq[PMAX_M]; /* squared speech samples */
float mem_x,mem_y; /* memory for notch filter */
float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
- codec2_fft_cfg fft_cfg; /* kiss FFT config */
+ codec2_fft_cfg fft_cfg; /* kiss FFT config */
+ float *Sn16k; /* Fs=16kHz input speech vector: FDMDV_OS_TAPS_16K samples
+                  of decimating filter memory followed by the n_samp newest
+                  input samples; allocated by nlp_create() only when
+                  Fs == 16000 and released by nlp_destroy() */
+ FILE *f; /* NOTE(review): not referenced by any code visible here -- confirm it is still needed */
} NLP;
#ifdef POST_PROCESS_MBE
#endif
float post_process_sub_multiples(COMP Fw[],
int pmin, int pmax, float gmax, int gmax_bin,
- float *prev_Wo);
+ float *prev_f0);
+static void fdmdv_16_to_8(float out8k[], float in16k[], int n);
/*---------------------------------------------------------------------------*\
\*---------------------------------------------------------------------------*/
-void *nlp_create(
-int m /* analysis window size */
-)
+void *nlp_create(C2CONST *c2const)
{
NLP *nlp;
int i;
-
- assert(m <= PMAX_M);
+    /* Allocates and initialises the NLP pitch estimator state using
+       c2const->m_pitch, c2const->Fs and c2const->n_samp.  Returns NULL
+       when the NLP state or the 16 kHz filter memory cannot be
+       allocated; release the state with nlp_destroy(). */
+
+    int m = c2const->m_pitch;
+    int Fs = c2const->Fs;
nlp = (NLP*)malloc(sizeof(NLP));
if (nlp == NULL)
return NULL;
+    assert((Fs == 8000) || (Fs == 16000));
+    nlp->Fs = Fs;
+
nlp->m = m;
+
+    /* Sn16k is only used at 16 kHz; keep it NULL otherwise so the
+       pointer is never left uninitialised */
+
+    nlp->Sn16k = NULL;
+
+    /* if running at 16kHz allocate storage for decimating filter memory */
+
+    if (Fs == 16000) {
+        nlp->Sn16k = (float*)malloc(sizeof(float)*(FDMDV_OS_TAPS_16K + c2const->n_samp));
+
+        /* check the allocation before the buffer is written to */
+
+        if (nlp->Sn16k == NULL) {
+            free(nlp);
+            return NULL;
+        }
+        for(i=0; i<FDMDV_OS_TAPS_16K; i++) {
+            nlp->Sn16k[i] = 0.0;
+        }
+
+        /* most processing occurs at 8 kHz sample rate so halve m; only
+           this local copy is halved, nlp->m keeps the value stored above
+           and nlp() halves its own copy on each call */
+
+        m /= 2;
+    }
+
+    assert(m <= PMAX_M);
+
for(i=0; i<m/DEC; i++) {
nlp->w[i] = 0.5 - 0.5*cosf(2*PI*i/(m/DEC-1));
}
nlp = (NLP*)nlp_state;
codec2_fft_free(nlp->fft_cfg);
+ if (nlp->Fs == 16000) {
+ free(nlp->Sn16k);
+ }
free(nlp_state);
}
References:
- [1] http://www.itr.unisa.edu.au/~steven/thesis/dgr.pdf Chapter 4
+ [1] http://rowetel.com/downloads/1997_rowe_phd_thesis.pdf Chapter 4
\*---------------------------------------------------------------------------*/
float nlp(
void *nlp_state,
- float Sn[], /* input speech vector */
- int n, /* frames shift (no. new samples in Sn[]) */
- int pmin, /* minimum pitch value */
- int pmax, /* maximum pitch value */
- float *pitch, /* estimated pitch period in samples */
- COMP Sw[], /* Freq domain version of Sn[] */
- COMP W[], /* Freq domain window */
- float *prev_Wo
+ float Sn[], /* input speech vector */
+ int n, /* frames shift (no. new samples in Sn[]) */
+ float *pitch, /* estimated pitch period in samples at current Fs */
+ COMP Sw[], /* Freq domain version of Sn[] */
+ COMP W[], /* Freq domain window */
+ float *prev_f0 /* previous pitch f0 in Hz, memory for pitch tracking */
)
{
NLP *nlp;
- float notch; /* current notch filter output */
+ float notch; /* current notch filter output */
COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal (input/output) */
float gmax;
int gmax_bin;
- int m, i,j;
+ int m, i, j;
float best_f0;
PROFILE_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem);
nlp = (NLP*)nlp_state;
m = nlp->m;
- PROFILE_SAMPLE(start);
-
/* Square, notch filter at DC, and LP filter vector */
- for(i=m-n; i<m; i++) /* square latest speech samples */
- nlp->sq[i] = Sn[i]*Sn[i];
+ /* If running at 16 kHz decimate to 8 kHz, as NLP was designed for
+ Fs = 8kHz. The decimating filter introduces about 3ms of delay,
+ that shouldn't be a problem as pitch changes slowly. */
+
+ if (nlp->Fs == 8000) {
+ /* Square latest input samples */
+
+ for(i=m-n; i<m; i++) {
+ nlp->sq[i] = Sn[i]*Sn[i];
+ }
+ }
+ else {
+ assert(nlp->Fs == 16000);
+
+ /* re-sample at 8 KHz */
+
+ for(i=0; i<n; i++) {
+ nlp->Sn16k[FDMDV_OS_TAPS_16K+i] = Sn[m-n+i];
+ }
+
+ m /= 2; n /= 2;
+
+ float Sn8k[n];
+ fdmdv_16_to_8(Sn8k, &nlp->Sn16k[FDMDV_OS_TAPS_16K], n);
+
+ /* Square latest input samples */
+
+ for(i=m-n, j=0; i<m; i++, j++) {
+ nlp->sq[i] = Sn8k[j]*Sn8k[j];
+ }
+ assert(j <= n);
+ }
+ //fprintf(stderr, "n: %d m: %d\n", n, m);
+
+ PROFILE_SAMPLE(start);
for(i=m-n; i<m; i++) { /* notch filter at DC */
notch = nlp->sq[i] - nlp->mem_x;
dump_Fw(Fw);
#endif
+ /* todo: express everything in f0, as pitch in samples is dep on Fs */
+
+ int pmin = floor(SAMPLE_RATE*P_MIN_S);
+ int pmax = floor(SAMPLE_RATE*P_MAX_S);
+
/* find global peak */
gmax = 0.0;
PROFILE_SAMPLE_AND_LOG(peakpick, magsq, " peak pick");
#ifdef POST_PROCESS_MBE
- best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_Wo);
+ best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_f0);
#else
- best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo);
+ best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_f0);
#endif
PROFILE_SAMPLE_AND_LOG(shiftmem, peakpick, " post process");
for(i=0; i<m-n; i++)
nlp->sq[i] = nlp->sq[i+n];
- /* return pitch and F0 estimate */
+ /* return pitch period in samples and F0 estimate */
- *pitch = (float)SAMPLE_RATE/best_f0;
+ *pitch = (float)nlp->Fs/best_f0;
PROFILE_SAMPLE_AND_LOG2(shiftmem, " shift mem");
PROFILE_SAMPLE_AND_LOG2(start, " nlp int");
+ *prev_f0 = best_f0;
+
return(best_f0);
}
float post_process_sub_multiples(COMP Fw[],
int pmin, int pmax, float gmax, int gmax_bin,
- float *prev_Wo)
+ float *prev_f0)
{
int min_bin, cmax_bin;
int mult;
mult = 2;
min_bin = PE_FFT_SIZE*DEC/pmax;
cmax_bin = gmax_bin;
- prev_f0_bin = *prev_Wo*(4000.0/PI)*(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
+ prev_f0_bin = *prev_f0*(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
while(gmax_bin/mult >= min_bin) {
}
#endif
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: fdmdv_16_to_8()
+ AUTHOR......: David Rowe
+ DATE CREATED: 9 May 2012
+
+ Changes the sample rate of a signal from 16 to 8 kHz.
+
+ n is the number of samples at the 8 kHz rate, there are FDMDV_OS*n
+ samples at the 16 kHz rate. A memory of FDMDV_OS_TAPS_16K samples is
+ reqd immediately before in16k[0] (see t16_8.c unit test as example).
+
+ Low pass filter the 16 kHz signal at 4 kHz using the same filter as
+ the upsampler, then just output every FDMDV_OS-th filtered sample.
+
+ Note: this function copied from fdmdv.c, included in nlp.c as a convenience
+ to avoid linking with another source file.
+
+\*---------------------------------------------------------------------------*/
+
+static void fdmdv_16_to_8(float out8k[], float in16k[], int n)
+{
+    int out_idx, tap, h;
+
+    /* FIR low pass, keeping only every FDMDV_OS-th result: output k is
+       the filter response ending at in16k[k*FDMDV_OS], with the taps
+       reaching back into the memory stored just before in16k[0] */
+
+    for(out_idx=0; out_idx<n; out_idx++) {
+        float *newest = &in16k[out_idx*FDMDV_OS];
+        float sum = 0.0;
+
+        for(tap=0; tap<FDMDV_OS_TAPS_16K; tap++) {
+            sum += fdmdv_os_filter[tap]*newest[-tap];
+        }
+        out8k[out_idx] = sum;
+    }
+
+    /* slide the final FDMDV_OS_TAPS_16K samples of the buffer down into
+       the memory region so the next call sees them as filter history */
+
+    float *history = in16k - FDMDV_OS_TAPS_16K;
+    for(h=0; h<FDMDV_OS_TAPS_16K; h++) {
+        history[h] = history[h + n*FDMDV_OS];
+    }
+}
#include "comp.h"
-void *nlp_create(int m);
+void *nlp_create(C2CONST *c2const);
void nlp_destroy(void *nlp_state);
-float nlp(void *nlp_state, float Sn[], int n, int pmin, int pmax,
- float *pitch, COMP Sw[], COMP W[], float *prev_Wo);
+float nlp(void *nlp_state, float Sn[], int n,
+ float *pitch_samples, COMP Sw[], COMP W[], float *prev_f0);
#endif
/* Generate using fir1(47,1/2) in Octave */
-const float fdmdv_os_filter[]= {
+static const float fdmdv_os_filter[]= {
-0.0008215855034550382,
-0.0007833023901802921,
0.001075563790768233,
fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
+ fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
return c2const;
}
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#define N 80 /* frame size */
-#define M 320 /* pitch analysis window size */
-#define PITCH_MIN 20
-#define PITCH_MAX 160
-#define TNLP
#include <stdlib.h>
#include <stdio.h>
/*---------------------------------------------------------------------------*\
- switch_present()
+ switch_present()
- Searches the command line arguments for a "switch". If the switch is
- found, returns the command line argument where it ws found, else returns
- NULL.
+ Searches the command line arguments for a "switch". If the switch is
+ found, returns the command line argument where it was found, else returns
+ NULL.
\*---------------------------------------------------------------------------*/
\*---------------------------------------------------------------------------*/
-int main(argc,argv)
-int argc;
-char *argv[];
+int main(int argc, char *argv[])
{
+    /* tnlp entry point: needs an input and an output file name, then
+       optional --dump and --Fs switches */
+
+    if (argc < 3) {
+        printf("\nusage: tnlp InputRawSpeechFile Outputf0PitchTextFile "
+               "[--dump DumpFile] [--Fs SampleRateHz]\n");
+        exit(1);
+    }
+
+    /* The sample rate is the argument that follows the --Fs switch, so
+       index argv from the position switch_present() reports (the same
+       way the --dump switch is handled), never from argc: argv[argc] is
+       the terminating NULL and anything beyond it is out of bounds. */
+
+    int Fs = 8000;
+    int fs_arg = switch_present("--Fs",argc,argv);
+    if (fs_arg) {
+        if (fs_arg+1 >= argc) {
+            fprintf(stderr, "Error: --Fs requires a sample rate in Hz\n");
+            exit(1);
+        }
+        Fs = atoi(argv[fs_arg+1]);
+    }
+
+    C2CONST c2const = c2const_create(Fs);
+    int n = c2const.n_samp;
+    int m = c2const.m_pitch;
FILE *fin,*fout;
- short buf[N];
- float Sn[M]; /* float input speech samples */
+ short buf[n];
+ float Sn[m]; /* float input speech samples */
kiss_fft_cfg fft_fwd_cfg;
COMP Sw[FFT_ENC]; /* DFT of Sn[] */
- float w[M]; /* time domain hamming window */
+ float w[m]; /* time domain hamming window */
COMP W[FFT_ENC]; /* DFT of w[] */
- float pitch;
+ float pitch_samples;
int i;
- float prev_Wo;
+ float f0, prev_f0;
void *nlp_states;
-#ifdef DUMP
+ #ifdef DUMP
int dump;
-#endif
-
- if (argc < 3) {
- printf("\nusage: tnlp InputRawSpeechFile OutputPitchTextFile "
- "[--dump DumpFile]\n");
- exit(1);
- }
+ #endif
/* Input file */
exit(1);
}
-#ifdef DUMP
+ #ifdef DUMP
dump = switch_present("--dump",argc,argv);
if (dump)
dump_on(argv[dump+1]);
-#else
-/// TODO
-/// #warning "Compile with -DDUMP if you expect to dump anything."
-#endif
+ #else
+ /// TODO
+ /// #warning "Compile with -DDUMP if you expect to dump anything."
+ #endif
- nlp_states = nlp_create(M);
+ for(i=0; i<m; i++) {
+ Sn[i] = 0.0;
+ }
+
+ nlp_states = nlp_create(&c2const);
fft_fwd_cfg = kiss_fft_alloc(FFT_ENC, 0, NULL, NULL);
- make_analysis_window(fft_fwd_cfg, w, W);
+ make_analysis_window(&c2const, fft_fwd_cfg, w, W);
frames = 0;
- prev_Wo = 0;
- while(fread(buf,sizeof(short),N,fin)) {
- printf("%d\n", frames++);
-
+ prev_f0 = 1/P_MAX_S;
+ while(fread(buf, sizeof(short), n, fin)) {
/* Update input speech buffers */
- for(i=0; i<M-N; i++)
- Sn[i] = Sn[i+N];
- for(i=0; i<N; i++)
- Sn[i+M-N] = buf[i];
- dft_speech(fft_fwd_cfg, Sw, Sn, w);
-#ifdef DUMP
- dump_Sn(Sn); dump_Sw(Sw);
-#endif
+ for(i=0; i<m-n; i++)
+ Sn[i] = Sn[i+n];
+ for(i=0; i<n; i++)
+ Sn[i+m-n] = buf[i];
+ dft_speech(&c2const, fft_fwd_cfg, Sw, Sn, w);
+ #ifdef DUMP
+ dump_Sn(m, Sn); dump_Sw(Sw);
+ #endif
- nlp(nlp_states,Sn,N,PITCH_MIN,PITCH_MAX,&pitch,Sw,W, &prev_Wo);
- prev_Wo = TWO_PI/pitch;
+ f0 = nlp(nlp_states, Sn, n, &pitch_samples, Sw, W, &prev_f0);
- fprintf(fout,"%f\n",pitch);
+ fprintf(stderr,"%d %f %f\n", frames++, f0, pitch_samples);
+ fprintf(fout,"%f %f\n", f0, pitch_samples);
}
fclose(fin);
fclose(fout);
-#ifdef DUMP
+ #ifdef DUMP
if (dump) dump_off();
-#endif
+ #endif
nlp_destroy(nlp_states);
return 0;