3. Phase model developed that uses 0 bits for phase and 1 bit/frame
for voiced/unvoiced decision.
-[[source code]]
+[[source]]
The Source Code
---------------
----------
[1] http://perens.com/[Bruce Perens] introducing the
- http://codec2.org/[codec2 project concept]:
+ http://codec2.org/[codec2 project concept]
[2] David's PhD Thesis,
http://www.itr.unisa.edu.au/~steven/thesis/dgr.pdf["Techniques for
- Harmonic Sinusoidal Coding"], used for baseline algorithm:
+ Harmonic Sinusoidal Coding"], used for baseline algorithm
[3] http://www.rowetel.com/blog/?p=128[Open Source Low rate Speech
- Codec Part 1 - Introduction:]
+ Codec Part 1 - Introduction]
[4] http://www.rowetel.com/blog/?p=130[Open Source Low rate Speech
Codec Part 2 - Spectral Magnitudes]
static FILE *fphase_ = NULL;
static FILE *ffw = NULL;
static FILE *fe = NULL;
+static FILE *fsq = NULL;
+static FILE *fdec = NULL;
static char prefix[MAX_STR];
fclose(ffw);
if (fe != NULL)
fclose(fe);
+ if (fsq != NULL)
+ fclose(fsq);
+ if (fdec != NULL)
+ fclose(fdec);
}
void dump_Sn(float Sn[]) {
fprintf(fe,"\n");
}
+/*
+ * dump_sq() - append the contents of sq[] to the text file
+ * "<prefix>_sq.txt".
+ *
+ * Returns immediately when dumping is disabled (dumpon == 0).  The
+ * output file is opened on the first call and the handle is kept in
+ * the file-scope variable fsq for subsequent calls.
+ *
+ * sq[] must hold at least M values.  They are written as two
+ * tab-separated rows: elements 0..M/2-1, then elements M/2..M-1.
+ */
+void dump_sq(float sq[]) {
+    int i;
+    int len;
+    char s[MAX_STR];
+
+    if (!dumpon) return;
+
+    if (fsq == NULL) {
+        /* prefix[] is MAX_STR chars as well, so prefix plus the suffix
+           can exceed s[]; bound the write and refuse a truncated name
+           instead of overrunning the buffer */
+        len = snprintf(s, sizeof(s), "%s_sq.txt", prefix);
+        assert(len > 0 && len < (int)sizeof(s));
+        fsq = fopen(s, "wt");
+        assert(fsq != NULL);
+    }
+
+    /* first half of the buffer on one row ... */
+    for(i=0; i<M/2; i++)
+        fprintf(fsq,"%f\t",sq[i]);
+    fprintf(fsq,"\n");
+    /* ... second half on the next row */
+    for(i=M/2; i<M; i++)
+        fprintf(fsq,"%f\t",sq[i]);
+    fprintf(fsq,"\n");
+}
+
+/*
+ * dump_dec() - append the real parts of the first 320/5 elements of
+ * Fw[] to the text file "<prefix>_dec.txt", one tab-separated row per
+ * call.
+ *
+ * Returns immediately when dumping is disabled (dumpon == 0).  The
+ * output file is opened on the first call and the handle is kept in
+ * the file-scope variable fdec for subsequent calls.
+ *
+ * Fw[] must hold at least 320/5 elements.
+ * NOTE(review): 320/5 presumably mirrors m/DEC, the number of
+ * decimated samples the caller fills in before calling this - confirm,
+ * and use the named constants if they are visible in this file.
+ */
+void dump_dec(COMP Fw[]) {
+    int i;
+    int len;
+    char s[MAX_STR];
+
+    if (!dumpon) return;
+
+    if (fdec == NULL) {
+        /* prefix[] is MAX_STR chars as well, so prefix plus the suffix
+           can exceed s[]; bound the write and refuse a truncated name
+           instead of overrunning the buffer */
+        len = snprintf(s, sizeof(s), "%s_dec.txt", prefix);
+        assert(len > 0 && len < (int)sizeof(s));
+        fdec = fopen(s, "wt");
+        assert(fdec != NULL);
+    }
+
+    /* only the real part is written; the imaginary part is ignored */
+    for(i=0; i<320/5; i++)
+        fprintf(fdec,"%f\t",Fw[i].real);
+    fprintf(fdec,"\n");
+}
+
void dump_lsp(float lsp[]);
void dump_phase(float phase[]);
void dump_phase_(float phase[]);
+
+/* NLP states */
+
+void dump_sq(float sq[]);
+void dump_dec(COMP Fw[]);
void dump_Fw(COMP Fw[]);
void dump_e(float e_hz[]);
float Sn[], /* input speech vector */
int n, /* frames shift (no. new samples in Sn[]) */
int m, /* analysis window size */
- int d, /* additional delay (used for testing) */
int pmin, /* minimum pitch value */
int pmax, /* maximum pitch value */
float *pitch, /* estimated pitch period in samples */
/* Square, notch filter at DC, and LP filter vector */
- for(i=0; i<n; i++) /* square speech samples */
- sq[i+d+m-n] = Sn[i]*Sn[i];
+ for(i=m-n; i<M; i++) /* square latest speech samples */
+ sq[i] = Sn[i]*Sn[i];
- for(i=m-n+d; i<m+d; i++) { /* notch filter at DC */
+ for(i=m-n; i<m; i++) { /* notch filter at DC */
notch = sq[i] - mem_x;
notch += COEFF*mem_y;
mem_x = sq[i];
sq[i] = notch;
}
- for(i=m-n+d; i<m+d; i++) { /* FIR filter vector */
+ for(i=m-n; i<m; i++) { /* FIR filter vector */
for(j=0; j<NLP_NTAP-1; j++)
mem_fir[j] = mem_fir[j+1];
}
for(i=0; i<m/DEC; i++)
Fw[i].real = sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
+ dump_dec(Fw);
four1(&Fw[-1].imag,PE_FFT_SIZE,1);
for(i=0; i<PE_FFT_SIZE; i++)
Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
+ dump_sq(sq);
dump_Fw(Fw);
/* find global peak */
/* Shift samples in buffer to make room for new samples */
- for(i=0; i<m-n+d; i++)
+ for(i=0; i<m-n; i++)
sq[i] = sq[i+n];
/* return pitch and F0 estimate */
#define NLP_NTAP 48 /* Decimation LPF order */
-float nlp(float Sn[], int n, int m, int d, int pmin, int pmax, float *pitch,
- COMP Sw[]);
+float nlp(float Sn[], int n, int m, int pmin, int pmax, float *pitch, COMP Sw[]);
#endif
#include <stdlib.h>
#include <stdio.h>
+#include <string.h>
#include <math.h>
#include "nlp.h"
#include "dump.h"
dft_speech();
dump_Sn(Sn); dump_Sw(Sw);
- nlp(Sn,N,M,N-NLP_NTAP/2,PITCH_MIN,PITCH_MAX,&pitch,Sw);
+ nlp(Sn,N,M,PITCH_MIN,PITCH_MAX,&pitch,Sw);
fprintf(fout,"%f\n",pitch);