*/
#include "codec2.h"
+#include "sine.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
-#define BITS_SIZE ((CODEC2_BITS_PER_FRAME + 7) / 8)
-
int main(int argc, char *argv[])
{
struct CODEC2 *codec2;
FILE *fout;
short *buf;
unsigned char *bits;
- int nsam, nbit;
+ int nsam, nbit, i, r;
+
+ for(i=0; i<10; i++) {
+ r = codec2_rand();
+ printf("[%d] r = %d\n", i, r);
+ }
if (argc != 3) {
printf("usage: %s InputRawSpeechFile OutputRawSpeechFile\n", argv[0]);
exit(1);
}
+ #ifdef DUMP
+ dump_on("c2demo");
+ #endif
+
/* Note only one set of Codec 2 states is required for an encoder
and decoder pair. */
- codec2 = codec2_create(CODEC2_MODE_1400);
+ codec2 = codec2_create(CODEC2_MODE_1300);
nsam = codec2_samples_per_frame(codec2);
buf = (short*)malloc(nsam*sizeof(short));
nbit = codec2_bits_per_frame(codec2);
{ "lspd", no_argument, &lspd, 1 },
{ "lspvq", no_argument, &lspvq, 1 },
{ "lspres", no_argument, &lspres, 1 },
+ #ifdef __EXPERIMENTAL__
{ "lspdt", no_argument, &lspdt, 1 },
{ "lspdt_mode", required_argument, NULL, 0 },
+ #endif
{ "lspjvm", no_argument, &lspjvm, 1 },
+ #ifdef __EXPERIMENTAL__
{ "lspanssi", no_argument, &lspanssi, 1 },
+ #endif
{ "phase0", no_argument, &phase0, 1 },
{ "phaseexp", required_argument, &phaseexp, 1 },
{ "ampexp", required_argument, &exp, 1 },
e = prev_e = 1;
hpf_states[0] = hpf_states[1] = 0.0;
- nlp_states = nlp_create();
+ nlp_states = nlp_create(M);
if (argc < 2) {
print_help(long_options, num_opts, argv);
\*------------------------------------------------------------*/
- nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo);
+ nlp(nlp_states,Sn,N,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo);
model.Wo = TWO_PI/pitch;
dft_speech(fft_fwd_cfg, Sw, Sn, w);
two_stage_pitch_refinement(&model, Sw);
- estimate_amplitudes(&model, Sw, W);
+ estimate_amplitudes(&model, Sw, W, 1);
uq_Wo = model.Wo;
#ifdef DUMP
lsp_to_lpc(lsps_, ak, LPC_ORD);
}
+#ifdef __EXPERIMENTAL__
if (lspvq) {
lspvq_quantise(lsps, lsps_, LPC_ORD);
bw_expand_lsps(lsps_, LPC_ORD);
lsp_to_lpc(lsps_, ak, LPC_ORD);
}
+#endif
if (lspjvm) {
/* Jean-Marc's multi-stage, split VQ */
}
}
+#ifdef __EXPERIMENTAL__
if (lspanssi) {
/* multi-stage VQ from Anssi Ramo OH3GDD */
bw_expand_lsps(lsps_, LPC_ORD);
lsp_to_lpc(lsps_, ak, LPC_ORD);
}
+#endif
/* experimenting with non-linear LSP spacing to see if
it's just noticable */
/* Odd frames are generated by quantising the difference
between the previous frames LSPs and this frames */
+#ifdef __EXPERIMENTAL__
if (lspdt && !decimate) {
if (frames%2) {
lspdt_quantise(lsps, lsps_, lsps__prev, lspdt_mode);
for(i=0; i<LPC_ORD; i++)
lsps__prev[i] = lsps_[i];
}
+#endif
/*
When decimation is enabled we only send LSPs to the
#include "codec2.h"
#include "lsp.h"
#include "codec2_internal.h"
+#include "machdep.h"
/*---------------------------------------------------------------------------*\
}
c2->prev_e_dec = 1;
- c2->nlp = nlp_create();
+ c2->nlp = nlp_create(M);
if (c2->nlp == NULL) {
free (c2);
return NULL;
for(i=0; i<LSPD_SCALAR_INDEXES; i++) {
pack(bits, &nbit, lspd_indexes[i], lspd_bits(i));
}
-
assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
int Wo_index, e_index;
int i;
unsigned int nbit = 0;
-
+ #ifdef TIMER
+ unsigned int quant_start;
+ #endif
+
assert(c2 != NULL);
memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
Wo_index = encode_Wo(model.Wo);
pack(bits, &nbit, Wo_index, WO_BITS);
+ #ifdef TIMER
+ quant_start = machdep_timer_sample();
+ #endif
e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
e_index = encode_energy(e);
pack(bits, &nbit, e_index, E_BITS);
for(i=0; i<LSP_SCALAR_INDEXES; i++) {
pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
}
+ #ifdef TIMER
+ machdep_timer_sample_and_log(quant_start, " quant/packing");
+ #endif
assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
int i,j;
unsigned int nbit = 0;
float weight;
+ TIMER_VAR(recover_start);
assert(c2 != NULL);
/* Wo, energy, and LSPs are sampled every 40ms so we interpolate
the 3 frames in between */
+ TIMER_SAMPLE(recover_start);
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma);
apply_lpc_correction(&model[i]);
}
+ TIMER_SAMPLE_AND_LOG2(recover_start, " recover");
/* synthesise ------------------------------------------------*/
void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, float ak[])
{
int i;
+ TIMER_VAR(phase_start, pf_start, synth_start);
+
+ #ifdef DUMP
+ dump_quantised_model(model);
+ #endif
+
+ TIMER_SAMPLE(phase_start);
phase_synth_zero_order(c2->fft_fwd_cfg, model, ak, &c2->ex_phase, LPC_ORD);
+
+ TIMER_SAMPLE_AND_LOG(pf_start,phase_start, " phase_synth");
+
postfilter(model, &c2->bg_est);
+
+ TIMER_SAMPLE_AND_LOG(synth_start, pf_start, " postfilter");
+
synthesise(c2->fft_inv_cfg, c2->Sn_, model, c2->Pn, 1);
+
+ TIMER_SAMPLE_AND_LOG2(synth_start, " synth");
+
ear_protection(c2->Sn_, N);
for(i=0; i<N; i++) {
COMP Sw[FFT_ENC];
COMP Sw_[FFT_ENC];
COMP Ew[FFT_ENC];
- float pitch, snr;
+ float pitch;
int i;
+ TIMER_VAR(dft_start, nlp_start, model_start, two_stage, estamps);
/* Read input speech */
for(i=0; i<N; i++)
c2->Sn[i+M-N] = speech[i];
+ TIMER_SAMPLE(dft_start);
dft_speech(c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);
+ TIMER_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech");
/* Estimate pitch */
- nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
+ nlp(c2->nlp,c2->Sn,N,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
+ TIMER_SAMPLE_AND_LOG(model_start, nlp_start, " nlp");
+
model->Wo = TWO_PI/pitch;
model->L = PI/model->Wo;
/* estimate model parameters */
two_stage_pitch_refinement(model, Sw);
- estimate_amplitudes(model, Sw, c2->W);
- snr = est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo_enc);
- //fprintf(stderr,"snr %3.2f v: %d Wo: %f prev_Wo: %f\n",
- // snr, model->voiced, model->Wo, c2->prev_Wo_enc);
+ TIMER_SAMPLE_AND_LOG(two_stage, model_start, " two_stage");
+ estimate_amplitudes(model, Sw, c2->W, 0);
+ TIMER_SAMPLE_AND_LOG(estamps, two_stage, " est_amps");
+ est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo_enc);
c2->prev_Wo_enc = model->Wo;
+ TIMER_SAMPLE_AND_LOG2(estamps, " est_voicing");
+ #ifdef DUMP
+ dump_model(model);
+ #endif
}
/*---------------------------------------------------------------------------*\
#include <string.h>
#include <math.h>
+#ifdef __EMBEDDED__
+#include "gdb_stdio.h"
+#define fprintf gdb_stdio_fprintf
+#define fopen gdb_stdio_fopen
+#define fclose gdb_stdio_fclose
+#endif
+
#ifdef DUMP
static int dumpon = 0;
void dump_model(MODEL *model) {
int l;
char s[MAX_STR];
+ char line[2048];
if (!dumpon) return;
assert(fmodel != NULL);
}
- fprintf(fmodel,"%f\t%d\t", model->Wo, model->L);
- for(l=1; l<=model->L; l++)
- fprintf(fmodel,"%f\t",model->A[l]);
- for(l=model->L+1; l<MAX_AMP; l++)
- fprintf(fmodel,"0.0\t");
- fprintf(fmodel,"%d\t",model->voiced);
- fprintf(fmodel,"\n");
+ sprintf(line,"%12f %12d ", model->Wo, model->L);
+ for(l=1; l<=model->L; l++) {
+ sprintf(s,"%12f ",model->A[l]);
+ strcat(line, s);
+ }
+ for(l=model->L+1; l<=MAX_AMP; l++) {
+ sprintf(s,"%12f ", 0.0);
+ strcat(line,s);
+ }
+
+ sprintf(s,"%d\n",model->voiced);
+ strcat(line,s);
+ fprintf(fmodel,"%s",line);
}
void dump_quantised_model(MODEL *model) {
int l;
char s[MAX_STR];
+ char line[2048];
if (!dumpon) return;
assert(fqmodel != NULL);
}
- fprintf(fqmodel,"%f\t%d\t", model->Wo, model->L);
- for(l=1; l<=model->L; l++)
- fprintf(fqmodel,"%f\t",model->A[l]);
- for(l=model->L+1; l<MAX_AMP; l++)
- fprintf(fqmodel,"0.0\t");
- fprintf(fqmodel,"\n");
+ sprintf(line,"%12f %12d ", model->Wo, model->L);
+ for(l=1; l<=model->L; l++) {
+ sprintf(s,"%12f ",model->A[l]);
+ strcat(line, s);
+ }
+ for(l=model->L+1; l<=MAX_AMP; l++) {
+ sprintf(s,"%12f ", 0.0);
+ strcat(line, s);
+ }
+
+ sprintf(s,"%d\n",model->voiced);
+ strcat(line, s);
+ fprintf(fqmodel, "%s", line);
}
void dump_phase(float phase[], int L) {
for(l=1; l<=L; l++)
fprintf(fphase,"%f\t",phase[l]);
- for(l=L+1; l<MAX_AMP; l++)
+ for(l=L+1; l<=MAX_AMP; l++)
fprintf(fphase,"%f\t",0.0);
fprintf(fphase,"\n");
}
#ifndef __DUMP__
#define __DUMP__
+#include "defines.h"
#include "comp.h"
+#include "kiss_fft.h"
+#include "codec2_internal.h"
void dump_on(char filename_prefix[]);
void dump_off();
/* post filter */
void dump_bg(float e, float bg_est, float percent_uv);
+void dump_Pwb(COMP Pwb[]);
#endif
--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: machdep.h
+ AUTHOR......: David Rowe
+ DATE CREATED: May 2 2013
+
+ Machine dependant functions.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2013 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 2.1, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __MACHDEP__
+#define __MACHDEP__
+
+#ifdef TIMER
+#define TIMER_VAR(...) unsigned int __VA_ARGS__
+#define TIMER_SAMPLE(timestamp) timestamp = machdep_timer_sample()
+#define TIMER_SAMPLE_AND_LOG(timestamp, prev_timestamp, label) \
+ timestamp = machdep_timer_sample_and_log(prev_timestamp, label)
+#define TIMER_SAMPLE_AND_LOG2(prev_timestamp, label) \
+ machdep_timer_sample_and_log(prev_timestamp, label)
+#else
+#define TIMER_VAR(...)
+#define TIMER_SAMPLE(timestamp)
+#define TIMER_SAMPLE_AND_LOG(timestamp, prev_timestamp, label)
+#define TIMER_SAMPLE_AND_LOG2(prev_timestamp, label)
+#endif
+
+void machdep_timer_init(void);
+void machdep_timer_reset(void);
+unsigned int machdep_timer_sample(void);
+unsigned int machdep_timer_sample_and_log(unsigned int start, char s[]);
+void machdep_timer_print_logged_samples(void);
+
+#endif
#include "nlp.h"
#include "dump.h"
#include "kiss_fft.h"
+#undef TIMER
+#include "machdep.h"
#include <assert.h>
#include <math.h>
#define CNLP 0.3 /* post processor constant */
#define NLP_NTAP 48 /* Decimation LPF order */
+#undef DUMP
+
/*---------------------------------------------------------------------------*\
GLOBALS
};
typedef struct {
+ int m;
+ float w[PMAX_M/DEC]; /* DFT window */
float sq[PMAX_M]; /* squared speech samples */
float mem_x,mem_y; /* memory for notch filter */
float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
\*---------------------------------------------------------------------------*/
-void *nlp_create()
+void *nlp_create(
+int m /* analysis window size */
+)
{
NLP *nlp;
int i;
+ assert(m <= PMAX_M);
+
nlp = (NLP*)malloc(sizeof(NLP));
if (nlp == NULL)
return NULL;
+ nlp->m = m;
+ for(i=0; i<m/DEC; i++) {
+ nlp->w[i] = 0.5 - 0.5*cos(2*PI*i/(m/DEC-1));
+ }
+
for(i=0; i<PMAX_M; i++)
nlp->sq[i] = 0.0;
nlp->mem_x = 0.0;
void *nlp_state,
float Sn[], /* input speech vector */
int n, /* frames shift (no. new samples in Sn[]) */
- int m, /* analysis window size */
int pmin, /* minimum pitch value */
int pmax, /* maximum pitch value */
float *pitch, /* estimated pitch period in samples */
COMP Fw[PE_FFT_SIZE]; /* DFT of squared signal (output) */
float gmax;
int gmax_bin;
- int i,j;
- float best_f0;
-
+ int m, i,j;
+ float best_f0;
+ TIMER_VAR(start, tnotch, filter, peakpick, window, fft, magsq, shiftmem);
+
assert(nlp_state != NULL);
- assert(m <= PMAX_M);
nlp = (NLP*)nlp_state;
+ m = nlp->m;
+
+ TIMER_SAMPLE(start);
/* Square, notch filter at DC, and LP filter vector */
exactly sure why. */
}
+ TIMER_SAMPLE_AND_LOG(tnotch, start, " square and notch");
+
for(i=m-n; i<m; i++) { /* FIR filter vector */
for(j=0; j<NLP_NTAP-1; j++)
nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
}
+ TIMER_SAMPLE_AND_LOG(filter, tnotch, " filter");
+
/* Decimate and DFT */
for(i=0; i<PE_FFT_SIZE; i++) {
fw[i].imag = 0.0;
}
for(i=0; i<m/DEC; i++) {
- fw[i].real = nlp->sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
+ fw[i].real = nlp->sq[i*DEC]*nlp->w[i];
}
+ TIMER_SAMPLE_AND_LOG(window, filter, " window");
#ifdef DUMP
dump_dec(Fw);
#endif
kiss_fft(nlp->fft_cfg, (kiss_fft_cpx *)fw, (kiss_fft_cpx *)Fw);
+ TIMER_SAMPLE_AND_LOG(fft, window, " fft");
+
for(i=0; i<PE_FFT_SIZE; i++)
Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
+ TIMER_SAMPLE_AND_LOG(magsq, fft, " mag sq");
#ifdef DUMP
dump_sq(nlp->sq);
dump_Fw(Fw);
}
}
+ TIMER_SAMPLE_AND_LOG(peakpick, magsq, " peak pick");
+
//#define POST_PROCESS_MBE
#ifdef POST_PROCESS_MBE
best_f0 = post_process_mbe(Fw, pmin, pmax, gmax, Sw, W, prev_Wo);
best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, prev_Wo);
#endif
+ TIMER_SAMPLE_AND_LOG(shiftmem, peakpick, " post process");
+
/* Shift samples in buffer to make room for new samples */
for(i=0; i<m-n; i++)
/* return pitch and F0 estimate */
*pitch = (float)SAMPLE_RATE/best_f0;
+
+ TIMER_SAMPLE_AND_LOG2(shiftmem, " shift mem");
+
+ TIMER_SAMPLE_AND_LOG2(start, " nlp int");
+
return(best_f0);
}
int mult;
float thresh, best_f0;
int b, bmin, bmax, lmax_bin;
- float lmax, cmax;
+ float lmax;
int prev_f0_bin;
/* post process estimate by searching submultiples */
if (lmax > thresh)
if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) {
- cmax = lmax;
cmax_bin = lmax_bin;
}
float f0,best_f0; /* fundamental frequency */
float e,e_min; /* MBE cost function */
int i;
+ #ifdef DUMP
float e_hz[F0_MAX];
+ #endif
int bin;
float f0_min, f0_max;
float f0_start, f0_end;
/* Now look for local maxima. Each local maxima is a candidate
that we test using the MBE pitch estimation algotithm */
+ #ifdef DUMP
for(i=0; i<F0_MAX; i++)
e_hz[i] = -1;
+ #endif
e_min = 1E32;
best_f0 = 50;
for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
e = test_candidate_mbe(Sw, W, f0);
bin = floor(f0); assert((bin > 0) && (bin < F0_MAX));
- e_hz[bin] = e;
+ #ifdef DUMP
+ e_hz[bin] = e;
+ #endif
if (e < e_min) {
e_min = e;
best_f0 = f0;
for(f0=f0_start; f0<=f0_end; f0+= 2.5) {
e = test_candidate_mbe(Sw, W, f0);
bin = floor(f0); assert((bin > 0) && (bin < F0_MAX));
+ #ifdef DUMP
e_hz[bin] = e;
+ #endif
if (e < e_min) {
e_min = e;
best_f0 = f0;
return error;
}
-
#include "comp.h"
-void *nlp_create();
+void *nlp_create(int m);
void nlp_destroy(void *nlp_state);
-float nlp(void *nlp_state, float Sn[], int n, int m, int pmin, int pmax,
+float nlp(void *nlp_state, float Sn[], int n, int pmin, int pmax,
float *pitch, COMP Sw[], COMP W[], float *prev_Wo);
#endif
#include "phase.h"
#include "kiss_fft.h"
#include "comp.h"
-#include "glottal.c"
+#include "sine.h"
#include <assert.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
-#define GLOTTAL_FFT_SIZE 512
-
/*---------------------------------------------------------------------------*\
aks_to_H()
/* Sample magnitude and phase at harmonics */
for(m=1; m<=model->L; m++) {
- am = floor((m - 0.5)*model->Wo/r + 0.5);
- bm = floor((m + 0.5)*model->Wo/r + 0.5);
- b = floor(m*model->Wo/r + 0.5);
-
- Em = 0.0;
- for(i=am; i<bm; i++)
- Em += G/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
- Am = sqrt(fabs(Em/(bm-am)));
-
- phi_ = -atan2(Pw[b].imag,Pw[b].real);
- H[m].real = Am*cos(phi_);
- H[m].imag = Am*sin(phi_);
+ am = (int)((m - 0.5)*model->Wo/r + 0.5);
+ bm = (int)((m + 0.5)*model->Wo/r + 0.5);
+ b = (int)(m*model->Wo/r + 0.5);
+
+ Em = 0.0;
+ for(i=am; i<bm; i++)
+ Em += G/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
+ Am = sqrtf(fabsf(Em/(bm-am)));
+
+ phi_ = -atan2f(Pw[b].imag,Pw[b].real);
+ H[m].real = Am*cosf(phi_);
+ H[m].imag = Am*sinf(phi_);
}
}
COMP A_[MAX_AMP+1]; /* synthesised harmonic samples */
COMP H[MAX_AMP+1]; /* LPC freq domain samples */
float G;
- float jitter = 0.0;
- float r;
- int b;
G = 1.0;
aks_to_H(fft_fwd_cfg, model, aks, G, H, order);
ex_phase[0] += (model->Wo)*N;
ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
- r = TWO_PI/GLOTTAL_FFT_SIZE;
for(m=1; m<=model->L; m++) {
/* generate excitation */
- if (model->voiced) {
- //float rnd;
-
- b = floor(m*model->Wo/r + 0.5);
- if (b > ((GLOTTAL_FFT_SIZE/2)-1)) {
- b = (GLOTTAL_FFT_SIZE/2)-1;
- }
-
- /* I think adding a little jitter helps improve low pitch
- males like hts1a. This moves the onset of each harmonic
- over +/- 0.25 of a sample.
- */
- //jitter = 0.25*(1.0 - 2.0*rand()/RAND_MAX);
- jitter = 0;
-
- //rnd = (PI/8)*(1.0 - 2.0*rand()/RAND_MAX);
- Ex[m].real = cos(ex_phase[0]*m/* - jitter*model->Wo*m + glottal[b]*/);
- Ex[m].imag = sin(ex_phase[0]*m/* - jitter*model->Wo*m + glottal[b]*/);
+ if (model->voiced) {
+
+ Ex[m].real = cosf(ex_phase[0]*m);
+ Ex[m].imag = sinf(ex_phase[0]*m);
}
else {
phase is not needed in the unvoiced case, but no harm in
keeping it.
*/
- float phi = TWO_PI*(float)rand()/RAND_MAX;
- Ex[m].real = cos(phi);
- Ex[m].imag = sin(phi);
+ float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
+ Ex[m].real = cosf(phi);
+ Ex[m].imag = sinf(phi);
}
/* filter using LPC filter */
/* modify sinusoidal phase */
- new_phi = atan2(A_[m].imag, A_[m].real+1E-12);
+ new_phi = atan2f(A_[m].imag, A_[m].real+1E-12);
model->phi[m] = new_phi;
}
#include "defines.h"
#include "comp.h"
#include "dump.h"
+#include "sine.h"
#include "postfilter.h"
/*---------------------------------------------------------------------------*\
)
{
int m, uv;
- float e;
+ float e, thresh;
/* determine average energy across spectrum */
- e = 0.0;
+ e = 1E-12;
for(m=1; m<=model->L; m++)
e += model->A[m]*model->A[m];
assert(e > 0.0);
- e = 10.0*log10(e/model->L);
+ e = 10.0*log10f(e/model->L);
/* If beneath threhold, update bg estimate. The idea
of the threshold is to prevent updating during high level
*/
uv = 0;
+ thresh = powf(10.0, (*bg_est + BG_MARGIN)/20.0);
if (model->voiced)
for(m=1; m<=model->L; m++)
- if (20.0*log10(model->A[m]) < (*bg_est + BG_MARGIN)) {
- model->phi[m] = TWO_PI*(float)rand()/RAND_MAX;
+ if (model->A[m] < thresh) {
+ model->phi[m] = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
uv++;
}
#include "lpc.h"
#include "lsp.h"
#include "kiss_fft.h"
+#undef TIMER
+#include "machdep.h"
#define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */
return lsp_cbd[i].log2m;
}
+#ifdef __EXPERIMENTAL__
int lspdt_bits(int i) {
return lsp_cbdt[i].log2m;
}
+#endif
int lsp_pred_vq_bits(int i) {
return lsp_cbjvm[i].log2m;
}
-
+#ifdef __EXPERIMENTAL__
/*---------------------------------------------------------------------------*\
lspvq_quantise
#endif
}
+#endif
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX_ENTRIES 16384
}
}
+#ifdef __EXPERIMENTAL__
+
#define MBEST_STAGES 4
struct MBEST_LIST {
mbest_destroy(mbest_stage3);
mbest_destroy(mbest_stage4);
}
+#endif
int check_lsp_order(float lsp[], int lpc_order)
{
}
}
-#ifdef NOT_USED
-/*---------------------------------------------------------------------------*\
-
- lpc_model_amplitudes
-
- Derive a LPC model for amplitude samples then estimate amplitude samples
- from this model with optional LSP quantisation.
-
- Returns the spectral distortion for this frame.
-
-\*---------------------------------------------------------------------------*/
-
-float lpc_model_amplitudes(
- float Sn[], /* Input frame of speech samples */
- float w[],
- MODEL *model, /* sinusoidal model parameters */
- int order, /* LPC model order */
- int lsp_quant, /* optional LSP quantisation if non-zero */
- float ak[] /* output aks */
-)
-{
- float Wn[M];
- float R[LPC_MAX+1];
- float E;
- int i,j;
- float snr;
- float lsp[LPC_MAX];
- float lsp_hz[LPC_MAX];
- float lsp_[LPC_MAX];
- int roots; /* number of LSP roots found */
- float wt[LPC_MAX];
-
- for(i=0; i<M; i++)
- Wn[i] = Sn[i]*w[i];
- autocorrelate(Wn,R,M,order);
- levinson_durbin(R,ak,order);
-
- E = 0.0;
- for(i=0; i<=order; i++)
- E += ak[i]*R[i];
-
- for(i=0; i<order; i++)
- wt[i] = 1.0;
-
- if (lsp_quant) {
- roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
- if (roots != order)
- printf("LSP roots not found\n");
-
- /* convert from radians to Hz to make quantisers more
- human readable */
-
- for(i=0; i<order; i++)
- lsp_hz[i] = (4000.0/PI)*lsp[i];
-
-#ifdef NOT_NOW
- /* simple uniform scalar quantisers */
-
- for(i=0; i<10; i++) {
- k = lsp_cb[i].k;
- m = lsp_cb[i].m;
- cb = lsp_cb[i].cb;
- index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
- lsp_hz[i] = cb[index*k];
- }
-#endif
-
- /* experiment: simulating uniform quantisation error
- for(i=0; i<order; i++)
- lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX);
- */
-
- for(i=0; i<order; i++)
- lsp[i] = (PI/4000.0)*lsp_hz[i];
-
- /* Bandwidth Expansion (BW). Prevents any two LSPs getting too
- close together after quantisation. We know from experiment
- that LSP quantisation errors < 12.5Hz (25Hz step size) are
- inaudible so we use that as the minimum LSP separation.
- */
-
- for(i=1; i<5; i++) {
- if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
- lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
- }
-
- /* as quantiser gaps increased, larger BW expansion was required
- to prevent twinkly noises */
-
- for(i=5; i<8; i++) {
- if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
- lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
- }
- for(i=8; i<order; i++) {
- if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
- lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
- }
-
- for(j=0; j<order; j++)
- lsp_[j] = lsp[j];
-
- lsp_to_lpc(lsp_, ak, order);
-#ifdef DUMP
- dump_lsp(lsp);
-#endif
- }
-
-#ifdef DUMP
- dump_E(E);
-#endif
- #ifdef SIM_QUANT
- /* simulated LPC energy quantisation */
- {
- float e = 10.0*log10(E);
- e += 2.0*(1.0 - 2.0*(float)rand()/RAND_MAX);
- E = pow(10.0,e/10.0);
- }
- #endif
-
- aks_to_M2(ak,order,model,E,&snr, 1, 0, 1); /* {ak} -> {Am} LPC decode */
-
- return snr;
-}
-#endif
/*---------------------------------------------------------------------------*\
float e_before, e_after, gain;
float Pfw[FFT_ENC]; /* Post filter mag spectrum */
float max_Rw, min_Rw;
- float range, thresh, r, w;
- int m, bin;
+ float coeff;
+ TIMER_VAR(tstart, tfft1, taw, tfft2, tww, tr);
+
+ TIMER_SAMPLE(tstart);
/* Determine LPC inverse filter spectrum 1/A(exp(jw)) -----------*/
x[i].real = ak[i];
kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Aw);
+ TIMER_SAMPLE_AND_LOG(tfft1, tstart, " fft1");
+
for(i=0; i<FFT_ENC/2; i++) {
- Aw[i].real = 1.0/sqrt(Aw[i].real*Aw[i].real + Aw[i].imag*Aw[i].imag);
+ Aw[i].real = 1.0/(Aw[i].real*Aw[i].real + Aw[i].imag*Aw[i].imag);
}
+ TIMER_SAMPLE_AND_LOG(taw, tfft1, " Aw");
+
/* Determine weighting filter spectrum W(exp(jw)) ---------------*/
for(i=0; i<FFT_ENC; i++) {
x[i].imag = 0.0;
}
- for(i=0; i<=order; i++)
- x[i].real = ak[i] * pow(gamma, (float)i);
+ x[0].real = ak[0];
+ coeff = gamma;
+ for(i=1; i<=order; i++) {
+ x[i].real = ak[i] * coeff;
+ coeff *= gamma;
+ }
kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Ww);
+ TIMER_SAMPLE_AND_LOG(tfft2, taw, " fft2");
+
for(i=0; i<FFT_ENC/2; i++) {
- Ww[i].real = sqrt(Ww[i].real*Ww[i].real + Ww[i].imag*Ww[i].imag);
+ Ww[i].real = Ww[i].real*Ww[i].real + Ww[i].imag*Ww[i].imag;
}
+ TIMER_SAMPLE_AND_LOG(tww, tfft2, " Ww");
+
/* Determined combined filter R = WA ---------------------------*/
max_Rw = 0.0; min_Rw = 1E32;
for(i=0; i<FFT_ENC/2; i++) {
- Rw[i] = Ww[i].real * Aw[i].real;
+ Rw[i] = sqrtf(Ww[i].real * Aw[i].real);
if (Rw[i] > max_Rw)
max_Rw = Rw[i];
if (Rw[i] < min_Rw)
min_Rw = Rw[i];
}
+
+ TIMER_SAMPLE_AND_LOG(tr, tww, " R");
+
#ifdef DUMP
if (dump)
dump_Rw(Rw);
e_after = 1E-4;
for(i=0; i<FFT_ENC/2; i++) {
- Pfw[i] = pow(Rw[i], beta);
+ Pfw[i] = powf(Rw[i], beta);
Pw[i].real *= Pfw[i] * Pfw[i];
e_after += Pw[i].real;
}
Pw[i].real *= 1.4*1.4;
}
}
+
+ TIMER_SAMPLE_AND_LOG2(tr, " filt");
}
float Em; /* energy in band */
float Am; /* spectral amplitude sample */
float signal, noise;
+ TIMER_VAR(tstart, tfft, tpw, tpf);
+
+ TIMER_SAMPLE(tstart);
r = TWO_PI/(FFT_ENC);
for(i=0; i<=order; i++)
pw[i].real = ak[i];
kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)pw, (kiss_fft_cpx *)Pw);
+
+ TIMER_SAMPLE_AND_LOG(tfft, tstart, " fft");
/* Determine power spectrum P(w) = E/(A(exp(jw))^2 ------------------------*/
for(i=0; i<FFT_ENC/2; i++)
Pw[i].real = E/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
+ TIMER_SAMPLE_AND_LOG(tpw, tfft, " Pw");
+
if (pf)
lpc_post_filter(fft_fwd_cfg, model, Pw, ak, order, dump, beta, gamma, bass_boost);
+ TIMER_SAMPLE_AND_LOG(tpf, tpw, " LPC post filter");
+
#ifdef DUMP
if (dump)
dump_Pw(Pw);
signal = 1E-30; noise = 1E-32;
for(m=1; m<=model->L; m++) {
- am = floor((m - 0.5)*model->Wo/r + 0.5);
- bm = floor((m + 0.5)*model->Wo/r + 0.5);
- Em = 0.0;
-
- for(i=am; i<bm; i++)
- Em += Pw[i].real;
- Am = sqrt(Em);
-
- signal += pow(model->A[m],2.0);
- noise += pow(model->A[m] - Am,2.0);
-
- /* This code significantly improves perf of LPC model, in
- particular when combined with phase0. The LPC spectrum tends
- to track just under the peaks of the spectral envelope, and
- just above nulls. This algorithm does the reverse to
- compensate - raising the amplitudes of spectral peaks, while
- attenuating the null. This enhances the formants, and
- supresses the energy between formants. */
-
- if (sim_pf) {
- if (Am > model->A[m])
- Am *= 0.7;
- if (Am < model->A[m])
- Am *= 1.4;
- }
-
- model->A[m] = Am;
+ am = (int)((m - 0.5)*model->Wo/r + 0.5);
+ bm = (int)((m + 0.5)*model->Wo/r + 0.5);
+ Em = 0.0;
+
+ for(i=am; i<bm; i++)
+ Em += Pw[i].real;
+ Am = sqrtf(Em);
+
+ signal += model->A[m]*model->A[m];
+ noise += (model->A[m] - Am)*(model->A[m] - Am);
+
+ /* This code significantly improves perf of LPC model, in
+ particular when combined with phase0. The LPC spectrum tends
+ to track just under the peaks of the spectral envelope, and
+ just above nulls. This algorithm does the reverse to
+ compensate - raising the amplitudes of spectral peaks, while
+ attenuating the null. This enhances the formants, and
+ supresses the energy between formants. */
+
+ if (sim_pf) {
+ if (Am > model->A[m])
+ Am *= 0.7;
+ if (Am < model->A[m])
+ Am *= 1.4;
+ }
+
+ model->A[m] = Am;
}
*snr = 10.0*log10(signal/noise);
+
+ TIMER_SAMPLE_AND_LOG2(tpf, " rec");
}
/*---------------------------------------------------------------------------*\
}
+#ifdef __EXPERIMENTAL__
+
/*---------------------------------------------------------------------------*\
FUNCTION....: encode_lsps_diff_freq_vq()
}
}
-
+#endif
/*---------------------------------------------------------------------------*\
float Wo; /* current "test" fundamental freq. */
float Wom; /* Wo that maximises E */
float Em; /* mamimum energy */
- float r; /* number of rads/bin */
+ float r, one_on_r; /* number of rads/bin */
float p; /* current pitch */
/* Initialisation */
Wom = model->Wo;
Em = 0.0;
r = TWO_PI/FFT_ENC;
-
+ one_on_r = 1.0/r;
+
/* Determine harmonic sum for a range of Wo values */
for(p=pmin; p<=pmax; p+=pstep) {
Wo = TWO_PI/p;
/* Sum harmonic magnitudes */
-
for(m=1; m<=model->L; m++) {
- b = floor(m*Wo/r + 0.5);
- E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag;
+ b = (int)(m*Wo*one_on_r + 0.5);
+ E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag;
}
-
/* Compare to see if this is a maximum */
if (E > Em) {
\*---------------------------------------------------------------------------*/
-void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[])
+void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase)
{
int i,m; /* loop variables */
int am,bm; /* bounds of current harmonic */
int b; /* DFT bin of centre of current harmonic */
float den; /* denominator of amplitude expression */
- float r; /* number of rads/bin */
+ float r, one_on_r; /* number of rads/bin */
int offset;
COMP Am;
r = TWO_PI/FFT_ENC;
+ one_on_r = 1.0/r;
for(m=1; m<=model->L; m++) {
den = 0.0;
- am = floor((m - 0.5)*model->Wo/r + 0.5);
- bm = floor((m + 0.5)*model->Wo/r + 0.5);
- b = floor(m*model->Wo/r + 0.5);
+ am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5);
+ bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5);
+ b = (int)(m*model->Wo/r + 0.5);
/* Estimate ampltude of harmonic */
den = 0.0;
Am.real = Am.imag = 0.0;
+ offset = FFT_ENC/2 - (int)(m*model->Wo*one_on_r + 0.5);
for(i=am; i<bm; i++) {
den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag;
- offset = i + FFT_ENC/2 - floor(m*model->Wo/r + 0.5);
- Am.real += Sw[i].real*W[offset].real;
- Am.imag += Sw[i].imag*W[offset].real;
+ Am.real += Sw[i].real*W[i + offset].real;
+ Am.imag += Sw[i].imag*W[i + offset].real;
}
- model->A[m] = sqrt(den);
+ model->A[m] = sqrtf(den);
- /* Estimate phase of harmonic */
+ if (est_phase) {
- model->phi[m] = atan2(Sw[b].imag,Sw[b].real);
+ /* Estimate phase of harmonic, this is expensive in CPU for
+ embedded devicesso we make it an option */
+
+ model->phi[m] = atan2(Sw[b].imag,Sw[b].real);
+ }
}
}
float Wo;
float sig, snr;
float elow, ehigh, eratio;
- float dF0, sixty;
+ float sixty;
sig = 1E-4;
for(l=1; l<=model->L/4; l++) {
/* Estimate amplitude of harmonic assuming harmonic is totally voiced */
+ offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
for(m=al; m<bl; m++) {
- offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
- Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag;
- Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag;
- den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag;
+ Am.real += Sw[m].real*W[offset+m].real;
+ Am.imag += Sw[m].imag*W[offset+m].real;
+ den += W[offset+m].real*W[offset+m].real;
}
Am.real = Am.real/den;
/* Determine error between estimated harmonic and original */
+ offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
for(m=al; m<bl; m++) {
- offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
- Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag;
- Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real;
+ Sw_[m].real = Am.real*W[offset+m].real;
+ Sw_[m].imag = Am.imag*W[offset+m].real;
Ew[m].real = Sw[m].real - Sw_[m].real;
Ew[m].imag = Sw[m].imag - Sw_[m].imag;
error += Ew[m].real*Ew[m].real;
ehigh += model->A[l]*model->A[l];
}
eratio = 10.0*log10(elow/ehigh);
- dF0 = 0.0;
/* Look for Type 1 errors, strongly V speech that has been
accidentally declared UV */
if (eratio < -10.0)
model->voiced = 0;
- /* If pitch is jumping about it's likely this is UV */
-
- /* 13 Feb 2012 - this seems to add some V errors so comment out for now. Maybe
- double check on bg noise files
-
- dF0 = (model->Wo - prev_Wo)*FS/TWO_PI;
- if (fabs(dF0) > 15.0)
- model->voiced = 0;
- */
-
/* A common source of Type 2 errors is the pitch estimator
gives a low (50Hz) estimate for UV speech, which gives a
good match with noise due to the close harmoonic spacing.
if (shift) {
/* Update memories */
-
for(i=0; i<N-1; i++) {
Sn_[i] = Sn_[i+N];
}
for(l=1; l<=model->L; l++) {
//for(l=model->L/2; l<=model->L; l++) {
//for(l=1; l<=model->L/4; l++) {
- b = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
+ b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
if (b > ((FFT_DEC/2)-1)) {
b = (FFT_DEC/2)-1;
}
- Sw_[b].real = model->A[l]*cos(model->phi[l]);
- Sw_[b].imag = model->A[l]*sin(model->phi[l]);
+ Sw_[b].real = model->A[l]*cosf(model->phi[l]);
+ Sw_[b].imag = model->A[l]*sinf(model->phi[l]);
Sw_[FFT_DEC-b].real = Sw_[b].real;
Sw_[FFT_DEC-b].imag = -Sw_[b].imag;
}
Sn_[i] += sw_[j].real*Pn[i];
}
+
+static unsigned long next = 1;
+
+int codec2_rand(void) {
+ next = next * 1103515245 + 12345;
+ return((unsigned)(next/65536) % 32768);
+}
+
float hpf(float x, float states[]);
void dft_speech(kiss_fft_cfg fft_fwd_cfg, COMP Sw[], float Sn[], float w[]);
void two_stage_pitch_refinement(MODEL *model, COMP Sw[]);
-void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[]);
+void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[], int est_phase);
float est_voicing_mbe(MODEL *model, COMP Sw[], COMP W[], COMP Sw_[],COMP Ew[],
float prev_Wo);
void make_synthesis_window(float Pn[]);
void synthesise(kiss_fft_cfg fft_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift);
+#define CODEC2_RAND_MAX 32767
+int codec2_rand(void);
+
#endif