--- /dev/null
+/*---------------------------------------------------------------------------*\
+
+ FILE........: c2enc.c
+ AUTHOR......: David Rowe
+ DATE CREATED: 23/8/2010
+
+ Encodes a file of raw speech samples using codec2 and outputs a file
+ of bits (each bit is stored in the LSB of each output byte). Demo
+ program for codec2.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2010 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License version 2, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "codec2.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[])
+{
+ void *codec2;
+ FILE *fin;
+ FILE *fout;
+ short buf[CODEC2_SAMPLES_PER_FRAME];
+ char bits[CODEC2_BITS_PER_FRAME];
+
+ if (argc != 3) {
+ printf("usage: %s InputRawspeechFile OutputBitFile\n", argv[0]);
+ exit(0);
+ }
+
+ fin = fopen(argv[1],"rb");
+ if (fin == NULL) {
+ printf("Error opening input speech file: %s\n", argv[1]);
+ exit(0);
+ }
+
+ fout = fopen(argv[2],"wb");
+ if (fout == NULL) {
+ printf("Error opening output bit file: %s\n", argv[2]);
+ exit(0);
+ }
+
+ codec2 = codec2_create();
+
+ while(fread(buf, sizeof(short), CODEC2_SAMPLES_PER_FRAME, fin) ==
+ CODEC2_SAMPLES_PER_FRAME) {
+ codec2_encode(codec2, bits, buf);
+ fwrite(bits, sizeof(char), CODEC2_BITS_PER_FRAME, fout);
+ }
+
+ codec2_destroy(codec2);
+
+ fclose(fin);
+ fclose(fout);
+
+ return 0;
+}
DATE CREATED: 21/8/2010
Codec2 fully quantised encoder and decoder functions. If you want use
- codec2, these are the functions you need to call.
+ codec2, the codec2_xxx functions are for you.
\*---------------------------------------------------------------------------*/
#include "lpc.h"
#include "quantise.h"
#include "phase.h"
-#include "postfilter.h"
#include "interp.h"
+#include "postfilter.h"
+#include "codec2.h"
typedef struct {
float Sn[M]; /* input speech */
float prev_Wo; /* previous frame's pitch estimate */
float ex_phase; /* excitation model phase track */
float bg_est; /* background noise estimate for post filter */
- MODEL *prev_model; /* model parameters from 20ms ago */
+ MODEL prev_model; /* model parameters from 20ms ago */
} CODEC2;
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION HEADERS
+
+\*---------------------------------------------------------------------------*/
+
+void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[]);
+void synthesise_one_frame(CODEC2 *c2, short speech[], MODEL *model,float ak[]);
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTIONS
+
+\*---------------------------------------------------------------------------*/
+
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_create
for(l=1; l<=MAX_AMP; l++)
c2->prev_model.A[l] = 0.0;
- c2->prev_model.Wo = = TWO_PI/P_MAX;
+ c2->prev_model.Wo = TWO_PI/P_MAX;
return (void*)c2;
}
Harmonic magnitudes (LSPs) 36
Low frequency LPC correction 1
Energy 5
- Pitch (fundamental frequnecy) 7
+ Wo (fundamental frequency) 7
Voicing (10ms update) 2
TOTAL 51
void codec2_encode(void *codec2_state, char bits[], short speech[])
{
CODEC2 *c2;
- COMP Sw[FFT_ENC];
- COMP Sw_[FFT_ENC];
MODEL model;
- float pitch;
int voiced1, voiced2;
- int i, nbits;
+ int lsp_indexes[LPC_ORD];
+ int lpc_correction;
+ int energy_index;
+ int Wo_index;
+ int i, nbit = 0;
assert(codec2_state != NULL);
c2 = (CODEC2*)codec2_state;
- /* First Frame - just send voicing ----------------------------------*/
-
- /* Read input speech */
-
- for(i=0; i<M-N; i++)
- c2->Sn[i] = c2->Sn[i+N];
- for(i=0; i<N; i++)
- c2->Sn[i+M-N] = speech[i];
- dft_speech(Sw, c2->Sn, c2->w);
+ /* first 10ms analysis frame - we just want voicing */
- /* Estimate pitch */
-
- nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
- c2->prev_Wo = TWO_PI/pitch;
- model.Wo = TWO_PI/pitch;
- model.L = PI/model.Wo;
-
- /* estimate model parameters */
-
- dft_speech(Sw, c2->Sn, c2->w);
- two_stage_pitch_refinement(&model, Sw);
- estimate_amplitudes(&model, Sw, c2->W);
- est_voicing_mbe(&model, Sw, c2->W, (FS/TWO_PI)*model.Wo, Sw_);
+ analyse_one_frame(c2, &model, speech);
voiced1 = model.voiced;
- /* Second Frame - send all parameters --------------------------------*/
-
- /* Read input speech */
-
- for(i=0; i<M-N; i++)
- c2->Sn[i] = c2->Sn[i+N];
- for(i=0; i<N; i++)
- c2->Sn[i+M-N] = speech[i+N];
- dft_speech(Sw, c2->Sn, c2->w);
-
- /* Estimate pitch */
-
- nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
- c2->prev_Wo = TWO_PI/pitch;
- model.Wo = TWO_PI/pitch;
- model.L = PI/model.Wo;
+ /* second 10ms analysis frame */
- /* estimate model parameters */
-
- dft_speech(Sw, c2->Sn, c2->w);
- two_stage_pitch_refinement(&model, Sw);
- estimate_amplitudes(&model, Sw, c2->W);
- est_voicing_mbe(&model, Sw, c2->W, (FS/TWO_PI)*model.Wo, Sw_);
+ analyse_one_frame(c2, &model, &speech[N]);
voiced2 = model.voiced;
-
- /* quantise */
- nbits = 0;
- encode_Wo(bits, &nbits, model.Wo);
- encode_voicing(bits, &nbits, voiced1, voiced2);
- encode_amplitudes(bits, &nbits, c2->Sn, c2->w);
- assert(nbits == CODEC2_BITS_PER_FRAME);
+ Wo_index = encode_Wo(model.Wo);
+ encode_amplitudes(lsp_indexes,
+ &lpc_correction,
+ &energy_index,
+ &model,
+ c2->Sn,
+ c2->w);
+
+ pack(bits, &nbit, Wo_index, WO_BITS);
+ for(i=0; i<LPC_ORD; i++)
+ pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
+ pack(bits, &nbit, lpc_correction, 1);
+ pack(bits, &nbit, energy_index, E_BITS);
+ pack(bits, &nbit, voiced1, 1);
+ pack(bits, &nbit, voiced2, 1);
+
+ assert(nbit == CODEC2_BITS_PER_FRAME);
}
/*---------------------------------------------------------------------------*\
{
CODEC2 *c2;
MODEL model;
- float ak[LPC_ORD+1];
int voiced1, voiced2;
- int i, nbits;
+ int lsp_indexes[LPC_ORD];
+ int lpc_correction;
+ int energy_index;
+ int Wo_index;
+ float ak[LPC_ORD+1];
+ int i, nbit = 0;
MODEL model_interp;
assert(codec2_state != NULL);
c2 = (CODEC2*)codec2_state;
- nbits = 0;
- model.Wo = decode_Wo(bits, &nbits);
+ Wo_index = unpack(bits, &nbit, WO_BITS);
+ for(i=0; i<LPC_ORD; i++)
+ lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
+ lpc_correction = unpack(bits, &nbit, 1);
+ energy_index = unpack(bits, &nbit, E_BITS);
+ voiced1 = unpack(bits, &nbit, 1);
+ voiced2 = unpack(bits, &nbit, 1);
+ assert(nbit == CODEC2_BITS_PER_FRAME);
+
+ decode_amplitudes(&model,
+ ak,
+ lsp_indexes,
+ lpc_correction,
+ energy_index);
+
+ model.Wo = decode_Wo(Wo_index);
model.L = PI/model.Wo;
- decode_voicing(&voiced1, &voiced2, bits, &nbits);
- decode_amplitudes(&model, ak, bits, &nbits);
- assert(nbits == CODEC2_BITS_PER_FRAME);
+ model.voiced = voiced2;
+ model_interp.voiced = voiced1;
+ interpolate(&model_interp, &c2->prev_model, &model);
- /* First synthesis frame - interpolated from adjacent frames */
+ synthesise_one_frame(c2, speech, &model_interp, ak);
+ synthesise_one_frame(c2, &speech[N], &model, ak);
- model_interp.voiced = voiced1;
- interp(&model_interp, &c2->prev_model, &model);
- phase_synth_zero_order(&model_interp, ak, voiced1, &c2->ex_phase);
- postfilter(&model_interp, voiced1, &c2->bg_est);
- synthesise(c2->Sn_, &model_interp, c2->Pn, 1);
+ memcpy(&c2->prev_model, &model, sizeof(MODEL));
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: synthesise_one_frame()
+ AUTHOR......: David Rowe
+ DATE CREATED: 23/8/2010
+
+ Synthesise 80 speech samples (10ms) from model parameters.
+
+\*---------------------------------------------------------------------------*/
+
+void synthesise_one_frame(CODEC2 *c2, short speech[], MODEL *model, float ak[])
+{
+ int i;
+
+ phase_synth_zero_order(model, ak, &c2->ex_phase);
+ postfilter(model, &c2->bg_est);
+ synthesise(c2->Sn_, model, c2->Pn, 1);
for(i=0; i<N; i++) {
- if (Sn_[i] > 32767.0)
+ if (c2->Sn_[i] > 32767.0)
speech[i] = 32767;
- else if (Sn_[i] < -32767.0)
+ else if (c2->Sn_[i] < -32767.0)
speech[i] = -32767;
else
- speech[i] = Sn_[i];
+ speech[i] = c2->Sn_[i];
}
- /* Second synthesis frame */
+}
- model.voiced = voiced2;
- phase_synth_zero_order(&model, ak, voiced2, &c2->ex_phase);
- postfilter(&model, voiced2, &c2->bg_est);
- synthesise(c2->Sn_, &model, c2->Pn, 1);
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: analyse_one_frame()
+ AUTHOR......: David Rowe
+ DATE CREATED: 23/8/2010
- for(i=0; i<N; i++) {
- if (Sn_[i] > 32767.0)
- speech[i+N] = 32767;
- else if (Sn_[i] < -32767.0)
- speech[i+N] = -32767;
- else
- speech[i+N] = Sn_[i];
- }
+ Extract sinusoidal model parameters from 80 speech samples (10ms of
+ speech).
+
+\*---------------------------------------------------------------------------*/
- memcpy(&c2->prev_model, model, sizeof(MODEL);
-}
+void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[])
+{
+ COMP Sw[FFT_ENC];
+ COMP Sw_[FFT_ENC];
+ float pitch;
+ int i;
+
+ /* Read input speech */
+
+ for(i=0; i<M-N; i++)
+ c2->Sn[i] = c2->Sn[i+N];
+ for(i=0; i<N; i++)
+ c2->Sn[i+M-N] = speech[i];
+ dft_speech(Sw, c2->Sn, c2->w);
+
+ /* Estimate pitch */
+
+ nlp(c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
+ c2->prev_Wo = TWO_PI/pitch;
+ model->Wo = TWO_PI/pitch;
+ model->L = PI/model->Wo;
+
+ /* estimate model parameters */
+ dft_speech(Sw, c2->Sn, c2->w);
+ two_stage_pitch_refinement(model, Sw);
+ estimate_amplitudes(model, Sw, c2->W);
+ est_voicing_mbe(model, Sw, c2->W, (FS/TWO_PI)*model->Wo, Sw_);
+}
int test_Wo_quant();
int test_lsp_quant();
int test_lsp(int lsp_number, int levels, float max_error_hz);
+int test_energy_quant(int levels, float max_error_dB);
int main() {
quantise_init();
test_Wo_quant();
test_lsp_quant();
+ test_energy_quant(E_LEVELS, 0.5*(E_MAX_DB - E_MIN_DB)/E_LEVELS);
return 0;
}
return 0;
}
+int test_energy_quant(int levels, float max_error_dB) {
+ FILE *fe;
+ float e,e_dec, error, low_e, high_e;
+ int index, index_in, index_out, i;
+
+ /* check 1:1 match between input and output levels */
+
+ for(i=0; i<levels; i++) {
+ index_in = i;
+ e = decode_energy(index_in);
+ index_out = encode_energy(e);
+ if (index_in != index_out) {
+ printf("edB: %f index_in: %d index_out: %d\n",
+ 10.0*log10(e), index_in, index_out);
+ exit(0);
+ }
+ }
+
+ /* check error over range of quantiser */
+
+ low_e = decode_energy(0);
+ high_e = decode_energy(levels-1);
+ fe = fopen("energy_err.txt", "wt");
+
+ for(e=low_e; e<high_e; e +=(high_e-low_e)/1000.0) {
+ index = encode_energy(e);
+ e_dec = decode_energy(index);
+ error = 10.0*log10(e) - 10.0*log10(e_dec);
+ fprintf(fe, "%f\n", error);
+ if (fabs(error) > max_error_dB) {
+ printf("error: %f %f\n", error, max_error_dB);
+ exit(0);
+ }
+ }
+
+ fclose(fe);
+ return 0;
+}
+
int test_lsp(int lsp_number, int levels, float max_error_hz) {
float lsp[LPC_ORD];
int indexes_in[LPC_ORD];
fclose(flsp);
+ printf("OK\n");
+
return 0;
}
float Wo,Wo_dec, error, step_size;
int index, index_in, index_out;
- /* output pitch quant curve for plotting */
+ /* output Wo quant curve for plotting */
f = fopen("quant_pitch.txt","wt");
fclose(f);
- /* check for all pitch codes we get 1:1 match between encoder
+ /* check for all Wo codes we get 1:1 match between encoder
and decoder Wo levels */
for(c=0; c<WO_LEVELS; c++) {